From 30a07c632e431902d7ffad6fc6f630f092fd45b8 Mon Sep 17 00:00:00 2001
From: Prince Datta
Date: Wed, 28 Feb 2024 10:16:54 +0530
Subject: [PATCH] Add presubmit tests for 2.2 and remove for 1.5

---
 cloudbuild/cloudbuild.yaml        | 66 +++++++++++++++----------
 connectors/connectors.sh          |  2 +-
 dask/dask.sh                      |  4 +-
 hue/test_hue.py                   |  2 +
 kafka/test_kafka.py               |  4 +-
 livy/test_livy.py                 | 14 +++++--
 spark-rapids/test_spark_rapids.py |  9 +++++
 7 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/cloudbuild/cloudbuild.yaml b/cloudbuild/cloudbuild.yaml
index c3ac41836..26e7caf27 100644
--- a/cloudbuild/cloudbuild.yaml
+++ b/cloudbuild/cloudbuild.yaml
@@ -9,39 +9,6 @@ steps:
     id: 'gcr-push'
     args: ['push', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID']
 
-  # Run presubmit tests in parallel for 1.5 Debian image
-  - name: 'gcr.io/cloud-builders/kubectl'
-    id: 'dataproc-1.5-debian10-tests'
-    waitFor: ['gcr-push']
-    entrypoint: 'bash'
-    args: ['cloudbuild/run-presubmit-on-k8s.sh', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID', '$BUILD_ID', '1.5-debian10']
-    env:
-    - 'COMMIT_SHA=$COMMIT_SHA'
-    - 'CLOUDSDK_COMPUTE_REGION=us-central1'
-    - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
-
-  # Run presubmit tests in parallel for 1.5 Rocky Linux image
-  - name: 'gcr.io/cloud-builders/kubectl'
-    id: 'dataproc-1.5-rocky8-tests'
-    waitFor: ['gcr-push']
-    entrypoint: 'bash'
-    args: ['cloudbuild/run-presubmit-on-k8s.sh', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID', '$BUILD_ID', '1.5-rocky8']
-    env:
-    - 'COMMIT_SHA=$COMMIT_SHA'
-    - 'CLOUDSDK_COMPUTE_REGION=us-central1'
-    - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
-
-  # Run presubmit tests in parallel for 1.5 Ubuntu image
-  - name: 'gcr.io/cloud-builders/kubectl'
-    id: 'dataproc-1.5-ubuntu18-tests'
-    waitFor: ['gcr-push']
-    entrypoint: 'bash'
-    args: ['cloudbuild/run-presubmit-on-k8s.sh', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID', '$BUILD_ID', '1.5-ubuntu18']
-    env:
-    - 'COMMIT_SHA=$COMMIT_SHA'
-    - 'CLOUDSDK_COMPUTE_REGION=us-central1'
-    - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
-
   # Run presubmit tests in parallel for 2.0 Debian image
   - name: 'gcr.io/cloud-builders/kubectl'
     id: 'dataproc-2.0-debian10-tests'
@@ -108,6 +75,39 @@ steps:
     - 'CLOUDSDK_COMPUTE_REGION=us-central1'
     - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
 
+  # Run presubmit tests in parallel for 2.2 Debian image
+  - name: 'gcr.io/cloud-builders/kubectl'
+    id: 'dataproc-2.2-debian12-tests'
+    waitFor: ['gcr-push']
+    entrypoint: 'bash'
+    args: ['cloudbuild/run-presubmit-on-k8s.sh', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID', '$BUILD_ID', '2.2-debian12']
+    env:
+    - 'COMMIT_SHA=$COMMIT_SHA'
+    - 'CLOUDSDK_COMPUTE_REGION=us-central1'
+    - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
+
+  # Run presubmit tests in parallel for 2.2 Rocky Linux image
+  - name: 'gcr.io/cloud-builders/kubectl'
+    id: 'dataproc-2.2-rocky9-tests'
+    waitFor: ['gcr-push']
+    entrypoint: 'bash'
+    args: ['cloudbuild/run-presubmit-on-k8s.sh', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID', '$BUILD_ID', '2.2-rocky9']
+    env:
+    - 'COMMIT_SHA=$COMMIT_SHA'
+    - 'CLOUDSDK_COMPUTE_REGION=us-central1'
+    - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
+
+  # Run presubmit tests in parallel for 2.2 Ubuntu image
+  - name: 'gcr.io/cloud-builders/kubectl'
+    id: 'dataproc-2.2-ubuntu22-tests'
+    waitFor: ['gcr-push']
+    entrypoint: 'bash'
+    args: ['cloudbuild/run-presubmit-on-k8s.sh', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID', '$BUILD_ID', '2.2-ubuntu22']
+    env:
+    - 'COMMIT_SHA=$COMMIT_SHA'
+    - 'CLOUDSDK_COMPUTE_REGION=us-central1'
+    - 'CLOUDSDK_CONTAINER_CLUSTER=init-actions-presubmit'
+
   # Delete Docker image from GCR
   - name: 'gcr.io/cloud-builders/gcloud'
     args: ['container', 'images', 'delete', 'gcr.io/$PROJECT_ID/init-actions-image:$BUILD_ID']
diff --git a/connectors/connectors.sh b/connectors/connectors.sh
index a5a1697c2..22157dafa 100755
--- a/connectors/connectors.sh
+++ b/connectors/connectors.sh
@@ -66,7 +66,7 @@ function get_connector_url() {
       exit 1
     fi
     ;;
-  "1.5" | "2.0" | "2.1")
+  "1.5" | "2.0" | "2.1" | "2.2")
     scala_version="2.12"
     ;;
   *)
diff --git a/dask/dask.sh b/dask/dask.sh
index b9ec9e533..0a23e6a9f 100644
--- a/dask/dask.sh
+++ b/dask/dask.sh
@@ -43,7 +43,7 @@ readonly KNOX_HOME=/usr/lib/knox
 readonly KNOX_DASK_DIR=${KNOX_HOME}/data/services/dask/0.1.0
 readonly KNOX_DASKWS_DIR=${KNOX_HOME}/data/services/daskws/0.1.0
 
-CONDA_PACKAGES=("dask=${DASK_VERSION}" 'dask-bigquery' 'dask-ml' 'dask-sql')
+CONDA_PACKAGES=('dask-bigquery' 'dask-ml' 'dask-sql')
 
 if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then
   # Pin `distributed` package version because `dask-yarn` 0.9
@@ -343,6 +343,8 @@ EOF
 
 function main() {
+  #Install dask with the help of conda as installing with mamba causes version conflicts
+  execute_with_retries "conda install -y dask=${DASK_VERSION}"
   # Install conda packages
   execute_with_retries "mamba install -y ${CONDA_PACKAGES[*]}"
 
diff --git a/hue/test_hue.py b/hue/test_hue.py
index b119dffcd..80a1128d4 100644
--- a/hue/test_hue.py
+++ b/hue/test_hue.py
@@ -60,6 +60,8 @@ def test_hue(self, configuration, machine_suffixes):
       'STANDARD',
   )
   def test_hue_job(self, configuration):
+    if self.getImageVersion() >= pkg_resources.parse_version("2.2"):
+      self.skipTest("Not supported in 2.2 image")
     self.createCluster(configuration, self.INIT_ACTIONS)
     self.__submit_pyspark_job(self.getClusterName())
 
diff --git a/kafka/test_kafka.py b/kafka/test_kafka.py
index 4d4703031..102d5957c 100644
--- a/kafka/test_kafka.py
+++ b/kafka/test_kafka.py
@@ -104,7 +104,7 @@ def test_kafka_manager(self, configuration, machine_suffixes):
     if self.getImageVersion() <= pkg_resources.parse_version("2.0"):
       self.skipTest("Java 11 or higher is required for CMAK")
 
-    metadata = 'run-on-master=true, kafka-enable-jmx=true'
+    metadata = 'run-on-master=true,kafka-enable-jmx=true'
     self.createCluster(configuration, self.KAFKA_MANAGER_INIT_ACTION, metadata=metadata)
     for machine_suffix in machine_suffixes:
       self.verify_instance("{}-{}".format(self.getClusterName(),
@@ -121,7 +121,7 @@ def test_kafka_manager_job(self, configuration):
     if self.getImageVersion() <= pkg_resources.parse_version("2.0"):
       self.skipTest("Java 11 or higher is required for CMAK")
 
-    metadata = 'run-on-master=true, kafka-enable-jmx=true, install-kafka-python=true'
+    metadata = 'run-on-master=true,kafka-enable-jmx=true,install-kafka-python=true'
     properties = 'dataproc:alpha.components=ZOOKEEPER'
     self.createCluster(configuration, self.KAFKA_MANAGER_INIT_ACTION,
                        metadata=metadata, properties=properties)
diff --git a/livy/test_livy.py b/livy/test_livy.py
index 5fb2bcd7a..3b4a57063 100644
--- a/livy/test_livy.py
+++ b/livy/test_livy.py
@@ -27,10 +27,16 @@ def _verify_instance(self, name):
     self.remove_test_script(self.TEST_SCRIPT_FILE_NAME, name)
 
   def _run_python_test_file(self, name):
-    self.assert_instance_command(
-        name,
-        "sudo apt-get install -y python3-pip && sudo pip3 install requests"
-    )
+    if self.getImageVersion() >= pkg_resources.parse_version("2.2"):
+      self.assert_instance_command(
+          name,
+          "sudo apt install python3-requests"
+      )
+    else:
+      self.assert_instance_command(
+          name,
+          "sudo apt-get install -y python3-pip && sudo pip3 install requests"
+      )
     self.assert_instance_command(
         name, "sudo python3 {}".format(self.TEST_SCRIPT_FILE_NAME))
 
diff --git a/spark-rapids/test_spark_rapids.py b/spark-rapids/test_spark_rapids.py
index 267e800b5..4bdf4a876 100644
--- a/spark-rapids/test_spark_rapids.py
+++ b/spark-rapids/test_spark_rapids.py
@@ -64,6 +64,9 @@ def test_spark_rapids(self, configuration, machine_suffixes, accelerator):
     if self.getImageVersion() == pkg_resources.parse_version("2.1") or self.getImageOs() == "rocky":
       self.skipTest("Not supported in image2.1 or rocky images")
 
+    if self.getImageVersion() == pkg_resources.parse_version("2.2") and self.getImageOs() == "debian":
+      self.skipTest("The Debian version (12) for Dataproc 2.2 is not supported")
+
     optional_components = None
     metadata = "gpu-driver-provider=NVIDIA,rapids-runtime=SPARK"
 
@@ -94,6 +97,9 @@ def test_spark_rapids_sql(self, configuration, machine_suffixes, accelerator):
     if self.getImageVersion() == pkg_resources.parse_version("2.1") or self.getImageOs() == "rocky":
       self.skipTest("Not supported in image2.1 or rocky images")
 
+    if self.getImageVersion() == pkg_resources.parse_version("2.2") and self.getImageOs() == "debian":
+      self.skipTest("The Debian version (12) for Dataproc 2.2 is not supported")
+
     optional_components = None
     metadata = "gpu-driver-provider=NVIDIA,rapids-runtime=SPARK"
 
@@ -124,6 +130,9 @@ def test_non_default_cuda_versions(self, configuration, machine_suffixes,
     if self.getImageVersion() == pkg_resources.parse_version("2.1") or self.getImageOs() == "rocky":
       self.skipTest("Not supported in image2.1 or rocky images")
 
+    if self.getImageVersion() == pkg_resources.parse_version("2.2") and self.getImageOs() == "debian":
+      self.skipTest("The Debian version (12) for Dataproc 2.2 is not supported")
+
     metadata = ("gpu-driver-provider=NVIDIA,rapids-runtime=SPARK"
                 ",cuda-version={0},driver-version={1}".format(cuda_version,
                                                               driver_version))
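
Note on the test changes above: the new guards in hue/test_hue.py, livy/test_livy.py, and spark-rapids/test_spark_rapids.py all use the same image-version gating pattern, comparing self.getImageVersion() against pkg_resources.parse_version() and skipping before any cluster is created. Below is a minimal, self-contained sketch of that pattern; the class and its image_version/image_os attributes are hypothetical stand-ins for the real DataprocTestCase harness and are not part of this patch.

    import unittest

    import pkg_resources


    class VersionGateSketch(unittest.TestCase):
      # Hypothetical fixed values; in the real tests these come from the
      # test harness and the cluster configuration under test.
      image_version = "2.2"
      image_os = "debian"

      def getImageVersion(self):
        return pkg_resources.parse_version(self.image_version)

      def getImageOs(self):
        return self.image_os

      def test_feature(self):
        # Same shape as the guards added above: bail out early on
        # unsupported image/OS combinations instead of failing later.
        if self.getImageVersion() >= pkg_resources.parse_version("2.2"):
          self.skipTest("Not supported in 2.2 image")
        if self.getImageVersion() == pkg_resources.parse_version("2.2") and self.getImageOs() == "debian":
          self.skipTest("The Debian version (12) for Dataproc 2.2 is not supported")


    if __name__ == '__main__':
      unittest.main()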