diff --git a/aggregator/pom.xml b/aggregator/pom.xml
index 8f8b6da47fc..6c285fa7eaa 100644
--- a/aggregator/pom.xml
+++ b/aggregator/pom.xml
@@ -611,6 +611,23 @@
+
+ release341db
+
+
+ buildver
+ 341db
+
+
+
+
+ com.nvidia
+ rapids-4-spark-delta-spark341db_${scala.binary.version}
+ ${project.version}
+ ${spark.version.classifier}
+
+
+
release333
diff --git a/integration_tests/src/main/python/delta_lake_merge_test.py b/integration_tests/src/main/python/delta_lake_merge_test.py
index 1d43259434b..0ba63380aba 100644
--- a/integration_tests/src/main/python/delta_lake_merge_test.py
+++ b/integration_tests/src/main/python/delta_lake_merge_test.py
@@ -97,7 +97,7 @@ def checker(data_path, do_merge):
merge_sql=merge_sql,
check_func=checker)
-@allow_non_gpu("ExecutedCommandExec,BroadcastHashJoinExec,ColumnarToRowExec,BroadcastExchangeExec,DataWritingCommandExec", *delta_meta_allow)
+@allow_non_gpu("ExecutedCommandExec,BroadcastHashJoinExec,ColumnarToRowExec,BroadcastExchangeExec,DataWritingCommandExec", delta_write_fallback_allow, *delta_meta_allow)
@delta_lake
@ignore_order
@pytest.mark.skipif(is_databricks_runtime() and spark_version() < "3.3.2", reason="NOT MATCHED BY SOURCE added in DBR 12.2")
diff --git a/integration_tests/src/main/python/fastparquet_compatibility_test.py b/integration_tests/src/main/python/fastparquet_compatibility_test.py
index d2636d58d01..be6f6807004 100644
--- a/integration_tests/src/main/python/fastparquet_compatibility_test.py
+++ b/integration_tests/src/main/python/fastparquet_compatibility_test.py
@@ -17,7 +17,7 @@
from asserts import assert_gpu_and_cpu_are_equal_collect
from data_gen import *
from fastparquet_utils import get_fastparquet_result_canonicalizer
-from spark_session import spark_version, with_cpu_session, with_gpu_session
+from spark_session import is_databricks_runtime, spark_version, with_cpu_session, with_gpu_session
def fastparquet_unavailable():
@@ -107,8 +107,12 @@ def read_with_fastparquet_or_plugin(spark):
pytest.param(IntegerGen(nullable=True),
marks=pytest.mark.xfail(reason="Nullables cause merge errors, when converting to Spark dataframe")),
LongGen(nullable=False),
- FloatGen(nullable=False),
- DoubleGen(nullable=False),
+ pytest.param(FloatGen(nullable=False),
+ marks=pytest.mark.xfail(is_databricks_runtime(),
+ reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
+ pytest.param(DoubleGen(nullable=False),
+ marks=pytest.mark.xfail(is_databricks_runtime(),
+ reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
StringGen(nullable=False),
pytest.param(DecimalGen(nullable=False),
marks=pytest.mark.xfail(reason="fastparquet reads Decimal columns as Float, as per "
@@ -131,8 +135,11 @@ def read_with_fastparquet_or_plugin(spark):
marks=pytest.mark.xfail(reason="Conversion from Pandas dataframe (read with fastparquet) to Spark dataframe "
"fails: \"Unable to infer the type of the field a\".")),
- StructGen(children=[("first", IntegerGen(nullable=False)),
- ("second", FloatGen(nullable=False))], nullable=False)
+ pytest.param(
+ StructGen(children=[("first", IntegerGen(nullable=False)),
+ ("second", FloatGen(nullable=False))], nullable=False),
+ marks=pytest.mark.xfail(is_databricks_runtime(),
+ reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
], ids=idfn)
def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
"""
@@ -176,8 +183,12 @@ def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
LongGen(nullable=False),
pytest.param(LongGen(nullable=True),
marks=pytest.mark.xfail(reason="Nullables cause merge errors, when converting to Spark dataframe")),
- FloatGen(nullable=False),
- DoubleGen(nullable=False),
+ pytest.param(FloatGen(nullable=False),
+ marks=pytest.mark.xfail(is_databricks_runtime(),
+ reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
+ pytest.param(DoubleGen(nullable=False),
+ marks=pytest.mark.xfail(is_databricks_runtime(),
+ reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
StringGen(nullable=False),
pytest.param(DecimalGen(nullable=False),
marks=pytest.mark.xfail(reason="fastparquet reads Decimal columns as Float, as per "
diff --git a/integration_tests/src/main/python/udf_cudf_test.py b/integration_tests/src/main/python/udf_cudf_test.py
index 04416315702..6d94a5da206 100644
--- a/integration_tests/src/main/python/udf_cudf_test.py
+++ b/integration_tests/src/main/python/udf_cudf_test.py
@@ -37,10 +37,15 @@
from typing import Iterator
from pyspark.sql import Window
from pyspark.sql.functions import pandas_udf, PandasUDFType
-from spark_session import with_cpu_session, with_gpu_session
+from spark_session import is_databricks_runtime, is_spark_340_or_later, with_cpu_session, with_gpu_session
from marks import cudf_udf
+if is_databricks_runtime() and is_spark_340_or_later():
+ # Databricks 13.3 does not use separate reader/writer threads for Python UDFs
+ # which can lead to hangs. Skipping these tests until the Python UDF handling is updated.
+ pytestmark = pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9493")
+
_conf = {
'spark.rapids.sql.exec.AggregateInPandasExec': 'true',
'spark.rapids.sql.exec.FlatMapCoGroupsInPandasExec': 'true',
diff --git a/integration_tests/src/main/python/udf_test.py b/integration_tests/src/main/python/udf_test.py
index 14fc57cf972..db8425f6387 100644
--- a/integration_tests/src/main/python/udf_test.py
+++ b/integration_tests/src/main/python/udf_test.py
@@ -15,7 +15,7 @@
import pytest
from conftest import is_at_least_precommit_run
-from spark_session import is_databricks_runtime, is_before_spark_330, is_before_spark_350, is_spark_350_or_later
+from spark_session import is_databricks_runtime, is_before_spark_330, is_before_spark_350, is_spark_340_or_later
from pyspark.sql.pandas.utils import require_minimum_pyarrow_version, require_minimum_pandas_version
@@ -43,6 +43,12 @@
import pyarrow
from typing import Iterator, Tuple
+
+if is_databricks_runtime() and is_spark_340_or_later():
+ # Databricks 13.3 does not use separate reader/writer threads for Python UDFs
+ # which can lead to hangs. Skipping these tests until the Python UDF handling is updated.
+ pytestmark = pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9493")
+
arrow_udf_conf = {
'spark.sql.execution.arrow.pyspark.enabled': 'true',
'spark.rapids.sql.exec.WindowInPandasExec': 'true',
diff --git a/jenkins/Jenkinsfile-blossom.premerge-databricks b/jenkins/Jenkinsfile-blossom.premerge-databricks
index 0ea835d39a9..27c42f59aab 100644
--- a/jenkins/Jenkinsfile-blossom.premerge-databricks
+++ b/jenkins/Jenkinsfile-blossom.premerge-databricks
@@ -88,7 +88,7 @@ pipeline {
// 'name' and 'value' only support literal strings in declarative Jenkins pipelines.
// Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127
name 'DB_RUNTIME'
- values '10.4', '11.3', '12.2'
+ values '10.4', '11.3', '12.2', '13.3'
}
}
stages {
diff --git a/pom.xml b/pom.xml
index 297492604de..b4e0ae4f8dd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -509,6 +509,31 @@
delta-lake/delta-spark332db
+
+
+ release341db
+
+
+ buildver
+ 341db
+
+
+
+
+ 3.4.4
+ spark341db
+ ${spark341db.version}
+ ${spark341db.version}
+ 3.3.1
+ true
+ 1.12.0
+ ${spark330.iceberg.version}
+
+
+ shim-deps/databricks
+ delta-lake/delta-spark341db
+
+
release350
@@ -691,6 +716,7 @@
3.3.2.3.3.7190.0-91
3.3.0-databricks
3.3.2-databricks
+ 3.4.1-databricks
3.5.0
3.12.4
4.3.0
@@ -745,7 +771,8 @@
321db,
330db,
- 332db
+ 332db,
+ 341db
+ release341db
+
+
+ buildver
+ 341db
+
+
+
+
+ 3.4.4
+ spark341db
+ ${spark341db.version}
+ ${spark341db.version}
+ 3.3.1
+ true
+ 1.12.0
+ ${spark330.iceberg.version}
+
+
+ shim-deps/databricks
+ delta-lake/delta-spark341db
+
+
release350
@@ -691,6 +716,7 @@
3.3.2.3.3.7190.0-91
3.3.0-databricks
3.3.2-databricks
+ 3.4.1-databricks
3.5.0
3.12.4
4.3.0
@@ -745,7 +771,8 @@
321db,
330db,
- 332db
+ 332db,
+ 341db