Skip to content

Commit

Permalink
Set seed=0 for the delta lake part roundtrip tests (#9741)
Browse files: browse the repository at this point in the history
Signed-off-by: Alessandro Bellina <[email protected]>
  • Loading branch information
abellina authored Nov 16, 2023
1 parent a7fa0df commit ce53657
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
1 change: 1 addition & 0 deletions integration_tests/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ markers =
regexp: Mark a test that tests regular expressions on the GPU (only works when UTF-8 is enabled)
large_data_test: Mark tests with large data
pyarrow_test: Mark pyarrow tests
datagen_overrides: Mark that allows overriding datagen settings (i.e. seed) for a test
filterwarnings =
ignore:.*pytest.mark.order.*:_pytest.warning_types.PytestUnknownMarkWarning
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/delta_lake_write_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def test_delta_write_round_trip_unmanaged(spark_tmp_path):
@ignore_order
@pytest.mark.parametrize("gens", parquet_part_write_gens, ids=idfn)
@pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9738')
def test_delta_part_write_round_trip_unmanaged(spark_tmp_path, gens):
gen_list = [("a", RepeatSeqGen(gens, 10)), ("b", gens)]
data_path = spark_tmp_path + "/DELTA_DATA"
Expand All @@ -110,6 +111,7 @@ def test_delta_part_write_round_trip_unmanaged(spark_tmp_path, gens):
@ignore_order
@pytest.mark.parametrize("gens", parquet_part_write_gens, ids=idfn)
@pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9738')
def test_delta_multi_part_write_round_trip_unmanaged(spark_tmp_path, gens):
gen_list = [("a", RepeatSeqGen(gens, 10)), ("b", gens), ("c", SetValuesGen(StringType(), ["x", "y", "z"]))]
data_path = spark_tmp_path + "/DELTA_DATA"
Expand Down

0 comments on commit ce53657

Please sign in to comment.