diff --git a/src/postgres/src/backend/utils/misc/guc.c b/src/postgres/src/backend/utils/misc/guc.c index 73ee009845a4..16160865f634 100644 --- a/src/postgres/src/backend/utils/misc/guc.c +++ b/src/postgres/src/backend/utils/misc/guc.c @@ -647,6 +647,12 @@ const struct config_enum_entry yb_read_after_commit_visibility_options[] = { {NULL, 0, false} }; +const struct config_enum_entry yb_sampling_algorithm_options[] = { + {"full_table_scan", YB_SAMPLING_ALGORITHM_FULL_TABLE_SCAN, false}, + {"block_based_sampling", YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING, false}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -3095,6 +3101,19 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"yb_allow_block_based_sampling_algorithm", PGC_SUSET, CUSTOM_OPTIONS, + gettext_noop("Autoflag to allow " + "YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING. Not to " + "be touched by users."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &yb_allow_block_based_sampling_algorithm, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -6652,6 +6671,21 @@ static struct config_enum ConfigureNamesEnum[] = yb_check_no_txn, NULL, NULL }, + { + {"yb_sampling_algorithm", PGC_USERSET, QUERY_TUNING_OTHER, + gettext_noop("Which sampling algorithm to use for YSQL. 
full_table_scan - scan the" + " whole table and pick random rows, block_based_sampling - sample the" + " table for a set of blocks, then scan selected blocks to form a final" + " rows sample."), + NULL, + 0 + }, + &yb_sampling_algorithm, + YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING, + yb_sampling_algorithm_options, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL diff --git a/src/postgres/src/backend/utils/misc/postgresql.conf.sample b/src/postgres/src/backend/utils/misc/postgresql.conf.sample index 6609792060ba..3bf063caaea7 100644 --- a/src/postgres/src/backend/utils/misc/postgresql.conf.sample +++ b/src/postgres/src/backend/utils/misc/postgresql.conf.sample @@ -442,6 +442,7 @@ # force_custom_plan #recursive_worktable_factor = 10.0 # range 0.001-1000000 +#yb_sampling_algorithm = block_based_sampling #------------------------------------------------------------------------------ # REPORTING AND LOGGING diff --git a/src/yb/common/common.proto b/src/yb/common/common.proto index b7f7ea76354a..3454519abb06 100644 --- a/src/yb/common/common.proto +++ b/src/yb/common/common.proto @@ -733,7 +733,8 @@ enum DocDbBlocksSamplingMethod { COMBINE_INTERSECTING_BLOCKS = 2; } -// See ysql_sampling_algorithm flag description. +// See yb_sampling_algorithm flag description. +// Should be in sync with YBSamplingAlgorithmEnum in GUC (ybc_util.h). 
enum YsqlSamplingAlgorithm { FULL_TABLE_SCAN = 0; BLOCK_BASED_SAMPLING = 1; diff --git a/src/yb/yql/pggate/pg_sample.cc b/src/yb/yql/pggate/pg_sample.cc index 03ef0775b175..1b1c4e354414 100644 --- a/src/yb/yql/pggate/pg_sample.cc +++ b/src/yb/yql/pggate/pg_sample.cc @@ -31,16 +31,11 @@ #include "yb/yql/pggate/pg_select_index.h" +#include "yb/yql/pggate/util/yb_guc.h" + DEFINE_test_flag(int64, delay_after_table_analyze_ms, 0, "Add this delay after each table is analyzed."); -DEFINE_RUNTIME_AUTO_int32( - ysql_sampling_algorithm, kLocalVolatile, - static_cast(yb::YsqlSamplingAlgorithm::FULL_TABLE_SCAN), - static_cast(yb::YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING), - "Which sampling algorithm to use for YSQL. 0 - scan the whole table, 1 - sample the table for " - "a set of blocks, then scan selected blocks to form a final rows sample."); - DEFINE_RUNTIME_int32( ysql_docdb_blocks_sampling_method, yb::DocDbBlocksSamplingMethod::SPLIT_INTERSECTING_BLOCKS_V3, "Controls how we define blocks for 1st phase of block-based sampling."); @@ -113,7 +108,11 @@ class PgSamplePicker : public PgSelectIndex { sampling_state.set_samplerows(0); // rows scanned so far sampling_state.set_rowstoskip(-1); // rows to skip before selecting another sampling_state.set_rstate_w(rand_state.w); // Vitter algorithm's W - sampling_state.set_sampling_algorithm(YsqlSamplingAlgorithm(FLAGS_ysql_sampling_algorithm)); + if (yb_allow_block_based_sampling_algorithm) { + sampling_state.set_sampling_algorithm(YsqlSamplingAlgorithm(yb_sampling_algorithm)); + } else { + sampling_state.set_sampling_algorithm(YsqlSamplingAlgorithm::FULL_TABLE_SCAN); + } sampling_state.set_docdb_blocks_sampling_method( DocDbBlocksSamplingMethod(FLAGS_ysql_docdb_blocks_sampling_method)); auto& rand = *sampling_state.mutable_rand_state(); diff --git a/src/yb/yql/pggate/util/yb_guc.cc b/src/yb/yql/pggate/util/yb_guc.cc index bb82d582c1ae..f5da8ea81cc9 100644 --- a/src/yb/yql/pggate/util/yb_guc.cc +++ 
b/src/yb/yql/pggate/util/yb_guc.cc @@ -81,3 +81,9 @@ uint64_t yb_read_time = 0; bool yb_is_read_time_ht = false; int yb_read_after_commit_visibility = 0; + +bool yb_allow_block_based_sampling_algorithm = true; + +// TODO(#24089): Once code duplication between yb_guc and ybc_util is removed, we should be able +// to use YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING instead of 1 and do it in one place. +int32_t yb_sampling_algorithm = 1 /* YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING */; diff --git a/src/yb/yql/pggate/util/yb_guc.h b/src/yb/yql/pggate/util/yb_guc.h index 0401d048b687..9ef7b9017853 100644 --- a/src/yb/yql/pggate/util/yb_guc.h +++ b/src/yb/yql/pggate/util/yb_guc.h @@ -191,6 +191,10 @@ extern int yb_walsender_poll_sleep_duration_empty_ms; extern int yb_read_after_commit_visibility; +extern bool yb_allow_block_based_sampling_algorithm; + +extern int32_t yb_sampling_algorithm; + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/yb/yql/pggate/util/ybc_util.h b/src/yb/yql/pggate/util/ybc_util.h index 8f9945dece6b..df70543d3465 100644 --- a/src/yb/yql/pggate/util/ybc_util.h +++ b/src/yb/yql/pggate/util/ybc_util.h @@ -234,6 +234,16 @@ typedef enum { /* GUC for the enum above. */ extern int yb_read_after_commit_visibility; +extern bool yb_allow_block_based_sampling_algorithm; + +// Should be in sync with YsqlSamplingAlgorithm protobuf. 
+typedef enum { + YB_SAMPLING_ALGORITHM_FULL_TABLE_SCAN = 0, + YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING = 1, +} YBSamplingAlgorithmEnum; + +extern int32_t yb_sampling_algorithm; + typedef struct YBCStatusStruct* YBCStatus; bool YBCStatusIsNotFound(YBCStatus s); diff --git a/src/yb/yql/pgwrapper/pg_analyze-test.cc b/src/yb/yql/pgwrapper/pg_analyze-test.cc index cc923d319974..f1359ea1dcc6 100644 --- a/src/yb/yql/pgwrapper/pg_analyze-test.cc +++ b/src/yb/yql/pgwrapper/pg_analyze-test.cc @@ -25,7 +25,6 @@ #include "yb/yql/pgwrapper/pg_mini_test_base.h" DECLARE_int32(ysql_docdb_blocks_sampling_method); -DECLARE_int32(ysql_sampling_algorithm); DECLARE_int64(db_block_size_bytes); DECLARE_int64(db_write_buffer_size); @@ -182,6 +181,14 @@ size_t EstimateDistinct(size_t d, size_t k, size_t n) { return d * (1 - pow(1 - 1.0 * n / d / k, k)); } +std::string GetYbSamplingAlgorithm(YsqlSamplingAlgorithm algorithm) { + switch (algorithm) { + case YsqlSamplingAlgorithm::FULL_TABLE_SCAN: return "full_table_scan"; + case YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING: return "block_based_sampling"; + } + FATAL_INVALID_PB_ENUM_VALUE(YsqlSamplingAlgorithm, algorithm); +} + } // namespace class PgAnalyzeTest : public PgMiniTestBase { @@ -335,7 +342,8 @@ TEST_F(PgAnalyzeTest, AnalyzeSamplingColocated) { } for (const auto ysql_sampling_algorithm : GetAllPbEnumValues()) { - ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_sampling_algorithm) = ysql_sampling_algorithm; + ASSERT_OK(conn.ExecuteFormat( + "SET yb_sampling_algorithm = $0", GetYbSamplingAlgorithm(ysql_sampling_algorithm))); std::vector blocks_sampling_methods; if (ysql_sampling_algorithm == YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING) { @@ -350,9 +358,6 @@ TEST_F(PgAnalyzeTest, AnalyzeSamplingColocated) { ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_docdb_blocks_sampling_method) = blocks_sampling_method; - ASSERT_OK(RestartPostgres()); - conn = ASSERT_RESULT(ConnectToDB(kColocatedDatabaseName)); - const auto num_distinct_tolerace = 
kNumDistinctTolerance[blocks_sampling_method]; const auto null_frac_tolerance = kNullFracTolerance[blocks_sampling_method]; const auto estimated_total_rows_accuracy = @@ -414,7 +419,7 @@ TEST_F(PgAnalyzeTest, AnalyzeSamplingColocated) { ASSERT_GT(correlation, - 1 - kEps); // YsqlSamplingAlgorithm::FULL_TABLE_SCAN calculates correlation incorrectly as of // 2024-12-12, so skip it. - if (FLAGS_ysql_sampling_algorithm != YsqlSamplingAlgorithm::FULL_TABLE_SCAN) { + if (ysql_sampling_algorithm != YsqlSamplingAlgorithm::FULL_TABLE_SCAN) { if (column_name == "k" || column_name == "v" || column_name == "v_d") { // These column values are in the scan order. ASSERT_GT(correlation, 1 - kEps); diff --git a/src/yb/yql/pgwrapper/pg_wrapper.cc b/src/yb/yql/pgwrapper/pg_wrapper.cc index b594be6ad264..2cf62c28ef79 100644 --- a/src/yb/yql/pgwrapper/pg_wrapper.cc +++ b/src/yb/yql/pgwrapper/pg_wrapper.cc @@ -271,6 +271,9 @@ DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_replication_commands, kLocalPersiste DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_replica_identity, kLocalPersisted, false, true, "Enable replica identity command for Alter Table query"); +DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_allow_block_based_sampling_algorithm, + kLocalVolatile, false, true, "Allow YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING"); + DEFINE_RUNTIME_PG_FLAG( string, yb_default_replica_identity, "CHANGE", "The default replica identity to be assigned to user defined tables at the time of creation. " @@ -307,6 +310,15 @@ DEFINE_RUNTIME_PG_FLAG(bool, yb_enable_fkey_catcache, true, DEFINE_RUNTIME_PG_FLAG(bool, yb_enable_nop_alter_role_optimization, true, "Enable nop alter role statement optimization."); +DEFINE_RUNTIME_PG_FLAG(string, yb_sampling_algorithm, + "block_based_sampling", + "Which sampling algorithm to use for YSQL. 
full_table_scan - scan the whole table and pick " + "random rows, block_based_sampling - sample the table for a set of blocks, then scan selected " + "blocks to form a final rows sample."); +// Only the names listed in the help text above are meaningful; reject anything else when the flag is set. +DEFINE_validator(ysql_yb_sampling_algorithm, + FLAG_IN_SET_VALIDATOR("full_table_scan", "block_based_sampling")); + DEFINE_validator(ysql_yb_xcluster_consistency_level, FLAG_IN_SET_VALIDATOR("database", "tablet")); DEFINE_NON_RUNTIME_string(ysql_conn_mgr_warmup_db, "yugabyte",