Skip to content

Commit

Permalink
[#25557] docdb: Fixed enabling block-based sampling flags to enable i…
Browse files Browse the repository at this point in the history
…t by default

Summary:
ysql_sampling_algorithm auto-flag is propagated to postgres process at postgres process start but not getting updated automatically when auto-flag is promoted because this is not yet implemented. That leads to block-based sampling algorithm is not enabled by default as was expected.

To solve this issue, ysql_sampling_algorithm auto-flag is replaced by pg flags which are passed to postgres via GUC: yb_allow_block_based_sampling_algorithm (auto) + yb_sampling_algorithm (runtime).

Upgrade/Rollback safety:
- Before release branch is cut it is safe to remove ysql_sampling_algorithm auto-flag.
- In .proto file only comment has been updated.

Jira: DB-14811

Test Plan:
1) PgAnalyzeTest.AnalyzeSamplingColocated test
2) Create universe and make sure block-based sampling algorithm is enabled.
3) Upgrade older (pre-GH-24112) universe and make sure block-based sampling algorithm is enabled.

Reviewers: hsunder, amartsinchyk

Reviewed By: hsunder

Subscribers: rthallam, yql, ybase

Tags: #jenkins-ready, #jenkins-trigger

Differential Revision: https://phorge.dev.yugabyte.com/D41131
  • Loading branch information
ttyusupov committed Jan 10, 2025
1 parent 563c474 commit 66fd3f9
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 15 deletions.
34 changes: 34 additions & 0 deletions src/postgres/src/backend/utils/misc/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,12 @@ const struct config_enum_entry yb_read_after_commit_visibility_options[] = {
{NULL, 0, false}
};

const struct config_enum_entry yb_sampling_algorithm_options[] = {
{"full_table_scan", YB_SAMPLING_ALGORITHM_FULL_TABLE_SCAN, false},
{"block_based_sampling", YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING, false},
{NULL, 0, false}
};

/*
* Options for enum values stored in other modules
*/
Expand Down Expand Up @@ -3095,6 +3101,19 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},

{
{"yb_allow_block_based_sampling_algorithm", PGC_SUSET, CUSTOM_OPTIONS,
gettext_noop("Autoflag to allow "
"YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING. Not to "
"be touched by users."),
NULL,
GUC_NOT_IN_SAMPLE
},
&yb_allow_block_based_sampling_algorithm,
true,
NULL, NULL, NULL
},

/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
Expand Down Expand Up @@ -6652,6 +6671,21 @@ static struct config_enum ConfigureNamesEnum[] =
yb_check_no_txn, NULL, NULL
},

{
{"yb_sampling_algorithm", PGC_USERSET, QUERY_TUNING_OTHER,
gettext_noop("Which sampling algorithm to use for YSQL. full_table_scan - scan the"
" whole table and pick random rows, block_based_sampling - sample the"
" table for a set of blocks, then scan selected blocks to form a final"
" rows sample."),
NULL,
0
},
&yb_sampling_algorithm,
YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING,
yb_sampling_algorithm_options,
NULL, NULL, NULL
},

/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
Expand Down
1 change: 1 addition & 0 deletions src/postgres/src/backend/utils/misc/postgresql.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@
# force_custom_plan
#recursive_worktable_factor = 10.0 # range 0.001-1000000

#yb_sampling_algorithm = block_based_sampling

#------------------------------------------------------------------------------
# REPORTING AND LOGGING
Expand Down
3 changes: 2 additions & 1 deletion src/yb/common/common.proto
Original file line number Diff line number Diff line change
Expand Up @@ -733,7 +733,8 @@ enum DocDbBlocksSamplingMethod {
COMBINE_INTERSECTING_BLOCKS = 2;
}

// See ysql_sampling_algorithm flag description.
// See yb_sampling_algorithm flag description.
// Should be in sync with YBSamplingAlgorithmEnum in GUC (ybc_util.h).
enum YsqlSamplingAlgorithm {
FULL_TABLE_SCAN = 0;
BLOCK_BASED_SAMPLING = 1;
Expand Down
15 changes: 7 additions & 8 deletions src/yb/yql/pggate/pg_sample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,11 @@

#include "yb/yql/pggate/pg_select_index.h"

#include "yb/yql/pggate/util/yb_guc.h"

DEFINE_test_flag(int64, delay_after_table_analyze_ms, 0,
"Add this delay after each table is analyzed.");

DEFINE_RUNTIME_AUTO_int32(
ysql_sampling_algorithm, kLocalVolatile,
static_cast<int32_t>(yb::YsqlSamplingAlgorithm::FULL_TABLE_SCAN),
static_cast<int32_t>(yb::YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING),
"Which sampling algorithm to use for YSQL. 0 - scan the whole table, 1 - sample the table for "
"a set of blocks, then scan selected blocks to form a final rows sample.");

DEFINE_RUNTIME_int32(
ysql_docdb_blocks_sampling_method, yb::DocDbBlocksSamplingMethod::SPLIT_INTERSECTING_BLOCKS_V3,
"Controls how we define blocks for 1st phase of block-based sampling.");
Expand Down Expand Up @@ -113,7 +108,11 @@ class PgSamplePicker : public PgSelectIndex {
sampling_state.set_samplerows(0); // rows scanned so far
sampling_state.set_rowstoskip(-1); // rows to skip before selecting another
sampling_state.set_rstate_w(rand_state.w); // Vitter algorithm's W
sampling_state.set_sampling_algorithm(YsqlSamplingAlgorithm(FLAGS_ysql_sampling_algorithm));
if (yb_allow_block_based_sampling_algorithm) {
sampling_state.set_sampling_algorithm(YsqlSamplingAlgorithm(yb_sampling_algorithm));
} else {
sampling_state.set_sampling_algorithm(YsqlSamplingAlgorithm::FULL_TABLE_SCAN);
}
sampling_state.set_docdb_blocks_sampling_method(
DocDbBlocksSamplingMethod(FLAGS_ysql_docdb_blocks_sampling_method));
auto& rand = *sampling_state.mutable_rand_state();
Expand Down
6 changes: 6 additions & 0 deletions src/yb/yql/pggate/util/yb_guc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,9 @@ uint64_t yb_read_time = 0;
bool yb_is_read_time_ht = false;

int yb_read_after_commit_visibility = 0;

bool yb_allow_block_based_sampling_algorithm = true;

// TODO(#24089): Once code duplication between yb_guc and ybc_util is removed, we should be able
// to use YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING instead of 1 and do it in one place.
int32_t yb_sampling_algorithm = 1 /* YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING */;
4 changes: 4 additions & 0 deletions src/yb/yql/pggate/util/yb_guc.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ extern int yb_walsender_poll_sleep_duration_empty_ms;

extern int yb_read_after_commit_visibility;

extern bool yb_allow_block_based_sampling_algorithm;

extern int32_t yb_sampling_algorithm;

#ifdef __cplusplus
} // extern "C"
#endif
10 changes: 10 additions & 0 deletions src/yb/yql/pggate/util/ybc_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,16 @@ typedef enum {
/* GUC for the enum above. */
extern int yb_read_after_commit_visibility;

extern bool yb_allow_block_based_sampling_algorithm;

// Should be in sync with YsqlSamplingAlgorithm protobuf.
typedef enum {
YB_SAMPLING_ALGORITHM_FULL_TABLE_SCAN = 0,
YB_SAMPLING_ALGORITHM_BLOCK_BASED_SAMPLING = 1,
} YBSamplingAlgorithmEnum;

extern int32_t yb_sampling_algorithm;

typedef struct YBCStatusStruct* YBCStatus;

bool YBCStatusIsNotFound(YBCStatus s);
Expand Down
17 changes: 11 additions & 6 deletions src/yb/yql/pgwrapper/pg_analyze-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include "yb/yql/pgwrapper/pg_mini_test_base.h"

DECLARE_int32(ysql_docdb_blocks_sampling_method);
DECLARE_int32(ysql_sampling_algorithm);

DECLARE_int64(db_block_size_bytes);
DECLARE_int64(db_write_buffer_size);
Expand Down Expand Up @@ -182,6 +181,14 @@ size_t EstimateDistinct(size_t d, size_t k, size_t n) {
return d * (1 - pow(1 - 1.0 * n / d / k, k));
}

std::string GetYbSamplingAlgorithm(YsqlSamplingAlgorithm algorithm) {
switch (algorithm) {
case YsqlSamplingAlgorithm::FULL_TABLE_SCAN: return "full_table_scan";
case YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING: return "block_based_sampling";
}
FATAL_INVALID_PB_ENUM_VALUE(YsqlSamplingAlgorithm, algorithm);
}

} // namespace

class PgAnalyzeTest : public PgMiniTestBase {
Expand Down Expand Up @@ -335,7 +342,8 @@ TEST_F(PgAnalyzeTest, AnalyzeSamplingColocated) {
}

for (const auto ysql_sampling_algorithm : GetAllPbEnumValues<YsqlSamplingAlgorithm>()) {
ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_sampling_algorithm) = ysql_sampling_algorithm;
ASSERT_OK(conn.ExecuteFormat(
"SET yb_sampling_algorithm = $0", GetYbSamplingAlgorithm(ysql_sampling_algorithm)));

std::vector<DocDbBlocksSamplingMethod> blocks_sampling_methods;
if (ysql_sampling_algorithm == YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING) {
Expand All @@ -350,9 +358,6 @@ TEST_F(PgAnalyzeTest, AnalyzeSamplingColocated) {
ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_docdb_blocks_sampling_method) =
blocks_sampling_method;

ASSERT_OK(RestartPostgres());
conn = ASSERT_RESULT(ConnectToDB(kColocatedDatabaseName));

const auto num_distinct_tolerace = kNumDistinctTolerance[blocks_sampling_method];
const auto null_frac_tolerance = kNullFracTolerance[blocks_sampling_method];
const auto estimated_total_rows_accuracy =
Expand Down Expand Up @@ -414,7 +419,7 @@ TEST_F(PgAnalyzeTest, AnalyzeSamplingColocated) {
ASSERT_GT(correlation, - 1 - kEps);
// YsqlSamplingAlgorithm::FULL_TABLE_SCAN calculates correlation incorrectly as of
// 2024-12-12, so skip it.
if (FLAGS_ysql_sampling_algorithm != YsqlSamplingAlgorithm::FULL_TABLE_SCAN) {
if (ysql_sampling_algorithm != YsqlSamplingAlgorithm::FULL_TABLE_SCAN) {
if (column_name == "k" || column_name == "v" || column_name == "v_d") {
// These column values are in the scan order.
ASSERT_GT(correlation, 1 - kEps);
Expand Down
9 changes: 9 additions & 0 deletions src/yb/yql/pgwrapper/pg_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_replication_commands, kLocalPersiste
DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_replica_identity, kLocalPersisted, false, true,
"Enable replica identity command for Alter Table query");

DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_allow_block_based_sampling_algorithm,
kLocalVolatile, false, true, "Allow YsqlSamplingAlgorithm::BLOCK_BASED_SAMPLING");

DEFINE_RUNTIME_PG_FLAG(
string, yb_default_replica_identity, "CHANGE",
"The default replica identity to be assigned to user defined tables at the time of creation. "
Expand Down Expand Up @@ -307,6 +310,12 @@ DEFINE_RUNTIME_PG_FLAG(bool, yb_enable_fkey_catcache, true,
DEFINE_RUNTIME_PG_FLAG(bool, yb_enable_nop_alter_role_optimization, true,
"Enable nop alter role statement optimization.");

DEFINE_RUNTIME_PG_FLAG(string, yb_sampling_algorithm,
"block_based_sampling",
"Which sampling algorithm to use for YSQL. full_table_scan - scan the whole table and pick "
"random rows, block_based_sampling - sample the table for a set of blocks, then scan selected "
"blocks to form a final rows sample.");

DEFINE_validator(ysql_yb_xcluster_consistency_level, FLAG_IN_SET_VALIDATOR("database", "tablet"));

DEFINE_NON_RUNTIME_string(ysql_conn_mgr_warmup_db, "yugabyte",
Expand Down

0 comments on commit 66fd3f9

Please sign in to comment.