Skip to content

Commit

Permalink
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228) (#8368)
Browse files Browse the repository at this point in the history
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228)

* Fix UT due to ClickHouse/ClickHouse#73422

---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
  • Loading branch information
3 people authored Dec 28, 2024
1 parent 74cc9d7 commit 49f6657
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,14 @@ class CHListenerApi extends ListenerApi with Logging {
"local_engine.settings.log_processors_profiles" -> "true")
conf.setCHSettings("spark_version", SPARK_VERSION)
// add memory limit for external sort
val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
if (conf.getLong(externalSortKey, -1) < 0) {
if (conf.getLong(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, -1) < 0) {
if (conf.getBoolean("spark.memory.offHeap.enabled", defaultValue = false)) {
val memSize = JavaUtils.byteStringAsBytes(conf.get("spark.memory.offHeap.size"))
if (memSize > 0L) {
val cores = conf.getInt("spark.executor.cores", 1).toLong
val sortMemLimit = ((memSize / cores) * 0.8).toLong
logDebug(s"max memory for sorting: $sortMemLimit")
conf.set(externalSortKey, sortMemLimit.toString)
conf.set(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, sortMemLimit.toString)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ object RuntimeSettings {
.doc("https://clickhouse.com/docs/en/operations/settings/settings#min_insert_block_size_rows")
.longConf
.createWithDefault(1048449)

/**
 * Config entry for the ClickHouse setting `max_bytes_before_external_sort`. The key is produced
 * by `runtimeSettings(...)`, the value is a long, and the default is 0. See the linked
 * ClickHouse documentation for the meaning of the setting and of a 0 value.
 */
val MAX_BYTES_BEFORE_EXTERNAL_SORT =
buildConf(runtimeSettings("max_bytes_before_external_sort"))
.doc("https://clickhouse.com/docs/en/operations/settings/query-complexity#settings-max_bytes_before_external_sort")
.longConf
.createWithDefault(0)
// scalastyle:on line.size.limit

/** Gluten Configuration */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.gluten.execution.mergetree

import org.apache.gluten.GlutenConfig
import org.apache.gluten.backendsapi.clickhouse.CHConf
import org.apache.gluten.backendsapi.clickhouse.{CHConf, RuntimeSettings}
import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite

import org.apache.spark.SparkConf
Expand Down Expand Up @@ -53,7 +53,7 @@ class GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite

test("GLUTEN-6470: Fix Task not serializable error when inserting mergetree data") {

val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
val externalSortKey = RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key
assertResult(3435973836L)(spark.conf.get(externalSortKey).toLong)

spark.sql(s"""
Expand Down
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20241224
CH_COMMIT=b38537577c5
CH_BRANCH=rebase_ch/20241228
CH_COMMIT=bf8e58b57e9
5 changes: 5 additions & 0 deletions cpp-ch/local-engine/Common/CHUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ namespace Setting
{
extern const SettingsUInt64 prefer_external_sort_block_bytes;
extern const SettingsUInt64 max_bytes_before_external_sort;
extern const SettingsDouble max_bytes_ratio_before_external_sort;
extern const SettingsBool query_plan_merge_filters;
extern const SettingsBool compile_expressions;
extern const SettingsShortCircuitFunctionEvaluation short_circuit_function_evaluation;
Expand Down Expand Up @@ -644,6 +645,10 @@ void BackendInitializerUtil::initSettings(const SparkConfigs::ConfigMap & spark_
settings[Setting::short_circuit_function_evaluation] = ShortCircuitFunctionEvaluation::DISABLE;
///

// Required after https://github.com/ClickHouse/ClickHouse/pull/73422.
// max_bytes_before_external_sort is already set explicitly, so set max_bytes_ratio_before_external_sort to 0.
settings[Setting::max_bytes_ratio_before_external_sort] = 0.;

for (const auto & [key, value] : spark_conf_map)
{
// Firstly apply spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.* to settings
Expand Down

0 comments on commit 49f6657

Please sign in to comment.