Skip to content

Commit

Permalink
planner: Choose index with statistics vs one without (#58593)
Browse files Browse the repository at this point in the history
close #46375
  • Loading branch information
terry1purcell authored Jan 3, 2025
1 parent e18ca24 commit 4721bc3
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
shard_count = 29,
shard_count = 30,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
22 changes: 22 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,28 @@ func TestEstimationForUnknownValuesAfterModify(t *testing.T) {
require.Truef(t, count > 20, "expected: between 20 to 40, got: %v", count)
}

// TestNewIndexWithoutStats checks index selection when some indexes were
// created after the most recent ANALYZE and therefore have no statistics yet.
// It runs the same two-predicate query at three points and asserts which
// index name appears in the EXPLAIN output each time.
func TestNewIndexWithoutStats(t *testing.T) {
	store, _ := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// The query under test; both a and b are constrained by equality.
	const explainQuery = "explain format='brief' select * from t where a = 5 and b = 5"

	// Build the table, load rows, collect statistics, and only then add idxb,
	// so that idxb exists without statistics while idxa has them.
	setup := []string{
		"use test",
		"drop table if exists t",
		"create table t(a int, b int, c int, index idxa(a))",
		"set @@tidb_analyze_version=2",
		"set @@global.tidb_enable_auto_analyze='OFF'",
		"insert into t values (1, 1, 1)",
		"insert into t select mod(a,250), mod(a,10), mod(a,100) from (with recursive x as (select 1 as a union all select a + 1 AS a from x where a < 500) select a from x) as subquery",
		"analyze table t",
		"create index idxb on t(b)",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	// idxb was created after ANALYZE: skyline pruning is expected to keep idxa,
	// the index that has statistics.
	tk.MustQuery(explainQuery).CheckContain("idxa(a)")

	// Once both indexes have statistics, idxa is still the expected choice.
	tk.MustExec("analyze table t")
	tk.MustQuery(explainQuery).CheckContain("idxa(a)")

	// idxab has no statistics, but it matches more equal predicates than the
	// other indexes, so it is expected to be chosen regardless.
	tk.MustExec("create index idxab on t(a, b)")
	tk.MustQuery(explainQuery).CheckContain("idxab(a, b)")
}

func TestEstimationUniqueKeyEqualConds(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down
28 changes: 26 additions & 2 deletions pkg/planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -711,13 +711,37 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int {

// compareCandidates is the core of skyline pruning, which is used to decide which candidate path is better.
// The return value is 1 if lhs is better, -1 if rhs is better, 0 if they are equivalent or not comparable.
func compareCandidates(sctx base.PlanContext, prop *property.PhysicalProperty, lhs, rhs *candidatePath) int {
func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *property.PhysicalProperty, lhs, rhs *candidatePath) int {
// Due to #50125, full scan on MVIndex has been disabled, so MVIndex path might lead to 'can't find a proper plan' error at the end.
// Avoid MVIndex path to exclude all other paths and leading to 'can't find a proper plan' error, see #49438 for an example.
if isMVIndexPath(lhs.path) || isMVIndexPath(rhs.path) {
return 0
}

// If one index has statistics and the other does not, choose the index with statistics if it
// has the same or higher number of equal/IN predicates.
lhsHasStatistics := statsTbl.Pseudo
if statsTbl != nil && lhs.path.Index != nil {
lhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(lhs.path.Index.ID, true)
}
rhsHasStatistics := statsTbl.Pseudo
if statsTbl != nil && rhs.path.Index != nil {
rhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true)
}
if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
(!lhsHasStatistics || !rhsHasStatistics) && // At least one index doesn't have statistics
len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge due to unreliability
lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
if lhsHasStatistics && lhsTotalEqual > 0 && lhsTotalEqual >= rhsTotalEqual {
return 1
}
if rhsHasStatistics && rhsTotalEqual > 0 && rhsTotalEqual >= lhsTotalEqual {
return -1
}
}

// This rule is empirical but not always correct.
// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
Expand Down Expand Up @@ -1140,7 +1164,7 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
if candidates[i].path.StoreType == kv.TiFlash {
continue
}
result := compareCandidates(ds.SCtx(), prop, candidates[i], currentCandidate)
result := compareCandidates(ds.SCtx(), ds.StatisticTable, prop, candidates[i], currentCandidate)
if result == 1 {
pruned = true
// We can break here because the current candidate cannot prune others anymore.
Expand Down

0 comments on commit 4721bc3

Please sign in to comment.