diff --git a/pkg/planner/cardinality/BUILD.bazel b/pkg/planner/cardinality/BUILD.bazel index e478223ec4334..bebf6b8dccfd2 100644 --- a/pkg/planner/cardinality/BUILD.bazel +++ b/pkg/planner/cardinality/BUILD.bazel @@ -59,7 +59,7 @@ go_test( data = glob(["testdata/**"]), embed = [":cardinality"], flaky = True, - shard_count = 29, + shard_count = 30, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index 0d425f7251bed..b09f1010a618d 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -341,6 +341,28 @@ func TestEstimationForUnknownValuesAfterModify(t *testing.T) { require.Truef(t, count > 20, "expected: between 20 to 40, got: %v", count) } +func TestNewIndexWithoutStats(t *testing.T) { + store, _ := testkit.CreateMockStoreAndDomain(t) + testKit := testkit.NewTestKit(t, store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int, b int, c int, index idxa(a))") + testKit.MustExec("set @@tidb_analyze_version=2") + testKit.MustExec("set @@global.tidb_enable_auto_analyze='OFF'") + testKit.MustExec("insert into t values (1, 1, 1)") + testKit.MustExec("insert into t select mod(a,250), mod(a,10), mod(a,100) from (with recursive x as (select 1 as a union all select a + 1 AS a from x where a < 500) select a from x) as subquery") + testKit.MustExec("analyze table t") + testKit.MustExec("create index idxb on t(b)") + // Create index after ANALYZE. 
SkyLine pruning should ensure that idxa is chosen because it has statistics + testKit.MustQuery("explain format='brief' select * from t where a = 5 and b = 5").CheckContain("idxa(a)") + testKit.MustExec("analyze table t") + // idxa should still win once idxb has statistics too (a = 5 matches far fewer rows than b = 5 in this data) + testKit.MustQuery("explain format='brief' select * from t where a = 5 and b = 5").CheckContain("idxa(a)") + testKit.MustExec("create index idxab on t(a, b)") + // New index idxab should win due to having the most matching equal predicates - regardless of no statistics + testKit.MustQuery("explain format='brief' select * from t where a = 5 and b = 5").CheckContain("idxab(a, b)") +} + func TestEstimationUniqueKeyEqualConds(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) testKit := testkit.NewTestKit(t, store) diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index e4a1056bfde2f..ac93c82bdb5f4 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -711,13 +711,37 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int { // compareCandidates is the core of skyline pruning, which is used to decide which candidate path is better. // The return value is 1 if lhs is better, -1 if rhs is better, 0 if they are equivalent or not comparable. -func compareCandidates(sctx base.PlanContext, prop *property.PhysicalProperty, lhs, rhs *candidatePath) int { +func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *property.PhysicalProperty, lhs, rhs *candidatePath) int { // Due to #50125, full scan on MVIndex has been disabled, so MVIndex path might lead to 'can't find a proper plan' error at the end. // Avoid MVIndex path to exclude all other paths and leading to 'can't find a proper plan' error, see #49438 for an example. 
if isMVIndexPath(lhs.path) || isMVIndexPath(rhs.path) { return 0 } + // If one index has statistics and the other does not, choose the index with statistics if it + // has the same or higher number of equal/IN predicates. A nil statsTbl leaves both flags false, so the rule is skipped. + lhsHasStatistics := statsTbl != nil && statsTbl.Pseudo + if statsTbl != nil && lhs.path.Index != nil { + lhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(lhs.path.Index.ID, true) + } + rhsHasStatistics := statsTbl != nil && statsTbl.Pseudo + if statsTbl != nil && rhs.path.Index != nil { + rhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true) + } + if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan + (lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics + (!lhsHasStatistics || !rhsHasStatistics) && // At least one index doesn't have statistics + len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge due to unreliability + lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount + rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount + if lhsHasStatistics && lhsTotalEqual > 0 && lhsTotalEqual >= rhsTotalEqual { + return 1 + } + if rhsHasStatistics && rhsTotalEqual > 0 && rhsTotalEqual >= lhsTotalEqual { + return -1 + } + } + // This rule is empirical but not always correct. // If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better. if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 
0.01 : 10 @@ -1140,7 +1164,7 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [ if candidates[i].path.StoreType == kv.TiFlash { continue } - result := compareCandidates(ds.SCtx(), prop, candidates[i], currentCandidate) + result := compareCandidates(ds.SCtx(), ds.StatisticTable, prop, candidates[i], currentCandidate) if result == 1 { pruned = true // We can break here because the current candidate cannot prune others anymore.