From 3bf870a87e8dd943fe9407dce277e20ae1718d6c Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Tue, 29 Oct 2024 19:28:28 +0000 Subject: [PATCH 1/9] kvserver/rangefeed: add metric for processor-level send timeout Epic: none Release note: None --- docs/generated/metrics/metrics.html | 1 + pkg/kv/kvserver/rangefeed/metrics.go | 8 ++++++++ pkg/kv/kvserver/rangefeed/scheduled_processor.go | 2 ++ 3 files changed, 11 insertions(+) diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html index fe659599b919..2f3c69bd0e73 100644 --- a/docs/generated/metrics/metrics.html +++ b/docs/generated/metrics/metrics.html @@ -250,6 +250,7 @@ STORAGEkv.rangefeed.processors_goroutineNumber of active RangeFeed processors using goroutinesProcessorsGAUGECOUNTAVGNONE STORAGEkv.rangefeed.processors_schedulerNumber of active RangeFeed processors using schedulerProcessorsGAUGECOUNTAVGNONE STORAGEkv.rangefeed.registrationsNumber of active RangeFeed registrationsRegistrationsGAUGECOUNTAVGNONE +STORAGEkv.rangefeed.scheduled_processor.queue_timeoutNumber of times the RangeFeed processor shutdown because of a queue send timeoutFailure CountCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE STORAGEkv.rangefeed.scheduler.normal.latencyKV RangeFeed normal scheduler latencyLatencyHISTOGRAMNANOSECONDSAVGNONE STORAGEkv.rangefeed.scheduler.normal.queue_sizeNumber of entries in the KV RangeFeed normal scheduler queuePending RangesGAUGECOUNTAVGNONE STORAGEkv.rangefeed.scheduler.system.latencyKV RangeFeed system scheduler latencyLatencyHISTOGRAMNANOSECONDSAVGNONE diff --git a/pkg/kv/kvserver/rangefeed/metrics.go b/pkg/kv/kvserver/rangefeed/metrics.go index aaec026effd2..a8d7b2912f87 100644 --- a/pkg/kv/kvserver/rangefeed/metrics.go +++ b/pkg/kv/kvserver/rangefeed/metrics.go @@ -76,12 +76,19 @@ var ( Measurement: "Pending Ranges", Unit: metric.Unit_COUNT, } + metaQueueTimeout = metric.Metadata{ + Name: "kv.rangefeed.scheduled_processor.queue_timeout", + Help: "Number of times the RangeFeed processor shutdown because of a queue send timeout", + Measurement: "Failure Count", + Unit: metric.Unit_COUNT, + } ) // Metrics are for production monitoring of RangeFeeds. type Metrics struct { RangeFeedCatchUpScanNanos *metric.Counter RangeFeedBudgetExhausted *metric.Counter + RangefeedProcessorQueueTimeout *metric.Counter RangeFeedBudgetBlocked *metric.Counter RangeFeedRegistrations *metric.Gauge RangeFeedClosedTimestampMaxBehindNanos *metric.Gauge @@ -106,6 +113,7 @@ func (*Metrics) MetricStruct() {} func NewMetrics() *Metrics { return &Metrics{ RangeFeedCatchUpScanNanos: metric.NewCounter(metaRangeFeedCatchUpScanNanos), + RangefeedProcessorQueueTimeout: metric.NewCounter(metaQueueTimeout), RangeFeedBudgetExhausted: metric.NewCounter(metaRangeFeedExhausted), RangeFeedBudgetBlocked: metric.NewCounter(metaRangeFeedBudgetBlocked), RangeFeedRegistrations: metric.NewGauge(metaRangeFeedRegistrations), diff --git a/pkg/kv/kvserver/rangefeed/scheduled_processor.go b/pkg/kv/kvserver/rangefeed/scheduled_processor.go index 6bca9a20f38c..a2f80bcdce2b 100644 --- a/pkg/kv/kvserver/rangefeed/scheduled_processor.go +++ b/pkg/kv/kvserver/rangefeed/scheduled_processor.go @@ -499,6 +499,7 @@ func (p *ScheduledProcessor) enqueueEventInternal( case <-p.stoppedC: // Already stopped. Do nothing. case <-ctx.Done(): + p.Metrics.RangefeedProcessorQueueTimeout.Inc(1) p.sendStop(newErrBufferCapacityExceeded()) return false } @@ -528,6 +529,7 @@ func (p *ScheduledProcessor) enqueueEventInternal( case <-ctx.Done(): // Sending on the eventC channel would have blocked. // Instead, tear down the processor and return immediately. + p.Metrics.RangefeedProcessorQueueTimeout.Inc(1) p.sendStop(newErrBufferCapacityExceeded()) return false } From aa453b8c361829df46a4b35faf40fbb3231155ef Mon Sep 17 00:00:00 2001 From: Ricky Stewart Date: Wed, 30 Oct 2024 11:09:31 -0500 Subject: [PATCH 2/9] bazel-github-helper: sort list of failed tests in summary Epic: CRDB-17171 Release note: None Release justification: Non-production code changes --- pkg/cmd/bazci/bazel-github-helper/main.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/cmd/bazci/bazel-github-helper/main.go b/pkg/cmd/bazci/bazel-github-helper/main.go index 9e8fd5c77459..495b878bf9d3 100644 --- a/pkg/cmd/bazci/bazel-github-helper/main.go +++ b/pkg/cmd/bazci/bazel-github-helper/main.go @@ -24,6 +24,7 @@ import ( "fmt" "os" "os/exec" + "sort" "strings" "github.com/cockroachdb/cockroach/pkg/build/engflow" @@ -182,6 +183,18 @@ func dumpSummary(f *os.File, invocation *engflow.InvocationInfo) error { } } + sort.Slice(failedTests, func(i, j int) bool { + t1 := failedTests[i] + t2 := failedTests[2] + if t1.label < t2.label { + return true + } else if t1.label == t2.label { + return t1.name < t2.name + } else { + return false + } + }) + if len(failedTests) != 0 { _, err := f.WriteString(`| Label | TestName | Status | Link | | --- | --- | --- | --- | From b656fb88889da37db2dfe5cf9d44108c8cdfc468 Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Wed, 30 Oct 2024 12:18:27 -0400 Subject: [PATCH 3/9] scbuild: version gate SetZoneConfig with isV243Active New support was added in 24.3 in `SetZoneConfig` for subzone configs. Since this work was a significant change, we should gate `SetZoneConfig` to minimize the risk of compatibility issues with earlier versions. Epic: none Release note: None --- pkg/sql/logictest/testdata/logic_test/zone_config | 2 ++ pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/sql/logictest/testdata/logic_test/zone_config b/pkg/sql/logictest/testdata/logic_test/zone_config index 1c93a2b21d64..3ed4ca478716 100644 --- a/pkg/sql/logictest/testdata/logic_test/zone_config +++ b/pkg/sql/logictest/testdata/logic_test/zone_config @@ -308,11 +308,13 @@ statement ok SET CLUSTER SETTING sql.schema.force_declarative_statements = "+CONFIGURE ZONE" skipif config local-mixed-24.1 +skipif config local-mixed-24.2 skipif config local-legacy-schema-changer statement error pq: pg_type is a system catalog ALTER TABLE pg_catalog.pg_type CONFIGURE ZONE USING gc.ttlseconds = 100000 skipif config local-mixed-24.1 +skipif config local-mixed-24.2 skipif config local-legacy-schema-changer statement error pq: columns is a virtual object and cannot be modified ALTER TABLE information_schema.columns CONFIGURE ZONE USING gc.ttlseconds = 100000 diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go index 869fcb373cf0..a1fcf977c337 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go @@ -63,7 +63,7 @@ var supportedStatements = map[reflect.Type]supportedStatement{ reflect.TypeOf((*tree.CreateSchema)(nil)): {fn: CreateSchema, statementTags: []string{tree.CreateSchemaTag}, on: true, checks: nil}, reflect.TypeOf((*tree.CreateSequence)(nil)): {fn: CreateSequence, statementTags: []string{tree.CreateSequenceTag}, on: true, checks: isV241Active}, reflect.TypeOf((*tree.CreateDatabase)(nil)): {fn: CreateDatabase, statementTags: []string{tree.CreateDatabaseTag}, on: true, checks: isV241Active}, - reflect.TypeOf((*tree.SetZoneConfig)(nil)): {fn: SetZoneConfig, statementTags: []string{tree.ConfigureZoneTag}, on: false, checks: isV242Active}, + reflect.TypeOf((*tree.SetZoneConfig)(nil)): {fn: SetZoneConfig, statementTags: []string{tree.ConfigureZoneTag}, on: false, checks: isV243Active}, reflect.TypeOf((*tree.CreateTrigger)(nil)): {fn: CreateTrigger, statementTags: []string{tree.CreateTriggerTag}, on: true, checks: isV243Active}, reflect.TypeOf((*tree.DropTrigger)(nil)): {fn: DropTrigger, statementTags: []string{tree.DropTriggerTag}, on: true, checks: isV243Active}, } From 2ec0a43e37715e6b7e8bc318acb8e9ee6cddf004 Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Tue, 22 Oct 2024 10:44:40 -0400 Subject: [PATCH 4/9] schemachanger: re-enable dsc zone config Since 24.3 has been cut, we can turn zone configs on in the declarative schema changer by default. Epic: none Release note: None --- pkg/sql/logictest/testdata/logic_test/zone_config | 7 ------- .../schemachanger/scbuild/internal/scbuildstmt/process.go | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/zone_config b/pkg/sql/logictest/testdata/logic_test/zone_config index 3ed4ca478716..713868bd3e54 100644 --- a/pkg/sql/logictest/testdata/logic_test/zone_config +++ b/pkg/sql/logictest/testdata/logic_test/zone_config @@ -303,10 +303,6 @@ onlyif config local-legacy-schema-changer statement error pq: user root does not have CREATE or ZONECONFIG privilege on relation columns ALTER TABLE information_schema.columns CONFIGURE ZONE USING gc.ttlseconds = 100000 -# TODO(annie): remove this override once CONFIGURE ZONE is enabled by default in the DSC -statement ok -SET CLUSTER SETTING sql.schema.force_declarative_statements = "+CONFIGURE ZONE" - skipif config local-mixed-24.1 skipif config local-mixed-24.2 skipif config local-legacy-schema-changer @@ -319,9 +315,6 @@ skipif config local-legacy-schema-changer statement error pq: columns is a virtual object and cannot be modified ALTER TABLE information_schema.columns CONFIGURE ZONE USING gc.ttlseconds = 100000 -statement ok -RESET CLUSTER SETTING sql.schema.force_declarative_statements - statement ok CREATE TABLE roachie(i int) diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go index a1fcf977c337..1a35d847d2ee 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/process.go @@ -63,7 +63,7 @@ var supportedStatements = map[reflect.Type]supportedStatement{ reflect.TypeOf((*tree.CreateSchema)(nil)): {fn: CreateSchema, statementTags: []string{tree.CreateSchemaTag}, on: true, checks: nil}, reflect.TypeOf((*tree.CreateSequence)(nil)): {fn: CreateSequence, statementTags: []string{tree.CreateSequenceTag}, on: true, checks: isV241Active}, reflect.TypeOf((*tree.CreateDatabase)(nil)): {fn: CreateDatabase, statementTags: []string{tree.CreateDatabaseTag}, on: true, checks: isV241Active}, - reflect.TypeOf((*tree.SetZoneConfig)(nil)): {fn: SetZoneConfig, statementTags: []string{tree.ConfigureZoneTag}, on: false, checks: isV243Active}, + reflect.TypeOf((*tree.SetZoneConfig)(nil)): {fn: SetZoneConfig, statementTags: []string{tree.ConfigureZoneTag}, on: true, checks: isV243Active}, reflect.TypeOf((*tree.CreateTrigger)(nil)): {fn: CreateTrigger, statementTags: []string{tree.CreateTriggerTag}, on: true, checks: isV243Active}, reflect.TypeOf((*tree.DropTrigger)(nil)): {fn: DropTrigger, statementTags: []string{tree.DropTriggerTag}, on: true, checks: isV243Active}, } From f0cb7cc3ec7c9884d25f240add8746ff02a23425 Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Tue, 22 Oct 2024 11:43:05 -0400 Subject: [PATCH 5/9] scexec: extend UpdateZoneConfig to allow for deletes This patch extends UpdateZoneConfig to allow for deletes in the system.zones table. Epic: None Release note: None --- .../scexec/exec_immediate_mutation.go | 42 +++++++++++++------ .../scexec/scmutationexec/dependencies.go | 11 +++-- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/pkg/sql/schemachanger/scexec/exec_immediate_mutation.go b/pkg/sql/schemachanger/scexec/exec_immediate_mutation.go index f785f637a34c..49bcf33f06a4 100644 --- a/pkg/sql/schemachanger/scexec/exec_immediate_mutation.go +++ b/pkg/sql/schemachanger/scexec/exec_immediate_mutation.go @@ -28,7 +28,8 @@ type immediateState struct { withReset bool sequencesToInit []sequenceToInit temporarySchemasToRegister map[descpb.ID]*temporarySchemaToRegister - zoneConfigsToUpdate []zoneConfigToUpdate + modifiedZoneConfigs []zoneConfigToUpsert + zoneConfigsToDelete []zoneConfigToDelete } type temporarySchemaToRegister struct { @@ -48,16 +49,20 @@ type sequenceToInit struct { startVal int64 } -// zoneConfigToUpdate is a struct that holds the information needed to update a +// zoneConfigToUpsert is a struct that holds the information needed to update a // zone config or subzone configs. If zc is subzone config, then we treat this // as a subzone write and update the subzone configs (along with their subzone // spans for the table). Otherwise, we write the whole zone config as an update. -type zoneConfigToUpdate struct { +type zoneConfigToUpsert struct { id descpb.ID zc *zonepb.ZoneConfig isSubzoneConfig bool } +type zoneConfigToDelete struct { + id descpb.ID +} + var _ scmutationexec.ImmediateMutationStateUpdater = (*immediateState)(nil) func (s *immediateState) AddToCheckedOutDescriptors(mut catalog.MutableDescriptor) { @@ -137,8 +142,8 @@ func (s *immediateState) InitSequence(id descpb.ID, startVal int64) { } func (s *immediateState) UpdateZoneConfig(id descpb.ID, zc *zonepb.ZoneConfig) { - s.zoneConfigsToUpdate = append(s.zoneConfigsToUpdate, - zoneConfigToUpdate{ + s.modifiedZoneConfigs = append(s.modifiedZoneConfigs, + zoneConfigToUpsert{ id: id, zc: zc, }) @@ -148,14 +153,21 @@ func (s *immediateState) UpdateSubzoneConfig( tableid descpb.ID, subzone zonepb.Subzone, subzoneSpans []zonepb.SubzoneSpan, ) { zc := &zonepb.ZoneConfig{Subzones: []zonepb.Subzone{subzone}, SubzoneSpans: subzoneSpans} - s.zoneConfigsToUpdate = append(s.zoneConfigsToUpdate, - zoneConfigToUpdate{ + s.modifiedZoneConfigs = append(s.modifiedZoneConfigs, + zoneConfigToUpsert{ id: tableid, zc: zc, isSubzoneConfig: true, }) } +func (s *immediateState) DeleteZoneConfig(id descpb.ID) { + s.zoneConfigsToDelete = append(s.zoneConfigsToDelete, + zoneConfigToDelete{ + id: id, + }) +} + func (s *immediateState) Reset() { s.withReset = true } @@ -222,20 +234,26 @@ func (s *immediateState) exec(ctx context.Context, c Catalog) error { c.InsertTemporarySchema(tempIdxToRegister.schemaName, tempIdxToRegister.parentID, tempIdxId) } - for _, zc := range s.zoneConfigsToUpdate { - if zc.isSubzoneConfig { - if err := c.UpdateSubzoneConfig(ctx, zc.id, zc.zc.Subzones, - zc.zc.SubzoneSpans); err != nil { + for _, zcToUpdate := range s.modifiedZoneConfigs { + if zcToUpdate.isSubzoneConfig { + if err := c.UpdateSubzoneConfig(ctx, zcToUpdate.id, zcToUpdate.zc.Subzones, + zcToUpdate.zc.SubzoneSpans); err != nil { return err } } else { - if err := c.UpdateZoneConfig(ctx, zc.id, zc.zc); err != nil { + if err := c.UpdateZoneConfig(ctx, zcToUpdate.id, zcToUpdate.zc); err != nil { return err } } } + for _, zcToDelete := range s.zoneConfigsToDelete { + if err := c.DeleteZoneConfig(ctx, zcToDelete.id); err != nil { + return err + } + } + return c.Validate(ctx) } diff --git a/pkg/sql/schemachanger/scexec/scmutationexec/dependencies.go b/pkg/sql/schemachanger/scexec/scmutationexec/dependencies.go index dafaf8cfec67..45958ab58c55 100644 --- a/pkg/sql/schemachanger/scexec/scmutationexec/dependencies.go +++ b/pkg/sql/schemachanger/scexec/scmutationexec/dependencies.go @@ -78,11 +78,16 @@ type ImmediateMutationStateUpdater interface { // InitSequence initializes a sequence. InitSequence(id descpb.ID, startVal int64) - // UpdateZoneConfig updates a zone config. + // UpdateZoneConfig upserts a zone config. UpdateZoneConfig(id descpb.ID, zc *zonepb.ZoneConfig) - // UpdateSubzoneConfig updates subzone zone configs. - UpdateSubzoneConfig(tableid descpb.ID, subzone zonepb.Subzone, subzoneSpans []zonepb.SubzoneSpan) + // UpdateSubzoneConfig upserts a subzone config. + UpdateSubzoneConfig( + tableid descpb.ID, subzone zonepb.Subzone, subzoneSpans []zonepb.SubzoneSpan, + ) + + // DeleteZoneConfig deletes the zone config for the given ID. + DeleteZoneConfig(id descpb.ID) // Reset schedules a reset of the in-txn catalog state // to undo the modifications from earlier stages. From a59d3168cdda154829ac692a62c2fd2e1ceef25d Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Tue, 22 Oct 2024 12:08:23 -0400 Subject: [PATCH 6/9] scbuildstmt: refactor zoneConfigObjBuilder method This patch refactors the `addZoneConfigToBuildCtx` method of our `zoneConfigObjBuilder` to be less specific; we pull the `b.add` out so that we can decide in `SetZoneConfig` whether to add or drop this element. Epic: none Release note: None --- .../scbuild/internal/scbuildstmt/configure_zone.go | 10 +++++++++- .../internal/scbuildstmt/database_zone_config.go | 6 ++++-- .../scbuild/internal/scbuildstmt/index_zone_config.go | 5 +---- .../internal/scbuildstmt/partition_zone_config.go | 5 +---- .../scbuild/internal/scbuildstmt/table_zone_config.go | 5 +---- .../internal/scbuildstmt/zone_config_helpers.go | 8 +++++--- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go index 9109a9ab366b..07f04519be98 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go @@ -81,7 +81,15 @@ func SetZoneConfig(b BuildCtx, n *tree.SetZoneConfig) { resolvePhysicalTableName(b, n) } - elem := zco.addZoneConfigToBuildCtx(b) + var elem scpb.Element + if n.Discard { + elem = zco.getZoneConfigElem(b) + b.Drop(elem) + } else { + zco.incrementSeqNum() + elem = zco.getZoneConfigElem(b) + b.Add(elem) + } // Log event for auditing eventDetails := eventpb.CommonZoneConfigDetails{ diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go index a9e633936459..d60b9db013e6 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go @@ -27,14 +27,16 @@ type databaseZoneConfigObj struct { var _ zoneConfigObject = &databaseZoneConfigObj{} -func (dzo *databaseZoneConfigObj) addZoneConfigToBuildCtx(b BuildCtx) scpb.Element { +func (dzo *databaseZoneConfigObj) incrementSeqNum() { dzo.seqNum += 1 +} + +func (dzo *databaseZoneConfigObj) getZoneConfigElem(b BuildCtx) scpb.Element { elem := &scpb.DatabaseZoneConfig{ DatabaseID: dzo.databaseID, ZoneConfig: dzo.zoneConfig, SeqNum: dzo.seqNum, } - b.Add(elem) return elem } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go index 73b51df7ee32..14ffd6937b2b 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go @@ -22,7 +22,6 @@ type indexZoneConfigObj struct { tableZoneConfigObj indexID catid.IndexID indexSubzone *zonepb.Subzone - seqNum uint32 } var _ zoneConfigObject = &indexZoneConfigObj{} @@ -31,8 +30,7 @@ func (izo *indexZoneConfigObj) getTableZoneConfig() *zonepb.ZoneConfig { return izo.tableZoneConfigObj.zoneConfig } -func (izo *indexZoneConfigObj) addZoneConfigToBuildCtx(b BuildCtx) scpb.Element { - izo.seqNum += 1 +func (izo *indexZoneConfigObj) getZoneConfigElem(b BuildCtx) scpb.Element { subzones := []zonepb.Subzone{*izo.indexSubzone} // Merge the new subzones with the old subzones so that we can generate @@ -55,7 +53,6 @@ func (izo *indexZoneConfigObj) addZoneConfigToBuildCtx(b BuildCtx) scpb.Element SubzoneSpans: ss, SeqNum: izo.seqNum, } - b.Add(elem) return elem } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go index 5056e8f6e447..36ebe314f9d1 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go @@ -26,7 +26,6 @@ type partitionZoneConfigObj struct { indexZoneConfigObj partitionSubzone *zonepb.Subzone partitionName string - seqNum uint32 } var _ zoneConfigObject = &partitionZoneConfigObj{} @@ -35,8 +34,7 @@ func (pzo *partitionZoneConfigObj) getTableZoneConfig() *zonepb.ZoneConfig { return pzo.tableZoneConfigObj.zoneConfig } -func (pzo *partitionZoneConfigObj) addZoneConfigToBuildCtx(b BuildCtx) scpb.Element { - pzo.seqNum += 1 +func (pzo *partitionZoneConfigObj) getZoneConfigElem(b BuildCtx) scpb.Element { subzones := []zonepb.Subzone{*pzo.partitionSubzone} // Merge the new subzones with the old subzones so that we can generate @@ -60,7 +58,6 @@ func (pzo *partitionZoneConfigObj) addZoneConfigToBuildCtx(b BuildCtx) scpb.Elem SubzoneSpans: ss, SeqNum: pzo.seqNum, } - b.Add(elem) return elem } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go index f001052484ea..70ab518bf7f2 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go @@ -24,19 +24,16 @@ type tableZoneConfigObj struct { databaseZoneConfigObj tableID catid.DescID zoneConfig *zonepb.ZoneConfig - seqNum uint32 } var _ zoneConfigObject = &tableZoneConfigObj{} -func (tzo *tableZoneConfigObj) addZoneConfigToBuildCtx(b BuildCtx) scpb.Element { - tzo.seqNum += 1 +func (tzo *tableZoneConfigObj) getZoneConfigElem(b BuildCtx) scpb.Element { elem := &scpb.TableZoneConfig{ TableID: tzo.tableID, ZoneConfig: tzo.zoneConfig, SeqNum: tzo.seqNum, } - b.Add(elem) return elem } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go index b7577a2e1522..7caaeb4505ca 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go @@ -52,9 +52,8 @@ type zoneConfigAuthorizer interface { } type zoneConfigObjBuilder interface { - // addZoneConfigToBuildCtx adds the zone config to the build context and - // returns the added element for logging. - addZoneConfigToBuildCtx(b BuildCtx) scpb.Element + // getZoneConfigElem retrieves the scpb.Element for the zone config object. + getZoneConfigElem(b BuildCtx) scpb.Element // getTargetID returns the target ID of the zone config object. This is either // a database or a table ID. @@ -72,6 +71,9 @@ type zoneConfigObjBuilder interface { // setZoneConfigToWrite fills our object with the zone config/subzone config // we will be writing to KV. setZoneConfigToWrite(zone *zonepb.ZoneConfig) + + // incrementSeqNum increments the seqNum by 1. + incrementSeqNum() } type zoneConfigRetriever interface { From 7aba6869d0436a02dac567170a1ca893b2aac920 Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Wed, 23 Oct 2024 11:30:30 -0400 Subject: [PATCH 7/9] schemachanger: add support discarding database/table zc This patch supports discarding a database/table zone config in the declarative schema changer. Informs: #133157 Release note: None --- .../backup_base_generated_test.go | 28 +++++++ .../logictest/testdata/logic_test/event_log | 6 +- pkg/sql/schemachanger/scbuild/event_log.go | 24 ++++-- .../internal/scbuildstmt/configure_zone.go | 27 ++++-- .../scbuildstmt/database_zone_config.go | 6 +- .../internal/scbuildstmt/index_zone_config.go | 4 + .../scbuildstmt/partition_zone_config.go | 4 + .../internal/scbuildstmt/table_zone_config.go | 6 +- .../scbuildstmt/zone_config_helpers.go | 83 ++++++++++++++++++- .../scexec/scmutationexec/BUILD.bazel | 1 + .../scexec/scmutationexec/zone_config.go | 22 +++++ .../schemachanger/scop/immediate_mutation.go | 7 ++ .../immediate_mutation_visitor_generated.go | 6 ++ .../schemachanger/scpb/element_collection.go | 12 +++ .../opgen/opgen_database_zone_config.go | 7 +- .../internal/opgen/opgen_table_zone_config.go | 7 +- .../schemachanger/sctest_generated_test.go | 42 ++++++++++ ...database_configure_zone_discard.definition | 8 ++ ...tabase_configure_zone_discard.side_effects | 46 ++++++++++ ...ure_zone_discard__statement_1_of_2.explain | 24 ++++++ ...ne_discard__statement_1_of_2.explain_shape | 8 ++ ...ure_zone_discard__statement_2_of_2.explain | 18 ++++ ...ne_discard__statement_2_of_2.explain_shape | 9 ++ 23 files changed, 384 insertions(+), 21 deletions(-) create mode 100644 pkg/sql/schemachanger/scexec/scmutationexec/zone_config.go create mode 100644 pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.definition create mode 100644 pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.side_effects create mode 100644 pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain create mode 100644 pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain_shape create mode 100644 pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain create mode 100644 pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain_shape diff --git a/pkg/ccl/schemachangerccl/backup_base_generated_test.go b/pkg/ccl/schemachangerccl/backup_base_generated_test.go index 0627cfa8bd93..a776a510f974 100644 --- a/pkg/ccl/schemachangerccl/backup_base_generated_test.go +++ b/pkg/ccl/schemachangerccl/backup_base_generated_test.go @@ -85,6 +85,13 @@ func TestBackupRollbacks_base_alter_database_configure_zone(t *testing.T) { sctest.BackupRollbacks(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestBackupRollbacks_base_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.BackupRollbacks(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestBackupRollbacks_base_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -610,6 +617,13 @@ func TestBackupRollbacksMixedVersion_base_alter_database_configure_zone(t *testi sctest.BackupRollbacksMixedVersion(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestBackupRollbacksMixedVersion_base_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.BackupRollbacksMixedVersion(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestBackupRollbacksMixedVersion_base_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1135,6 +1149,13 @@ func TestBackupSuccess_base_alter_database_configure_zone(t *testing.T) { sctest.BackupSuccess(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestBackupSuccess_base_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.BackupSuccess(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestBackupSuccess_base_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1660,6 +1681,13 @@ func TestBackupSuccessMixedVersion_base_alter_database_configure_zone(t *testing sctest.BackupSuccessMixedVersion(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestBackupSuccessMixedVersion_base_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.BackupSuccessMixedVersion(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestBackupSuccessMixedVersion_base_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/sql/logictest/testdata/logic_test/event_log b/pkg/sql/logictest/testdata/logic_test/event_log index 6a8edce05648..73ec261362a3 100644 --- a/pkg/sql/logictest/testdata/logic_test/event_log +++ b/pkg/sql/logictest/testdata/logic_test/event_log @@ -551,13 +551,13 @@ ORDER BY "timestamp", info skipif config 3node-tenant-default-configs query IT -SELECT "reportingID", "info"::JSONB - 'Timestamp' - 'DescriptorID' +SELECT "reportingID", "info"::JSONB - 'Timestamp' - 'DescriptorID' - 'Statement' FROM system.eventlog WHERE "eventType" = 'remove_zone_config' ORDER BY "timestamp", info ---- -1 {"EventType": "remove_zone_config", "Statement": "ALTER DATABASE test CONFIGURE ZONE DISCARD", "Tag": "CONFIGURE ZONE", "Target": "DATABASE test", "User": "root"} -1 {"EventType": "remove_zone_config", "Statement": "ALTER TABLE \"\".\"\".a CONFIGURE ZONE DISCARD", "Tag": "CONFIGURE ZONE", "Target": "TABLE test.public.a", "User": "root"} +1 {"EventType": "remove_zone_config", "Tag": "CONFIGURE ZONE", "Target": "DATABASE test", "User": "root"} +1 {"EventType": "remove_zone_config", "Tag": "CONFIGURE ZONE", "Target": "TABLE test.public.a", "User": "root"} statement ok DROP TABLE a diff --git a/pkg/sql/schemachanger/scbuild/event_log.go b/pkg/sql/schemachanger/scbuild/event_log.go index 6b118396d831..b5108e929fac 100644 --- a/pkg/sql/schemachanger/scbuild/event_log.go +++ b/pkg/sql/schemachanger/scbuild/event_log.go @@ -467,17 +467,31 @@ func (pb payloadBuilder) build(b buildCtx) logpb.EventPayload { var zcDetails eventpb.CommonZoneConfigDetails var oldConfig string if pb.maybePayload != nil { - payload := pb.maybePayload.(*eventpb.SetZoneConfig) - zcDetails = eventpb.CommonZoneConfigDetails{ - Target: payload.Target, - Options: payload.Options, + if payload, ok := pb.maybePayload.(*eventpb.SetZoneConfig); ok { + zcDetails = eventpb.CommonZoneConfigDetails{ + Target: payload.Target, + Options: payload.Options, + } + oldConfig = payload.ResolvedOldConfig } - oldConfig = payload.ResolvedOldConfig } return &eventpb.SetZoneConfig{ CommonZoneConfigDetails: zcDetails, ResolvedOldConfig: oldConfig, } + } else { + var zcDetails eventpb.CommonZoneConfigDetails + if pb.maybePayload != nil { + if payload, ok := pb.maybePayload.(*eventpb.RemoveZoneConfig); ok { + zcDetails = eventpb.CommonZoneConfigDetails{ + Target: payload.Target, + Options: payload.Options, + } + } + } + return &eventpb.RemoveZoneConfig{ + CommonZoneConfigDetails: zcDetails, + } } case *scpb.Trigger: if pb.TargetStatus == scpb.Status_PUBLIC { diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go index 07f04519be98..cc081684973f 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/configure_zone.go @@ -14,6 +14,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sqlclustersettings" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" "github.com/cockroachdb/cockroach/pkg/util/log/eventpb" + "github.com/cockroachdb/cockroach/pkg/util/log/logpb" "github.com/cockroachdb/errors" ) @@ -83,6 +84,11 @@ func SetZoneConfig(b BuildCtx, n *tree.SetZoneConfig) { var elem scpb.Element if n.Discard { + // If we are discarding the zone config and a zone config did not previously + // exist for us to discard, then no-op. + if zco.isNoOp() { + return + } elem = zco.getZoneConfigElem(b) b.Drop(elem) } else { @@ -96,16 +102,18 @@ func SetZoneConfig(b BuildCtx, n *tree.SetZoneConfig) { Target: tree.AsString(&n.ZoneSpecifier), Options: optionsStr, } - info := &eventpb.SetZoneConfig{CommonZoneConfigDetails: eventDetails, - ResolvedOldConfig: oldZone.String()} + + var info logpb.EventPayload + if n.Discard { + info = &eventpb.RemoveZoneConfig{CommonZoneConfigDetails: eventDetails} + } else { + info = &eventpb.SetZoneConfig{CommonZoneConfigDetails: eventDetails, + ResolvedOldConfig: oldZone.String()} + } b.LogEventForExistingPayload(elem, info) } func astToZoneConfigObject(b BuildCtx, n *tree.SetZoneConfig) (zoneConfigObject, error) { - if n.Discard { - return nil, scerrors.NotImplementedErrorf(n, "discarding zone configurations is not "+ - "supported in the DSC") - } zs := n.ZoneSpecifier // We are a database object. if n.Database != "" { @@ -161,6 +169,13 @@ func astToZoneConfigObject(b BuildCtx, n *tree.SetZoneConfig) (zoneConfigObject, return &tzo, nil } + // TODO(annie): remove this when we add support for discarding subzone + // configs. + if n.Discard { + return nil, scerrors.NotImplementedErrorf(n, "discarding zone configurations on "+ + "subzones are not supported in the DSC") + } + izo := indexZoneConfigObj{tableZoneConfigObj: tzo} // We are an index object. Determine the index ID and fill this // information out in our zoneConfigObject. diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go index d60b9db013e6..53573ff840f8 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/database_zone_config.go @@ -31,6 +31,10 @@ func (dzo *databaseZoneConfigObj) incrementSeqNum() { dzo.seqNum += 1 } +func (dzo *databaseZoneConfigObj) isNoOp() bool { + return dzo.zoneConfig == nil +} + func (dzo *databaseZoneConfigObj) getZoneConfigElem(b BuildCtx) scpb.Element { elem := &scpb.DatabaseZoneConfig{ DatabaseID: dzo.databaseID, @@ -89,7 +93,7 @@ func (dzo *databaseZoneConfigObj) checkZoneConfigChangePermittedForMultiRegion( return nil } - return maybeMultiregionErrorWithHint(options) + return maybeMultiregionErrorWithHint(b, dzo, options) } func (dzo *databaseZoneConfigObj) getTargetID() catid.DescID { diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go index 14ffd6937b2b..57ea7ed5a92a 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/index_zone_config.go @@ -26,6 +26,10 @@ type indexZoneConfigObj struct { var _ zoneConfigObject = &indexZoneConfigObj{} +func (izo *indexZoneConfigObj) isNoOp() bool { + return izo.indexSubzone == nil +} + func (izo *indexZoneConfigObj) getTableZoneConfig() *zonepb.ZoneConfig { return izo.tableZoneConfigObj.zoneConfig } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go index 36ebe314f9d1..1e8bd8434acf 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/partition_zone_config.go @@ -30,6 +30,10 @@ type partitionZoneConfigObj struct { var _ zoneConfigObject = &partitionZoneConfigObj{} +func (pzo *partitionZoneConfigObj) isNoOp() bool { + return pzo.partitionSubzone == nil +} + func (pzo *partitionZoneConfigObj) getTableZoneConfig() *zonepb.ZoneConfig { return pzo.tableZoneConfigObj.zoneConfig } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go index 70ab518bf7f2..2efc7537e61c 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/table_zone_config.go @@ -28,6 +28,10 @@ type tableZoneConfigObj struct { var _ zoneConfigObject = &tableZoneConfigObj{} +func (tzo *tableZoneConfigObj) isNoOp() bool { + return tzo.zoneConfig == nil +} + func (tzo *tableZoneConfigObj) getZoneConfigElem(b BuildCtx) scpb.Element { elem := &scpb.TableZoneConfig{ TableID: tzo.tableID, @@ -88,7 +92,7 @@ func (tzo *tableZoneConfigObj) checkZoneConfigChangePermittedForMultiRegion( return nil } - return maybeMultiregionErrorWithHint(options) + return maybeMultiregionErrorWithHint(b, tzo, options) } func (tzo *tableZoneConfigObj) getTargetID() catid.DescID { diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go index 7caaeb4505ca..8f76684aec30 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/zone_config_helpers.go @@ -74,6 +74,10 @@ type zoneConfigObjBuilder interface { // incrementSeqNum increments the seqNum by 1. incrementSeqNum() + + // isNoOp returns true if the zone config object is a no-op. This is defined + // by our object having no zone config yet. + isNoOp() bool } type zoneConfigRetriever interface { @@ -156,8 +160,37 @@ func resolvePhysicalTableName(b BuildCtx, n *tree.SetZoneConfig) { // maybeMultiregionErrorWithHint returns an error if the user is trying to // update a zone config value that's protected for multi-region databases. -func maybeMultiregionErrorWithHint(options tree.KVOptions) error { +func maybeMultiregionErrorWithHint(b BuildCtx, zco zoneConfigObject, options tree.KVOptions) error { hint := "to override this error, SET override_multi_region_zone_config = true and reissue the command" + // The request is to discard the zone configuration. Error in cases where + // the zone configuration being discarded was created by the multi-region + // abstractions. + if options == nil { + needToError := false + // Determine if this zone config that we're trying to discard is + // supposed to be there. zco is either a database or a table. + _, isDB := zco.(*databaseZoneConfigObj) + if isDB { + needToError = true + } else { + var err error + needToError, err = blockDiscardOfZoneConfigForMultiRegionObject(b, zco.getTargetID()) + if err != nil { + return err + } + } + + if needToError { + // User is trying to update a zone config value that's protected for + // multi-region databases. Return the constructed error. + err := errors.WithDetail(errors.Newf( + "attempting to discard the zone configuration of a multi-region entity"), + "discarding a multi-region zone configuration may result in sub-optimal performance or behavior", + ) + return errors.WithHint(err, hint) + } + } + // This is clearly an n^2 operation, but since there are only a single // digit number of zone config keys, it's likely faster to do it this way // than incur the memory allocation of creating a map. @@ -176,6 +209,44 @@ func maybeMultiregionErrorWithHint(options tree.KVOptions) error { return nil } +// blockDiscardOfZoneConfigForMultiRegionObject determines if discarding the +// zone configuration of a multi-region table, index or partition should be +// blocked. We only block the discard if the multi-region abstractions have +// created the zone configuration. Note that this function relies on internal +// knowledge of which table locality patterns write zone configurations. We do +// things this way to avoid having to read the zone configurations directly and +// do a more explicit comparison (with a generated zone configuration). If, down +// the road, the rules around writing zone configurations change, the tests in +// multi_region_zone_configs will fail and this function will need updating. +func blockDiscardOfZoneConfigForMultiRegionObject(b BuildCtx, tblID catid.DescID) (bool, error) { + tableElems := b.QueryByID(tblID) + + // It's a table zone config that the user is trying to discard. This + // should only be present on GLOBAL and REGIONAL BY TABLE tables in a + // specified region. + globalElem := tableElems.FilterTableLocalityGlobal().MustGetZeroOrOneElement() + primaryRegionElem := tableElems.FilterTableLocalityPrimaryRegion().MustGetZeroOrOneElement() + secondaryRegionElem := tableElems.FilterTableLocalitySecondaryRegion().MustGetZeroOrOneElement() + RBRElem := tableElems.FilterTableLocalityRegionalByRow().MustGetZeroOrOneElement() + + if globalElem != nil { + return true, nil + } else if secondaryRegionElem != nil { + return true, nil + } else if primaryRegionElem != nil { + // For REGIONAL BY TABLE tables, no need to error if a user-specified + // region does not exist. + return false, nil + } else if RBRElem != nil { + // For REGIONAL BY ROW tables, no need to error if we're setting a + // table level zone config. + return false, nil + } else { + return false, errors.AssertionFailedf( + "unknown table locality %s", b.QueryByID(tblID)) + } +} + // isMultiRegionTable returns True if this table is a multi-region table, // meaning it has locality GLOBAL, or REGIONAL BY TABLE, or REGIONAL BY ROW. func isMultiRegionTable(b BuildCtx, tableID catid.DescID) bool { @@ -1020,6 +1091,11 @@ func prepareZoneConfig( // No zone was found. Use an empty zone config that inherits from its parent. if partialZone == nil { + // If we are trying to discard a zone config that doesn't exist in + // system.zones, make this a no-op. + if n.Discard { + return nil, nil, nil + } partialZone = zonepb.NewZoneConfig() } currentZone := protoutil.Clone(partialZone).(*zonepb.ZoneConfig) @@ -1051,6 +1127,11 @@ func prepareZoneConfig( partialZone.CopyFromZone(zoneInheritedFields, copyFromParentList) } + if n.Discard { + partialZone.DeleteTableConfig() + return nil, partialZone, nil + } + // Determine where to load the configuration. newZone := *completeZone diff --git a/pkg/sql/schemachanger/scexec/scmutationexec/BUILD.bazel b/pkg/sql/schemachanger/scexec/scmutationexec/BUILD.bazel index 5878f0221147..ab55b2f6e3c3 100644 --- a/pkg/sql/schemachanger/scexec/scmutationexec/BUILD.bazel +++ b/pkg/sql/schemachanger/scexec/scmutationexec/BUILD.bazel @@ -22,6 +22,7 @@ go_library( "stats.go", "table.go", "trigger.go", + "zone_config.go", ], importpath = "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scexec/scmutationexec", visibility = ["//visibility:public"], diff --git a/pkg/sql/schemachanger/scexec/scmutationexec/zone_config.go b/pkg/sql/schemachanger/scexec/scmutationexec/zone_config.go new file mode 100644 index 000000000000..1ee4e2abd0bd --- /dev/null +++ b/pkg/sql/schemachanger/scexec/scmutationexec/zone_config.go @@ -0,0 +1,22 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package scmutationexec + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scop" +) + +func (i *immediateVisitor) DiscardZoneConfig(ctx context.Context, op scop.DiscardZoneConfig) error { + zc := op.ZoneConfig + if zc.IsSubzonePlaceholder() && len(zc.Subzones) == 0 { + i.ImmediateMutationStateUpdater.DeleteZoneConfig(op.DescID) + } else { + i.ImmediateMutationStateUpdater.UpdateZoneConfig(op.DescID, zc) + } + return nil +} diff --git a/pkg/sql/schemachanger/scop/immediate_mutation.go b/pkg/sql/schemachanger/scop/immediate_mutation.go index 4359373285bb..031c488755ed 100644 --- a/pkg/sql/schemachanger/scop/immediate_mutation.go +++ b/pkg/sql/schemachanger/scop/immediate_mutation.go @@ -995,6 +995,13 @@ type AddDatabaseZoneConfig struct { ZoneConfig *zonepb.ZoneConfig } +// DiscardZoneConfig discards the zone config for the given descriptor ID. +type DiscardZoneConfig struct { + immediateMutationOp + DescID descpb.ID + ZoneConfig *zonepb.ZoneConfig +} + // AddTableZoneConfig adds a zone config to a table. type AddTableZoneConfig struct { immediateMutationOp diff --git a/pkg/sql/schemachanger/scop/immediate_mutation_visitor_generated.go b/pkg/sql/schemachanger/scop/immediate_mutation_visitor_generated.go index 1065a7753c5d..29a635773283 100644 --- a/pkg/sql/schemachanger/scop/immediate_mutation_visitor_generated.go +++ b/pkg/sql/schemachanger/scop/immediate_mutation_visitor_generated.go @@ -146,6 +146,7 @@ type ImmediateMutationVisitor interface { InitSequence(context.Context, InitSequence) error CreateDatabaseDescriptor(context.Context, CreateDatabaseDescriptor) error AddDatabaseZoneConfig(context.Context, AddDatabaseZoneConfig) error + DiscardZoneConfig(context.Context, DiscardZoneConfig) error AddTableZoneConfig(context.Context, AddTableZoneConfig) error AddIndexZoneConfig(context.Context, AddIndexZoneConfig) error AddPartitionZoneConfig(context.Context, AddPartitionZoneConfig) error @@ -796,6 +797,11 @@ func (op AddDatabaseZoneConfig) Visit(ctx context.Context, v ImmediateMutationVi return v.AddDatabaseZoneConfig(ctx, op) } +// Visit is part of the ImmediateMutationOp interface. +func (op DiscardZoneConfig) Visit(ctx context.Context, v ImmediateMutationVisitor) error { + return v.DiscardZoneConfig(ctx, op) +} + // Visit is part of the ImmediateMutationOp interface. func (op AddTableZoneConfig) Visit(ctx context.Context, v ImmediateMutationVisitor) error { return v.AddTableZoneConfig(ctx, op) diff --git a/pkg/sql/schemachanger/scpb/element_collection.go b/pkg/sql/schemachanger/scpb/element_collection.go index bd243dcc0511..d24228a0c6a6 100644 --- a/pkg/sql/schemachanger/scpb/element_collection.go +++ b/pkg/sql/schemachanger/scpb/element_collection.go @@ -6,6 +6,9 @@ package scpb import ( + "fmt" + "reflect" + "github.com/cockroachdb/cockroach/pkg/util/debugutil" "github.com/cockroachdb/errors" ) @@ -273,3 +276,12 @@ func (c *ElementCollection[E]) MustHaveZeroOrOne() *ElementCollection[E] { } return c } + +func (c *ElementCollection[E]) String() string { + var result string + for _, element := range c.Elements() { + elemType := reflect.TypeOf(element).Elem() + result += fmt.Sprintf("%s:{%+v}\n", elemType.Name(), element) + } + return result +} diff --git a/pkg/sql/schemachanger/scplan/internal/opgen/opgen_database_zone_config.go b/pkg/sql/schemachanger/scplan/internal/opgen/opgen_database_zone_config.go index abe4421c3eae..0a9a5c91c686 100644 --- a/pkg/sql/schemachanger/scplan/internal/opgen/opgen_database_zone_config.go +++ b/pkg/sql/schemachanger/scplan/internal/opgen/opgen_database_zone_config.go @@ -26,8 +26,11 @@ func init() { toAbsent( scpb.Status_PUBLIC, to(scpb.Status_ABSENT, - emit(func(this *scpb.DatabaseZoneConfig) *scop.NotImplementedForPublicObjects { - return notImplementedForPublicObjects(this) + emit(func(this *scpb.DatabaseZoneConfig) *scop.DiscardZoneConfig { + return &scop.DiscardZoneConfig{ + DescID: this.DatabaseID, + ZoneConfig: this.ZoneConfig, + } }), ), ), diff --git a/pkg/sql/schemachanger/scplan/internal/opgen/opgen_table_zone_config.go b/pkg/sql/schemachanger/scplan/internal/opgen/opgen_table_zone_config.go index 12a6634cd59c..e6a4a342492d 100644 --- a/pkg/sql/schemachanger/scplan/internal/opgen/opgen_table_zone_config.go +++ b/pkg/sql/schemachanger/scplan/internal/opgen/opgen_table_zone_config.go @@ -26,8 +26,11 @@ func init() { toAbsent( scpb.Status_PUBLIC, to(scpb.Status_ABSENT, - emit(func(this *scpb.TableZoneConfig) *scop.NotImplementedForPublicObjects { - return notImplementedForPublicObjects(this) + emit(func(this *scpb.TableZoneConfig) *scop.DiscardZoneConfig { + return &scop.DiscardZoneConfig{ + DescID: this.TableID, + ZoneConfig: this.ZoneConfig, + } }), ), ), diff --git a/pkg/sql/schemachanger/sctest_generated_test.go b/pkg/sql/schemachanger/sctest_generated_test.go index 4205f7adf715..6e399dd6c14b 100644 --- a/pkg/sql/schemachanger/sctest_generated_test.go +++ b/pkg/sql/schemachanger/sctest_generated_test.go @@ -85,6 +85,13 @@ func TestEndToEndSideEffects_alter_database_configure_zone(t *testing.T) { sctest.EndToEndSideEffects(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestEndToEndSideEffects_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.EndToEndSideEffects(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestEndToEndSideEffects_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -610,6 +617,13 @@ func TestExecuteWithDMLInjection_alter_database_configure_zone(t *testing.T) { sctest.ExecuteWithDMLInjection(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestExecuteWithDMLInjection_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.ExecuteWithDMLInjection(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestExecuteWithDMLInjection_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1135,6 +1149,13 @@ func TestGenerateSchemaChangeCorpus_alter_database_configure_zone(t *testing.T) sctest.GenerateSchemaChangeCorpus(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestGenerateSchemaChangeCorpus_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.GenerateSchemaChangeCorpus(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestGenerateSchemaChangeCorpus_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1660,6 +1681,13 @@ func TestPause_alter_database_configure_zone(t *testing.T) { sctest.Pause(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestPause_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.Pause(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestPause_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -2185,6 +2213,13 @@ func TestPauseMixedVersion_alter_database_configure_zone(t *testing.T) { sctest.PauseMixedVersion(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestPauseMixedVersion_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.PauseMixedVersion(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestPauseMixedVersion_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -2710,6 +2745,13 @@ func TestRollback_alter_database_configure_zone(t *testing.T) { sctest.Rollback(t, path, sctest.SingleNodeTestClusterFactory{}) } +func TestRollback_alter_database_configure_zone_discard(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + const path = "pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard" + sctest.Rollback(t, path, sctest.SingleNodeTestClusterFactory{}) +} + func TestRollback_alter_database_configure_zone_multiple(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.definition b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.definition new file mode 100644 index 000000000000..51e8a2e98bce --- /dev/null +++ b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.definition @@ -0,0 +1,8 @@ +setup +CREATE DATABASE db; +---- + +test +ALTER DATABASE db CONFIGURE ZONE USING num_replicas = 7, gc.ttlseconds = 10000; +ALTER DATABASE db CONFIGURE ZONE DISCARD; +---- diff --git a/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.side_effects b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.side_effects new file mode 100644 index 000000000000..3625db98357e --- /dev/null +++ b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard.side_effects @@ -0,0 +1,46 @@ +/* setup */ +CREATE DATABASE db; +---- +... ++database {0 0 db} -> 104 ++schema {104 0 public} -> 105 + +/* test */ +ALTER DATABASE db CONFIGURE ZONE USING num_replicas = 7, gc.ttlseconds = 10000; +ALTER DATABASE db CONFIGURE ZONE DISCARD; +---- +begin transaction #1 +# begin StatementPhase +checking for feature: CONFIGURE ZONE +write *eventpb.SetZoneConfig to event log: + config: + options: + - '"gc.ttlseconds" = 10000' + - num_replicas = 7 + target: DATABASE db + resolvedOldConfig: 'range_min_bytes:134217728 range_max_bytes:536870912 gc: num_replicas:5 inherited_constraints:false null_voter_constraints_is_empty:true inherited_lease_preferences:false ' + sql: + descriptorId: 104 + statement: ALTER DATABASE ‹db› CONFIGURE ZONE USING ‹num_replicas› = ‹7›, ‹"gc.ttlseconds"› = ‹10000› + tag: CONFIGURE ZONE + user: root +## StatementPhase stage 1 of 1 with 1 MutationType op +upsert zone config for #104 +checking for feature: CONFIGURE ZONE +write *eventpb.RemoveZoneConfig to event log: + config: + target: DATABASE db + sql: + descriptorId: 104 + statement: ALTER DATABASE ‹db› CONFIGURE ZONE DISCARD + tag: CONFIGURE ZONE + user: root +## StatementPhase stage 1 of 1 with 1 MutationType op +deleting zone config for #104 +# end StatementPhase +# begin PreCommitPhase +## PreCommitPhase stage 1 of 1 with 1 MutationType op +undo all catalog changes within txn #1 +persist all catalog changes to storage +# end PreCommitPhase +commit transaction #1 diff --git a/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain new file mode 100644 index 000000000000..19d90236d8f2 --- /dev/null +++ b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain @@ -0,0 +1,24 @@ +/* setup */ +CREATE DATABASE db; + +/* test */ +EXPLAIN (DDL) ALTER DATABASE db CONFIGURE ZONE USING num_replicas = 7, gc.ttlseconds = 10000; +---- +Schema change plan for ALTER DATABASE ‹db› CONFIGURE ZONE USING ‹num_replicas› = ‹7›, ‹"gc.ttlseconds"› = ‹10000›; + ├── StatementPhase + │ └── Stage 1 of 1 in StatementPhase + │ ├── 1 element transitioning toward PUBLIC + │ │ └── ABSENT → PUBLIC DatabaseZoneConfig:{DescID: 104 (db), SeqNum: 1} + │ └── 1 Mutation operation + │ └── AddDatabaseZoneConfig {"DatabaseID":104} + └── PreCommitPhase + ├── Stage 1 of 2 in PreCommitPhase + │ ├── 1 element transitioning toward PUBLIC + │ │ └── PUBLIC → ABSENT DatabaseZoneConfig:{DescID: 104 (db), SeqNum: 1} + │ └── 1 Mutation operation + │ └── UndoAllInTxnImmediateMutationOpSideEffects + └── Stage 2 of 2 in PreCommitPhase + ├── 1 element transitioning toward PUBLIC + │ └── ABSENT → PUBLIC DatabaseZoneConfig:{DescID: 104 (db), SeqNum: 1} + └── 1 Mutation operation + └── AddDatabaseZoneConfig {"DatabaseID":104} diff --git a/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain_shape b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain_shape new file mode 100644 index 000000000000..60403304aef8 --- /dev/null +++ b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_1_of_2.explain_shape @@ -0,0 +1,8 @@ +/* setup */ +CREATE DATABASE db; + +/* test */ +EXPLAIN (DDL, SHAPE) ALTER DATABASE db CONFIGURE ZONE USING num_replicas = 7, gc.ttlseconds = 10000; +---- +Schema change plan for ALTER DATABASE ‹db› CONFIGURE ZONE USING ‹num_replicas› = ‹7›, ‹"gc.ttlseconds"› = ‹10000›; + └── execute 1 system table mutations transaction diff --git a/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain new file mode 100644 index 000000000000..1b098706869f --- /dev/null +++ b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain @@ -0,0 +1,18 @@ +/* setup */ +CREATE DATABASE db; + +/* test */ +ALTER DATABASE db CONFIGURE ZONE USING num_replicas = 7, gc.ttlseconds = 10000; +EXPLAIN (DDL) ALTER DATABASE db CONFIGURE ZONE DISCARD; +---- +Schema change plan for ALTER DATABASE ‹db› CONFIGURE ZONE DISCARD; following ALTER DATABASE ‹db› CONFIGURE ZONE USING ‹num_replicas› = ‹7›, ‹"gc.ttlseconds"› = ‹10000›; + ├── StatementPhase + │ └── Stage 1 of 1 in StatementPhase + │ ├── 1 element transitioning toward ABSENT + │ │ └── PUBLIC → ABSENT DatabaseZoneConfig:{DescID: 104 (db), SeqNum: 1} + │ └── 1 Mutation operation + │ └── DiscardZoneConfig {"DescID":104} + └── PreCommitPhase + └── Stage 1 of 1 in PreCommitPhase + └── 1 Mutation operation + └── UndoAllInTxnImmediateMutationOpSideEffects diff --git a/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain_shape b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain_shape new file mode 100644 index 000000000000..c3278d1288d1 --- /dev/null +++ b/pkg/sql/schemachanger/testdata/end_to_end/alter_database_configure_zone_discard/alter_database_configure_zone_discard__statement_2_of_2.explain_shape @@ -0,0 +1,9 @@ +/* setup */ +CREATE DATABASE db; + +/* test */ +ALTER DATABASE db CONFIGURE ZONE USING num_replicas = 7, gc.ttlseconds = 10000; +EXPLAIN (DDL, SHAPE) ALTER DATABASE db CONFIGURE ZONE DISCARD; +---- +Schema change plan for ALTER DATABASE ‹db› CONFIGURE ZONE DISCARD; following ALTER DATABASE ‹db› CONFIGURE ZONE USING ‹num_replicas› = ‹7›, ‹"gc.ttlseconds"› = ‹10000›; + └── execute 1 system table mutations transaction From 8105a383e9a88d6b0c81c2e3fd83c97043d9a832 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 30 Oct 2024 15:28:13 -0400 Subject: [PATCH 8/9] raft: don't panic on defortifying snapshot from new term Informs #132762. This commit fixes a bug introduced by 58a9f53e. Now that we no longer assume that snapshots are coming from the leader, we were not defortifying the raft leadership when receiving a snapshot. This meant that if a snapshot came from a new term, we would hit an assertion failure, which the new test reproduces. This commit addresses this bug by distinguishing between messages that are always sent from the leader of the message's term and messages that indicate that there is a new leader of the message's term, even if the message is not from the leader itself. This is temporary, and can be removed when we address #127349. Release note: None --- pkg/raft/raft.go | 23 ++-- pkg/raft/testdata/snapshot_new_term.txt | 155 ++++++++++++++++++++++++ pkg/raft/util.go | 20 +++ pkg/raft/util_test.go | 44 +++++++ 4 files changed, 234 insertions(+), 8 deletions(-) create mode 100644 pkg/raft/testdata/snapshot_new_term.txt diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 24ade33584ef..afe3de0f746f 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1065,8 +1065,7 @@ func (r *raft) setTerm(term uint64) { assertTrue(term > r.Term, "term cannot regress") r.Term = term r.Vote = None - r.lead = None - r.leadEpoch = 0 + r.resetLead() } func (r *raft) setVote(id pb.PeerID) { @@ -1087,7 +1086,7 @@ func (r *raft) setLead(lead pb.PeerID) { func (r *raft) resetLead() { r.lead = None - r.leadEpoch = 0 + r.resetLeadEpoch() } func (r *raft) setLeadEpoch(leadEpoch pb.Epoch) { @@ -1537,13 +1536,21 @@ func (r *raft) Step(m pb.Message) error { default: r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]", r.id, r.Term, m.Type, m.From, m.Term) - if IsMsgFromLeader(m.Type) { - // We've just received a message from a leader which was elected - // at a higher term. The old leader is no longer fortified, so it's - // safe to de-fortify at this point. + if IsMsgIndicatingLeader(m.Type) { + // We've just received a message that indicates that a new leader + // was elected at a higher term, but the message may not be from the + // leader itself. Either way, the old leader is no longer fortified, + // so it's safe to de-fortify at this point. r.deFortify(m.From, m.Term) - r.becomeFollower(m.Term, m.From) + var lead pb.PeerID + if IsMsgFromLeader(m.Type) { + lead = m.From + } + r.becomeFollower(m.Term, lead) } else { + // We've just received a message that does not indicate that a new + // leader was elected at a higher term. All it means is that some + // other peer has this term. r.becomeFollower(m.Term, None) } } diff --git a/pkg/raft/testdata/snapshot_new_term.txt b/pkg/raft/testdata/snapshot_new_term.txt new file mode 100644 index 000000000000..e2dac737c2e9 --- /dev/null +++ b/pkg/raft/testdata/snapshot_new_term.txt @@ -0,0 +1,155 @@ +# Test that creates a scenario where a peer learns about a new leadership term +# via a snapshot. + +log-level none +---- +ok + +add-nodes 3 voters=(1,2,3) index=10 +---- +ok + +# Elect 1 as leader. +campaign 1 +---- +ok + +stabilize +---- +ok + +log-level debug +---- +ok + +raft-state +---- +1: StateLeader (Voter) Term:1 Lead:1 LeadEpoch:1 +2: StateFollower (Voter) Term:1 Lead:1 LeadEpoch:1 +3: StateFollower (Voter) Term:1 Lead:1 LeadEpoch:1 + +# Transfer leadership to 2, without 3 hearing about it. +transfer-leadership from=1 to=2 +---- +INFO 1 [term 1] starts to transfer leadership to 2 +INFO 1 sends MsgTimeoutNow to 2 immediately as 2 already has up-to-date log +INFO 1 became follower at term 1 + +stabilize 1 2 +---- +> 1 handling Ready + Ready MustSync=false: + State:StateFollower + Messages: + 1->2 MsgTimeoutNow Term:1 Log:0/0 +> 2 receiving messages + 1->2 MsgTimeoutNow Term:1 Log:0/0 + INFO 2 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership + INFO 2 is starting a new election at term 1 + INFO 2 became candidate at term 2 + INFO 2 [logterm: 1, index: 11] sent MsgVote request to 1 at term 2 + INFO 2 [logterm: 1, index: 11] sent MsgVote request to 3 at term 2 +> 2 handling Ready + Ready MustSync=true: + State:StateCandidate + HardState Term:2 Vote:2 Commit:11 Lead:0 LeadEpoch:0 + Messages: + 2->1 MsgVote Term:2 Log:1/11 + 2->3 MsgVote Term:2 Log:1/11 + INFO 2 received MsgVoteResp from 2 at term 2 + INFO 2 has received 1 MsgVoteResp votes and 0 vote rejections +> 1 receiving messages + 2->1 MsgVote Term:2 Log:1/11 + INFO 1 [term: 1] received a MsgVote message with higher term from 2 [term: 2] + INFO 1 became follower at term 2 + INFO 1 [logterm: 1, index: 11, vote: 0] cast MsgVote for 2 [logterm: 1, index: 11] at term 2 +> 1 handling Ready + Ready MustSync=true: + HardState Term:2 Vote:2 Commit:11 Lead:0 LeadEpoch:0 + Messages: + 1->2 MsgVoteResp Term:2 Log:0/0 +> 2 receiving messages + 1->2 MsgVoteResp Term:2 Log:0/0 + INFO 2 received MsgVoteResp from 1 at term 2 + INFO 2 has received 2 MsgVoteResp votes and 0 vote rejections + INFO 2 became leader at term 2 +> 2 handling Ready + Ready MustSync=true: + State:StateLeader + HardState Term:2 Vote:2 Commit:11 Lead:2 LeadEpoch:1 + Entries: + 2/12 EntryNormal "" + Messages: + 2->1 MsgFortifyLeader Term:2 Log:0/0 + 2->3 MsgFortifyLeader Term:2 Log:0/0 + 2->1 MsgApp Term:2 Log:1/11 Commit:11 Entries:[2/12 EntryNormal ""] + 2->3 MsgApp Term:2 Log:1/11 Commit:11 Entries:[2/12 EntryNormal ""] +> 1 receiving messages + 2->1 MsgFortifyLeader Term:2 Log:0/0 + 2->1 MsgApp Term:2 Log:1/11 Commit:11 Entries:[2/12 EntryNormal ""] +> 1 handling Ready + Ready MustSync=true: + HardState Term:2 Vote:2 Commit:11 Lead:2 LeadEpoch:1 + Entries: + 2/12 EntryNormal "" + Messages: + 1->2 MsgFortifyLeaderResp Term:2 Log:0/0 LeadEpoch:1 + 1->2 MsgAppResp Term:2 Log:0/12 Commit:11 +> 2 receiving messages + 1->2 MsgFortifyLeaderResp Term:2 Log:0/0 LeadEpoch:1 + 1->2 MsgAppResp Term:2 Log:0/12 Commit:11 +> 2 handling Ready + Ready MustSync=true: + HardState Term:2 Vote:2 Commit:12 Lead:2 LeadEpoch:1 + CommittedEntries: + 2/12 EntryNormal "" + Messages: + 2->1 MsgApp Term:2 Log:1/11 Commit:11 Entries:[2/12 EntryNormal ""] + 2->1 MsgApp Term:2 Log:2/12 Commit:12 +> 1 receiving messages + 2->1 MsgApp Term:2 Log:1/11 Commit:11 Entries:[2/12 EntryNormal ""] + 2->1 MsgApp Term:2 Log:2/12 Commit:12 +> 1 handling Ready + Ready MustSync=true: + HardState Term:2 Vote:2 Commit:12 Lead:2 LeadEpoch:1 + CommittedEntries: + 2/12 EntryNormal "" + Messages: + 1->2 MsgAppResp Term:2 Log:0/12 Commit:11 + 1->2 MsgAppResp Term:2 Log:0/12 Commit:12 +> 2 receiving messages + 1->2 MsgAppResp Term:2 Log:0/12 Commit:11 + 1->2 MsgAppResp Term:2 Log:0/12 Commit:12 + +# Drop inflight messages to 3. +deliver-msgs drop=(3) +---- +dropped: 2->3 MsgVote Term:2 Log:1/11 +dropped: 2->3 MsgFortifyLeader Term:2 Log:0/0 +dropped: 2->3 MsgApp Term:2 Log:1/11 Commit:11 Entries:[2/12 EntryNormal ""] + +# Send a manual snapshot from 2 to 3, which will be at term 2. +send-snapshot 2 3 +---- +2->3 MsgSnap Term:2 Log:0/0 + Snapshot: Index:12 Term:2 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false + +stabilize +---- +> 3 receiving messages + 2->3 MsgSnap Term:2 Log:0/0 + Snapshot: Index:12 Term:2 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false + INFO 3 [term: 1] received a MsgSnap message with higher term from 2 [term: 2] + INFO 3 became follower at term 2 + INFO log [committed=11, applied=11, applying=11, unstable.offset=12, unstable.offsetInProgress=12, len(unstable.Entries)=0] starts to restore snapshot [index: 12, term: 2] + INFO 3 switched to configuration voters=(1 2 3) + INFO 3 [commit: 12, lastindex: 12, lastterm: 2] restored snapshot [index: 12, term: 2] + INFO 3 [commit: 12] restored snapshot [index: 12, term: 2] +> 3 handling Ready + Ready MustSync=true: + HardState Term:2 Commit:12 Lead:0 LeadEpoch:0 + Snapshot Index:12 Term:2 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false + Messages: + 3->2 MsgAppResp Term:2 Log:0/12 Commit:12 +> 2 receiving messages + 3->2 MsgAppResp Term:2 Log:0/12 Commit:12 diff --git a/pkg/raft/util.go b/pkg/raft/util.go index b79ad287f7db..f672fb32391d 100644 --- a/pkg/raft/util.go +++ b/pkg/raft/util.go @@ -49,6 +49,8 @@ var isResponseMsg = [...]bool{ pb.MsgFortifyLeaderResp: true, } +// isMsgFromLeader contains message types that come from the leader of the +// message's term. var isMsgFromLeader = [...]bool{ pb.MsgApp: true, // TODO(nvanbenschoten): we can't consider MsgSnap to be from the leader of @@ -60,6 +62,20 @@ var isMsgFromLeader = [...]bool{ pb.MsgDeFortifyLeader: true, } +// isMsgIndicatingLeader contains message types that indicate that there is a +// leader at the message's term, even if the message is not from the leader +// itself. +// +// TODO(nvanbenschoten): remove this when we address the TODO above. +var isMsgIndicatingLeader = [...]bool{ + pb.MsgApp: true, + pb.MsgSnap: true, + pb.MsgHeartbeat: true, + pb.MsgTimeoutNow: true, + pb.MsgFortifyLeader: true, + pb.MsgDeFortifyLeader: true, +} + func isMsgInArray(msgt pb.MessageType, arr []bool) bool { i := int(msgt) return i < len(arr) && arr[i] @@ -77,6 +93,10 @@ func IsMsgFromLeader(msgt pb.MessageType) bool { return isMsgInArray(msgt, isMsgFromLeader[:]) } +func IsMsgIndicatingLeader(msgt pb.MessageType) bool { + return isMsgInArray(msgt, isMsgIndicatingLeader[:]) +} + func IsLocalMsgTarget(id pb.PeerID) bool { return id == LocalAppendThread || id == LocalApplyThread } diff --git a/pkg/raft/util_test.go b/pkg/raft/util_test.go index 2dba8990920d..1f0a84553c07 100644 --- a/pkg/raft/util_test.go +++ b/pkg/raft/util_test.go @@ -186,6 +186,50 @@ func TestMsgFromLeader(t *testing.T) { if got != tt.isMsgFromLeader { t.Errorf("#%d: got %v, want %v", i, got, tt.isMsgFromLeader) } + if got { + require.True(t, IsMsgIndicatingLeader(tt.msgt), + "IsMsgFromLeader should imply IsMsgIndicatingLeader") + } + } +} + +func TestMsgIndicatingLeader(t *testing.T) { + tests := []struct { + msgt pb.MessageType + isMsgIndicatingLeader bool + }{ + {pb.MsgHup, false}, + {pb.MsgBeat, false}, + {pb.MsgUnreachable, false}, + {pb.MsgSnapStatus, false}, + {pb.MsgCheckQuorum, false}, + {pb.MsgTransferLeader, false}, + {pb.MsgProp, false}, + {pb.MsgApp, true}, + {pb.MsgAppResp, false}, + {pb.MsgVote, false}, + {pb.MsgVoteResp, false}, + {pb.MsgSnap, true}, + {pb.MsgHeartbeat, true}, + {pb.MsgHeartbeatResp, false}, + {pb.MsgTimeoutNow, true}, + {pb.MsgPreVote, false}, + {pb.MsgPreVoteResp, false}, + {pb.MsgStorageAppend, false}, + {pb.MsgStorageAppendResp, false}, + {pb.MsgStorageApply, false}, + {pb.MsgStorageApplyResp, false}, + {pb.MsgForgetLeader, false}, + {pb.MsgFortifyLeader, true}, + {pb.MsgFortifyLeaderResp, false}, + {pb.MsgDeFortifyLeader, true}, + } + + for i, tt := range tests { + got := IsMsgIndicatingLeader(tt.msgt) + if got != tt.isMsgIndicatingLeader { + t.Errorf("#%d: got %v, want %v", i, got, tt.isMsgIndicatingLeader) + } } } From 6db8137fa50d1caba45e7c793caaf6cd1d4a992a Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Wed, 30 Oct 2024 12:57:57 -0700 Subject: [PATCH 9/9] revert "logictest: deflake TestLogic_union" This reverts commit 6218f23518a73db64b4a8621234626bcfe865f25. I don't think this commit did anything to remove or reduce flakiness in the union logic test because the error occurs on the SELECT query _after_ the DDL and DML statements modified by this commit. Reverting it to allow for cleaner backports. Release note: None --- pkg/sql/logictest/testdata/logic_test/union | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/union b/pkg/sql/logictest/testdata/logic_test/union index 3575117a3994..e787ed47c926 100644 --- a/pkg/sql/logictest/testdata/logic_test/union +++ b/pkg/sql/logictest/testdata/logic_test/union @@ -683,15 +683,11 @@ SELECT a, b AS b1, b AS b2 FROM abc INTERSECT SELECT a, c, b FROM abc ORDER by a # synchronizer was causing spurious errors on queries with LIMIT. statement ok CREATE TABLE t127043_1 (k1 INT, v1 INT, INDEX (k1)); -CREATE TABLE t127043_2 (k2 INT, v2 INT, INDEX (k2)); -CREATE TABLE t127043_3 (k3 INT, v3 INT, INDEX (k3)); - -statement ok INSERT INTO t127043_1 VALUES (1, 1); +CREATE TABLE t127043_2 (k2 INT, v2 INT, INDEX (k2)); INSERT INTO t127043_2 VALUES (1, 1); +CREATE TABLE t127043_3 (k3 INT, v3 INT, INDEX (k3)); INSERT INTO t127043_3 VALUES (1, 1); - -statement ok CREATE VIEW v127043 (k, v) AS SELECT k1 AS k, v1 AS v FROM t127043_1@t127043_1_k1_idx