Skip to content

Commit

Permalink
Prevent exponential backoff from overflowing.
Browse files Browse the repository at this point in the history
When using `-cut-over-exponential-backoff` with `-default-retries`
greater than `64`, the sleep interval will overflow and immediately
begin retrying in rapid succession. This commit corrects the backoff
algorithm and adds test for both "retry operation" functions.
  • Loading branch information
burnison committed Jan 9, 2025
1 parent ad5d3ea commit 554d7f4
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 7 deletions.
15 changes: 8 additions & 7 deletions go/logic/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

var (
ErrMigratorUnsupportedRenameAlter = errors.New("ALTER statement seems to RENAME the table. This is not supported, and you should run your RENAME outside gh-ost.")
RetrySleepFn = time.Sleep
)

type ChangelogState string
Expand Down Expand Up @@ -135,7 +136,7 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
for i := 0; i < maxRetries; i++ {
if i != 0 {
// sleep after previous iteration
time.Sleep(1 * time.Second)
RetrySleepFn(1 * time.Second)
}
err = operation()
if err == nil {
Expand All @@ -155,16 +156,16 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
// attempts are reached. Wait intervals between attempts obey a maximum of
// `ExponentialBackoffMaxInterval`.
func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) {
var interval int64
maxRetries := int(this.migrationContext.MaxRetries())
maxInterval := this.migrationContext.ExponentialBackoffMaxInterval
for i := 0; i < maxRetries; i++ {
newInterval := int64(math.Exp2(float64(i - 1)))
if newInterval <= maxInterval {
interval = newInterval
}
interval := math.Min(
float64(maxInterval),
math.Max(1, math.Exp2(float64(i-1))),
)

if i != 0 {
time.Sleep(time.Duration(interval) * time.Second)
RetrySleepFn(time.Duration(interval) * time.Second)
}
err = operation()
if err == nil {
Expand Down
67 changes: 67 additions & 0 deletions go/logic/migrator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/testcontainers/testcontainers-go"
Expand Down Expand Up @@ -378,6 +379,72 @@ func (suite *MigratorTestSuite) TestFoo() {
suite.Require().Equal("_testing_del", tableName)
}

func TestMigratorRetry(t *testing.T) {
oldRetrySleepFn := RetrySleepFn
defer func() { RetrySleepFn = oldRetrySleepFn }()

migrationContext := base.NewMigrationContext()
migrationContext.SetDefaultNumRetries(100)
migrator := NewMigrator(migrationContext, "1.2.3")

var sleeps = 0
RetrySleepFn = func(duration time.Duration) {
assert.Equal(t, 1*time.Second, duration)
sleeps++
}

var tries = 0
retryable := func() error {
tries++
if tries < int(migrationContext.MaxRetries()) {
return errors.New("Backoff")
}
return nil
}

result := migrator.retryOperation(retryable, false)
assert.NoError(t, result)
assert.Equal(t, sleeps, 99)
assert.Equal(t, tries, 100)
}

func TestMigratorRetryWithExponentialBackoff(t *testing.T) {
oldRetrySleepFn := RetrySleepFn
defer func() { RetrySleepFn = oldRetrySleepFn }()

migrationContext := base.NewMigrationContext()
migrationContext.SetDefaultNumRetries(100)
migrationContext.SetExponentialBackoffMaxInterval(42)
migrator := NewMigrator(migrationContext, "1.2.3")

var sleeps = 0
expected := []int{
1, 2, 4, 8, 16, 32, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42,
}
RetrySleepFn = func(duration time.Duration) {
assert.Equal(t, time.Duration(expected[sleeps])*time.Second, duration)
sleeps++
}

var tries = 0
retryable := func() error {
tries++
if tries < int(migrationContext.MaxRetries()) {
return errors.New("Backoff")
}
return nil
}

result := migrator.retryOperationWithExponentialBackoff(retryable, false)
assert.NoError(t, result)
assert.Equal(t, sleeps, 99)
assert.Equal(t, tries, 100)
}

func TestMigrator(t *testing.T) {
suite.Run(t, new(MigratorTestSuite))
}

0 comments on commit 554d7f4

Please sign in to comment.