Skip to content

Commit

Permalink
jobs: remove legacy error log
Browse files Browse the repository at this point in the history
Release note: none.
Epic: none.
  • Loading branch information
dt committed Jan 14, 2025
1 parent 91a08e7 commit e17badb
Show file tree
Hide file tree
Showing 7 changed files with 7 additions and 446 deletions.
2 changes: 0 additions & 2 deletions pkg/jobs/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ go_library(
"//pkg/util/cidr",
"//pkg/util/envutil",
"//pkg/util/hlc",
"//pkg/util/json",
"//pkg/util/log",
"//pkg/util/log/eventpb",
"//pkg/util/log/severity",
Expand All @@ -83,7 +82,6 @@ go_library(
"@com_github_cockroachdb_errors//oserror",
"@com_github_cockroachdb_logtags//:logtags",
"@com_github_cockroachdb_redact//:redact",
"@com_github_gogo_protobuf//jsonpb",
"@com_github_gogo_protobuf//proto",
"@com_github_gogo_protobuf//types",
"@com_github_klauspost_compress//gzip",
Expand Down
90 changes: 0 additions & 90 deletions pkg/jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
package jobs

import (
"bytes"
"context"
gojson "encoding/json"
"fmt"
"reflect"
"sync/atomic"
Expand All @@ -21,13 +19,10 @@ import (
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/protoreflect"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sqlliveness"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
Expand All @@ -36,7 +31,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
"github.com/gogo/protobuf/jsonpb"
)

// Job manages logging the progress of long-running system processes, like
Expand Down Expand Up @@ -892,90 +886,6 @@ func (sj *StartableJob) recordStart() (alreadyStarted bool) {
return atomic.AddInt64(&sj.starts, 1) != 1
}

// ParseRetriableExecutionErrorLogFromJSON decodes a JSON array, as produced by
// FormatRetriableExecutionErrorLogToJSON, back into a slice of
// RetriableExecutionFailure messages. Each array element is decoded
// independently; unknown JSON fields are tolerated.
func ParseRetriableExecutionErrorLogFromJSON(
	log []byte,
) ([]*jobspb.RetriableExecutionFailure, error) {
	// First split the input into its raw array elements so that each one can
	// be handed to the protobuf-aware JSON decoder separately.
	var rawEntries []gojson.RawMessage
	if err := gojson.Unmarshal(log, &rawEntries); err != nil {
		return nil, errors.Wrap(err, "failed to decode json array for execution log")
	}

	failures := make([]*jobspb.RetriableExecutionFailure, 0, len(rawEntries))
	unmarshaler := jsonpb.Unmarshaler{AllowUnknownFields: true}
	// A single reader is re-pointed at each element instead of allocating a
	// new one per iteration.
	var rd bytes.Reader
	for _, raw := range rawEntries {
		newMsg, err := protoreflect.NewMessage("cockroach.sql.jobs.jobspb.RetriableExecutionFailure")
		if err != nil {
			return nil, errors.WithAssertionFailure(err)
		}
		failure := newMsg.(*jobspb.RetriableExecutionFailure)
		rd.Reset(raw)
		if err := unmarshaler.Unmarshal(&rd, failure); err != nil {
			return nil, err
		}
		failures = append(failures, failure)
	}
	return failures, nil
}

// FormatRetriableExecutionErrorLogToJSON formats the given retriable execution
// failure events into a JSON array with one element per non-nil event. This
// function is intended for use with crdb_internal.jobs.
//
// When an event carries an encoded Error, that error is decoded, flattened
// into a string and stored in the TruncatedError field of the emitted element;
// the Error field itself is cleared in the output. The caller's events are not
// modified because each one is formatted from a shallow copy.
//
// Nil entries are skipped, matching the defensive behavior of
// FormatRetriableExecutionErrorLogToStringArray. An error is returned if any
// event cannot be converted to JSON.
func FormatRetriableExecutionErrorLogToJSON(
	ctx context.Context, log []*jobspb.RetriableExecutionFailure,
) (*tree.DJSON, error) {
	ab := json.NewArrayBuilder(len(log))
	for _, entry := range log {
		if entry == nil {
			// No reason this should happen, but skipping is preferable to a
			// nil-pointer panic while rendering a virtual table.
			continue
		}
		// Shallow copy so the rewrite of Error/TruncatedError below does not
		// leak into the caller's message.
		ev := *entry
		if ev.Error != nil {
			ev.TruncatedError = errors.DecodeError(ctx, *ev.Error).Error()
			ev.Error = nil
		}
		msg, err := protoreflect.MessageToJSON(&ev, protoreflect.FmtFlags{
			EmitDefaults: false,
		})
		if err != nil {
			return nil, err
		}
		ab.Add(msg)
	}
	return tree.NewDJSON(ab.Build()), nil
}

// FormatRetriableExecutionErrorLogToStringArray renders each retriable
// execution failure event as a human-readable string and collects the results
// into a string array. This function is intended for use with
// crdb_internal.jobs.
//
// Nil events are skipped. For an event without an encoded Error, the cause is
// synthesized from the TruncatedError field.
func FormatRetriableExecutionErrorLogToStringArray(
	ctx context.Context, log []*jobspb.RetriableExecutionFailure,
) *tree.DArray {
	out := tree.NewDArray(types.String)
	for i := range log {
		failure := log[i]
		if failure == nil { // not expected, but tolerate it
			continue
		}

		var cause error
		if failure.Error == nil {
			cause = fmt.Errorf("(truncated) %s", failure.TruncatedError)
		} else {
			cause = errors.DecodeError(ctx, *failure.Error)
		}

		startedAt := timeutil.FromUnixMicros(failure.ExecutionStartMicros)
		endedAt := timeutil.FromUnixMicros(failure.ExecutionEndMicros)
		formatted := formatRetriableExecutionFailure(
			failure.InstanceID, Status(failure.Status), startedAt, endedAt, cause,
		)

		// Append errors are deliberately ignored: a missing log entry is
		// preferable to failing the caller.
		_ = out.Append(tree.NewDString(formatted))
	}
	return out
}

// GetJobTraceID returns the current trace ID of the job from the job progress.
func GetJobTraceID(ctx context.Context, db isql.DB, jobID jobspb.JobID) (tracingpb.TraceID, error) {
var traceID tracingpb.TraceID
Expand Down
7 changes: 2 additions & 5 deletions pkg/jobs/jobspb/jobs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1481,12 +1481,9 @@ message Payload {
// PauseReason is used to describe the reason that the job is currently paused
// or has been requested to be paused.
string pause_reason = 28;
// RetriableExecutionFailureLog stores a history of retriable execution
// failures. These failures may occur in either the RUNNING or REVERTING
// status. A finite number of these entries will be kept, as governed by
// the jobs.execution_errors.max_entries cluster setting.
repeated RetriableExecutionFailure retriable_execution_failure_log = 32;

reserved 32;

// CreationClusterID is populated at creation with the ClusterID, in case a
// job resuming later needs to use this information, e.g. to determine if it
// has been restored into a different cluster, which might mean it should
Expand Down
23 changes: 5 additions & 18 deletions pkg/jobs/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -1983,25 +1983,12 @@ func (r *Registry) maybeRecordExecutionFailure(ctx context.Context, err error, j
if !errors.As(err, &efe) {
return
}

updateErr := j.NoTxn().Update(ctx, func(
txn isql.Txn, md JobMetadata, ju *JobUpdater,
) error {
pl := md.Payload
{ // Append the entry to the log
maxSize := int(executionErrorsMaxEntrySize.Get(&r.settings.SV))
pl.RetriableExecutionFailureLog = append(pl.RetriableExecutionFailureLog,
efe.toRetriableExecutionFailure(ctx, maxSize))
}
{ // Maybe truncate the log.
maxEntries := int(executionErrorsMaxEntriesSetting.Get(&r.settings.SV))
log := &pl.RetriableExecutionFailureLog
if len(*log) > maxEntries {
*log = (*log)[len(*log)-maxEntries:]
}
updateErr := r.db.Txn(ctx, func(ctx context.Context, txn isql.Txn) error {
v, err := txn.GetSystemSchemaVersion(ctx)
if err != nil || v.Less(clusterversion.V25_1.Version()) {
return err
}
ju.UpdatePayload(pl)
return nil
return j.Messages().Record(ctx, txn, "retry", efe.cause.Error())
})
if ctx.Err() != nil {
return
Expand Down
Loading

0 comments on commit e17badb

Please sign in to comment.