Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip #137731

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft

wip #137731

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/sql/catalog/catenumpb/encoded_datum.proto
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ enum DatumEncoding {
DESCENDING_KEY = 1;
// Indicates that the datum is encoded using the encoding used for values.
VALUE = 2;
// Indicates that the datum is encoded using the legacy encoding for values.
VALUE_LEGACY = 3;
}
21 changes: 8 additions & 13 deletions pkg/sql/row/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/fetchpb"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/valueside"
"github.com/cockroachdb/cockroach/pkg/sql/rowinfra"
"github.com/cockroachdb/cockroach/pkg/sql/scrub"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
Expand Down Expand Up @@ -1076,21 +1075,17 @@ func (rf *Fetcher) processValueSingle(
return prettyKey, "", nil
}
typ := table.spec.FetchedColumns[idx].Type
// TODO(arjun): The value is a directly marshaled single value, so we
// unmarshal it eagerly here. This can potentially be optimized out,
// although that would require changing UnmarshalColumnValue to operate
// on bytes, and for Encode/DecodeTableValue to operate on marshaled
// single values.
value, err := valueside.UnmarshalLegacy(rf.args.Alloc, typ, kv.Value)
if err != nil {
return "", "", err
}
encValue := rowenc.EncDatumValueLegacyFromValue(kv.Value)
if rf.args.TraceKV {
prettyValue = value.String()
err := encValue.EnsureDecoded(typ, rf.args.Alloc)
if err != nil {
return "", "", err
}
prettyValue = encValue.Datum.String()
}
table.row[idx] = rowenc.DatumToEncDatum(typ, value)
table.row[idx] = encValue
if DebugRowFetch {
log.Infof(ctx, "Scan %s -> %v", kv.Key, value)
log.Infof(ctx, "Scan %d -> %v", kv.Key, encValue)
}
return prettyKey, prettyValue, nil
}
Expand Down
50 changes: 50 additions & 0 deletions pkg/sql/rowenc/encoded_datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"fmt"
"unsafe"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside"
Expand Down Expand Up @@ -158,6 +159,11 @@ func EncDatumFromBuffer(enc catenumpb.DatumEncoding, buf []byte) (EncDatum, []by
}
ed := EncDatumFromEncoded(enc, buf[typeOffset:encLen])
return ed, buf[encLen:], nil
case catenumpb.DatumEncoding_VALUE_LEGACY:
// The legacy value encoding is only used for individual datums occupying
// entire roachpb.Values, and therefore we don't expect to call
// EncDatumFromBuffer with the legacy encoding.
panic(errors.AssertionFailedf("EncDatumFromBuffer unimplemented for VALUE_LEGACY"))
default:
panic(errors.AssertionFailedf("unknown encoding %s", enc))
}
Expand All @@ -179,6 +185,17 @@ func EncDatumValueFromBufferWithOffsetsAndType(
return ed, buf[encLen:], nil
}

// EncDatumValueLegacyFromValue
func EncDatumValueLegacyFromValue(value roachpb.Value) EncDatum {
if value.RawBytes == nil {
return EncDatum{
encoding: catenumpb.DatumEncoding_VALUE_LEGACY,
Datum: tree.DNull,
}
}
return EncDatumFromEncoded(catenumpb.DatumEncoding_VALUE_LEGACY, value.TagAndDataBytes())
}

// DatumToEncDatum initializes an EncDatum with the given Datum.
func DatumToEncDatum(ctyp *types.T, d tree.Datum) EncDatum {
ed, err := DatumToEncDatumEx(ctyp, d)
Expand Down Expand Up @@ -242,6 +259,9 @@ func (ed *EncDatum) IsNull() bool {
}
return typ == encoding.Null

case catenumpb.DatumEncoding_VALUE_LEGACY:
return ed.encoded == nil

default:
panic(errors.AssertionFailedf("unknown encoding %s", ed.encoding))
}
Expand All @@ -264,6 +284,16 @@ func (ed *EncDatum) EnsureDecoded(typ *types.T, a *tree.DatumAlloc) error {
ed.Datum, rem, err = keyside.Decode(a, typ, ed.encoded, encoding.Descending)
case catenumpb.DatumEncoding_VALUE:
ed.Datum, rem, err = valueside.Decode(a, typ, ed.encoded)
case catenumpb.DatumEncoding_VALUE_LEGACY:
var v roachpb.Value
v.SetTagAndData(ed.encoded)
// NULL is not stored in legacy-encoded values. We use an UNKNOWN tag to
// satisfy the EncDatum invariants that encoded != nil, and must check for
// that here.
if v.GetTag() == roachpb.ValueType_UNKNOWN {
v.RawBytes = nil
}
ed.Datum, err = valueside.UnmarshalLegacy(a, typ, v)
default:
return errors.AssertionFailedf("unknown encoding %d", redact.Safe(ed.encoding))
}
Expand Down Expand Up @@ -308,6 +338,21 @@ func (ed *EncDatum) Encode(
return keyside.Encode(appendTo, ed.Datum, encoding.Descending)
case catenumpb.DatumEncoding_VALUE:
return valueside.Encode(appendTo, valueside.NoColumnID, ed.Datum)
case catenumpb.DatumEncoding_VALUE_LEGACY:
// The legacy value encoding is only used for individual datums occupying
// entire roachpb.Values. Because of this, we don't expect to call Encode
// with the legacy encoding, bue we implement it anyway for testing
// purposes.
v, err := valueside.MarshalLegacy(typ, ed.Datum)
if err != nil {
return nil, err
}
// NULL is not stored in legacy-encoded values. We use an UNKNOWN tag here
// to satisfy the EncDatum invariants that encoded != nil.
if v.RawBytes == nil {
v.SetTag(roachpb.ValueType_UNKNOWN)
}
return v.TagAndDataBytes(), nil
default:
panic(errors.AssertionFailedf("unknown encoding requested %s", enc))
}
Expand Down Expand Up @@ -466,6 +511,11 @@ func (ed *EncDatum) GetInt() (int64, error) {
_, val, err := encoding.DecodeUntaggedIntValue(ed.encoded[dataOffset:])
return val, err

case catenumpb.DatumEncoding_VALUE_LEGACY:
var v roachpb.Value
v.SetTagAndData(ed.encoded)
return v.GetInt()

default:
return 0, errors.Errorf("unknown encoding %s", ed.encoding)
}
Expand Down
25 changes: 21 additions & 4 deletions pkg/sql/rowenc/encoded_datum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"unsafe"

"github.com/cockroachdb/apd/v3"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
Expand Down Expand Up @@ -116,7 +117,9 @@ func TestEncDatum(t *testing.T) {
}

func columnTypeCompatibleWithEncoding(typ *types.T, enc catenumpb.DatumEncoding) bool {
return enc == catenumpb.DatumEncoding_VALUE || colinfo.ColumnTypeIsIndexable(typ)
return enc == catenumpb.DatumEncoding_VALUE ||
enc == catenumpb.DatumEncoding_VALUE_LEGACY ||
colinfo.ColumnTypeIsIndexable(typ)
}

func TestEncDatumNull(t *testing.T) {
Expand Down Expand Up @@ -245,6 +248,7 @@ func TestEncDatumCompare(t *testing.T) {
asc := catenumpb.DatumEncoding_ASCENDING_KEY
desc := catenumpb.DatumEncoding_DESCENDING_KEY
noncmp := catenumpb.DatumEncoding_VALUE
legacy := catenumpb.DatumEncoding_VALUE_LEGACY

checkEncDatumCmp(t, a, typ, &v1, &v2, asc, asc, -1, false)
checkEncDatumCmp(t, a, typ, &v2, &v1, asc, asc, +1, false)
Expand All @@ -263,6 +267,10 @@ func TestEncDatumCompare(t *testing.T) {
checkEncDatumCmp(t, a, typ, &v2, &v1, desc, noncmp, +1, true)
checkEncDatumCmp(t, a, typ, &v1, &v1, asc, desc, 0, true)
checkEncDatumCmp(t, a, typ, &v2, &v2, desc, asc, 0, true)
checkEncDatumCmp(t, a, typ, &v1, &v2, noncmp, legacy, -1, true)
checkEncDatumCmp(t, a, typ, &v2, &v1, desc, legacy, +1, true)
checkEncDatumCmp(t, a, typ, &v1, &v1, asc, legacy, 0, true)
checkEncDatumCmp(t, a, typ, &v2, &v2, legacy, legacy, 0, true)
}
}
}
Expand Down Expand Up @@ -714,14 +722,18 @@ func TestEncDatumFingerprintMemory(t *testing.T) {
defer leaktest.AfterTest(t)()

const (
asc = catenumpb.DatumEncoding_ASCENDING_KEY
desc = catenumpb.DatumEncoding_DESCENDING_KEY
value = catenumpb.DatumEncoding_VALUE
asc = catenumpb.DatumEncoding_ASCENDING_KEY
desc = catenumpb.DatumEncoding_DESCENDING_KEY
value = catenumpb.DatumEncoding_VALUE
legacy = catenumpb.DatumEncoding_VALUE_LEGACY

i = 123
s = "abcde"
)

var v roachpb.Value
v.SetString(s)

testCases := []struct {
encDatum rowenc.EncDatum
typ *types.T
Expand All @@ -744,6 +756,11 @@ func TestEncDatumFingerprintMemory(t *testing.T) {
typ: types.String,
newMemUsage: int64(tree.NewDString(s).Size()),
},
{
encDatum: rowenc.EncDatumFromEncoded(legacy, v.TagAndDataBytes()),
typ: types.String,
newMemUsage: int64(tree.NewDString(s).Size()),
},
}

ctx := context.Background()
Expand Down
Loading