From 969ef7db354b853cfd68f8229548eb641085eb56 Mon Sep 17 00:00:00 2001 From: Szymon Kulec Date: Fri, 23 Feb 2024 08:08:00 +0100 Subject: [PATCH] Slotted array is path based again (#249) * Fan out for state * Slotted is path based * SlottedArray uses NibblePath fully --- .../SlottedArrayBenchmarks.cs | 8 +- src/Paprika.Tests/Data/SlottedArrayTests.cs | 29 ++-- .../Merkle/RootHashFuzzyTests.cs | 2 +- src/Paprika.Tests/Store/AbandonedTests.cs | 4 +- src/Paprika.Tests/Store/RootPageTests.cs | 133 ----------------- src/Paprika.Tests/Utils/MetricsTests.cs | 2 +- src/Paprika/Data/NibblePath.cs | 29 ++-- src/Paprika/Data/SlottedArray.cs | 135 ++++++++---------- src/Paprika/Store/DataPage.cs | 59 +++----- src/Paprika/Store/FanOutPage.cs | 39 ++++- src/Paprika/Store/IPageVisitor.cs | 2 + src/Paprika/Store/Page.cs | 3 + src/Paprika/Store/RootPage.cs | 81 +++-------- src/Paprika/Utils/Printer.cs | 5 + 14 files changed, 182 insertions(+), 349 deletions(-) delete mode 100644 src/Paprika.Tests/Store/RootPageTests.cs diff --git a/src/Paprika.Benchmarks/SlottedArrayBenchmarks.cs b/src/Paprika.Benchmarks/SlottedArrayBenchmarks.cs index da911c1d..b8af40b4 100644 --- a/src/Paprika.Benchmarks/SlottedArrayBenchmarks.cs +++ b/src/Paprika.Benchmarks/SlottedArrayBenchmarks.cs @@ -21,7 +21,7 @@ public SlottedArrayBenchmarks() while (true) { BinaryPrimitives.WriteInt32LittleEndian(key, _to); - if (map.TrySet(key, key) == false) + if (map.TrySet(NibblePath.FromKey(key), key) == false) { // filled break; @@ -45,7 +45,7 @@ public int Write_whole_page_of_data() for (int i = 0; i < _to; i++) { BinaryPrimitives.WriteInt32LittleEndian(key, i); - if (map.TrySet(key, key)) + if (map.TrySet(NibblePath.FromKey(key), key)) { count++; } @@ -66,7 +66,7 @@ public int Read_existing_keys() for (var i = 0; i < _to; i++) { BinaryPrimitives.WriteInt32LittleEndian(key, i); - if (map.TryGet(key, out var data)) + if (map.TryGet(NibblePath.FromKey(key), out var data)) { result += data.Length; } @@ -87,7 +87,7 @@ public int Read_nonexistent_keys() for (int i = _to; i < _to * 2; i++) { BinaryPrimitives.WriteInt32LittleEndian(key, i); - if (map.TryGet(key, out _) == false) + if (map.TryGet(NibblePath.FromKey(key), out _) == false) { result += 1; } diff --git a/src/Paprika.Tests/Data/SlottedArrayTests.cs b/src/Paprika.Tests/Data/SlottedArrayTests.cs index 0f0df486..229d7041 100644 --- a/src/Paprika.Tests/Data/SlottedArrayTests.cs +++ b/src/Paprika.Tests/Data/SlottedArrayTests.cs @@ -49,15 +49,15 @@ public void Enumerate_all() using var e = map.EnumerateAll(); e.MoveNext().Should().BeTrue(); - e.Current.Key.SequenceEqual(key0).Should().BeTrue(); + e.Current.Key.RawSpan.SequenceEqual(key0).Should().BeTrue(); e.Current.RawData.SequenceEqual(Data0).Should().BeTrue(); e.MoveNext().Should().BeTrue(); - e.Current.Key.SequenceEqual(key1).Should().BeTrue(); + e.Current.Key.RawSpan.SequenceEqual(key1).Should().BeTrue(); e.Current.RawData.SequenceEqual(Data1).Should().BeTrue(); e.MoveNext().Should().BeTrue(); - e.Current.Key.SequenceEqual(key2).Should().BeTrue(); + e.Current.Key.RawSpan.SequenceEqual(key2).Should().BeTrue(); e.Current.RawData.SequenceEqual(Data2).Should().BeTrue(); e.MoveNext().Should().BeFalse(); @@ -170,19 +170,19 @@ public void Hashing() // three bytes Unique(stackalloc byte[] { 0xA, 0xD, 0xC }); - Unique(stackalloc byte[] { 0xA, 0xE, 0xC }); - Unique(stackalloc byte[] { 0xA, 0xF, 0xC }); + Unique(stackalloc byte[] { 0xAA, 0xEE, 0xCC }); + Unique(stackalloc byte[] { 0xAA, 0xFF, 0xCC }); // three bytes - Unique(stackalloc byte[] { 0xA, 0xD, 0xC, 0x1 }); - Unique(stackalloc byte[] { 0xA, 0xE, 0xC, 0x1 }); - Unique(stackalloc byte[] { 0xA, 0xF, 0xC, 0x1 }); + Unique(stackalloc byte[] { 0xAA, 0xDD, 0xCC, 0x11 }); + Unique(stackalloc byte[] { 0xAA, 0xEE, 0xCC, 0x11 }); + Unique(stackalloc byte[] { 0xAA, 0xFF, 0xCC, 0x11 }); return; void Unique(in ReadOnlySpan key) { - var hash = SlottedArray.GetHash(key); + var hash = SlottedArray.GetHash(NibblePath.FromKey(key)); var hex = key.ToHexString(true); if (hashes.TryAdd(hash, hex) == false) @@ -195,24 +195,25 @@ void Unique(in ReadOnlySpan key) file static class FixedMapTestExtensions { - public static void SetAssert(this SlottedArray map, in ReadOnlySpan key, ReadOnlySpan data, string? because = null) + public static void SetAssert(this SlottedArray map, in ReadOnlySpan key, ReadOnlySpan data, + string? because = null) { - map.TrySet(key, data).Should().BeTrue(because ?? "TrySet should succeed"); + map.TrySet(NibblePath.FromKey(key), data).Should().BeTrue(because ?? "TrySet should succeed"); } public static void DeleteAssert(this SlottedArray map, in ReadOnlySpan key) { - map.Delete(key).Should().BeTrue("Delete should succeed"); + map.Delete(NibblePath.FromKey(key)).Should().BeTrue("Delete should succeed"); } public static void GetAssert(this SlottedArray map, in ReadOnlySpan key, ReadOnlySpan expected) { - map.TryGet(key, out var actual).Should().BeTrue(); + map.TryGet(NibblePath.FromKey(key), out var actual).Should().BeTrue(); actual.SequenceEqual(expected).Should().BeTrue("Actual data should equal expected"); } public static void GetShouldFail(this SlottedArray map, in ReadOnlySpan key) { - map.TryGet(key, out var actual).Should().BeFalse("The key should not exist"); + map.TryGet(NibblePath.FromKey(key), out var actual).Should().BeFalse("The key should not exist"); } } \ No newline at end of file diff --git a/src/Paprika.Tests/Merkle/RootHashFuzzyTests.cs b/src/Paprika.Tests/Merkle/RootHashFuzzyTests.cs index 646d1620..f0009d63 100644 --- a/src/Paprika.Tests/Merkle/RootHashFuzzyTests.cs +++ b/src/Paprika.Tests/Merkle/RootHashFuzzyTests.cs @@ -93,7 +93,7 @@ public async Task CalculateThenDelete(string test, ulong size) using var db = PagedDb.NativeMemoryDb(1024 * 1024 * 1024, 2); var merkle = new ComputeMerkleBehavior(1, 1); - await using var blockchain = new Blockchain(db, merkle, null, new CacheBudget.Options(1000, 16)); + await using var blockchain = new Blockchain(db, merkle, null, new CacheBudget.Options(1000, 8), new CacheBudget.Options(1000, 8)); // set generator.Run(blockchain, 513, false, true); diff --git a/src/Paprika.Tests/Store/AbandonedTests.cs b/src/Paprika.Tests/Store/AbandonedTests.cs index 9c69b117..92631b49 100644 --- a/src/Paprika.Tests/Store/AbandonedTests.cs +++ b/src/Paprika.Tests/Store/AbandonedTests.cs @@ -59,7 +59,7 @@ public async Task Reuse_in_limited_environment() public async Task Work_proper_bookkeeping_when_lots_of_reads() { const int repeats = 1_000; - const int multiplier = 1 + 1; // data page + abandoned page per commit + const int multiplier = 2 + 1; // fanout page + data page + abandoned page per commit const int historyDepth = 2; var account = Keccak.EmptyTreeHash; @@ -100,7 +100,7 @@ public async Task Reuse_in_grow_and_shrink() byte[] value = [13]; - using var db = PagedDb.NativeMemoryDb(256 * Page.PageSize); + using var db = PagedDb.NativeMemoryDb(1024 * Page.PageSize); for (var i = 0; i < repeats; i++) { diff --git a/src/Paprika.Tests/Store/RootPageTests.cs b/src/Paprika.Tests/Store/RootPageTests.cs deleted file mode 100644 index fe2c19bc..00000000 --- a/src/Paprika.Tests/Store/RootPageTests.cs +++ /dev/null @@ -1,133 +0,0 @@ -using System.Buffers.Binary; -using FluentAssertions; -using NUnit.Framework; -using Paprika.Crypto; -using Paprika.Data; -using static Paprika.Store.RootPage; - -namespace Paprika.Tests.Store; - -public class RootPageTests -{ - [Test] - public void Encode_length() - { - const byte packed = 0x04; - const byte merkle = 0x02; - const byte odd = 0x01; - const byte packedShift = 3; - - // Path constructed that nibbles[1] & nibbles[3] are prone to packing them - var path = NibblePath.Parse("A1204567"); - - var n0 = path.GetAt(0); - var n0Shifted = (byte)(n0 << NibblePath.NibbleShift); - - var n1 = path.GetAt(1); - - var n2 = path.GetAt(2); - var n2Shifted = (byte)(n2 << NibblePath.NibbleShift); - - var n3 = path.GetAt(3); - - var n4 = path.GetAt(4); - var n4Shifted = (byte)(n4 << NibblePath.NibbleShift); - - // length: 0 - Check(NibblePath.Empty, DataType.Merkle, []); - - // length: 1 - Check(path.SliceTo(1), DataType.Merkle, [(byte)(n0Shifted | merkle | odd)]); - Check(path.SliceTo(1), DataType.Account, [(byte)(n0Shifted | odd)]); - Check(path.SliceTo(1), DataType.StorageCell, [(byte)(n0Shifted | odd)]); - - // length: 2, packed - var n1Packed = n1 << packedShift | packed; - Check(path.SliceTo(2), DataType.Merkle, [(byte)(n0Shifted | n1Packed | merkle)]); - Check(path.SliceTo(2), DataType.Account, [(byte)(n0Shifted | n1Packed)]); - Check(path.SliceTo(2), DataType.StorageCell, [(byte)(n0Shifted | n1Packed)]); - - // length: 3 - Check(path.SliceTo(3), DataType.Merkle, [(byte)(n0Shifted | n1), (byte)(n2Shifted | merkle | odd)]); - Check(path.SliceTo(3), DataType.Account, [(byte)(n0Shifted | n1), (byte)(n2Shifted | odd)]); - Check(path.SliceTo(3), DataType.StorageCell, [(byte)(n0Shifted | n1), (byte)(n2Shifted | odd)]); - - // length: 4, packed - var n3Packed = n3 << packedShift | packed; - Check(path.SliceTo(4), DataType.Merkle, [(byte)(n0Shifted | n1), (byte)(n2Shifted | n3Packed | merkle)]); - Check(path.SliceTo(4), DataType.Account, [(byte)(n0Shifted | n1), (byte)(n2Shifted | n3Packed)]); - Check(path.SliceTo(4), DataType.StorageCell, [(byte)(n0Shifted | n1), (byte)(n2Shifted | n3Packed)]); - - // length: 5 - Check(path.SliceTo(5), DataType.Merkle, - [(byte)(n0Shifted | n1), (byte)(n2Shifted | n3), (byte)(n4Shifted | merkle | odd)]); - Check(path.SliceTo(5), DataType.Account, - [(byte)(n0Shifted | n1), (byte)(n2Shifted | n3), (byte)(n4Shifted | odd)]); - Check(path.SliceTo(5), DataType.StorageCell, - [(byte)(n0Shifted | n1), (byte)(n2Shifted | n3), (byte)(n4Shifted | odd)]); - - return; - - static void Check(in NibblePath path, DataType type, params byte[] expected) - { - var actual = Encode(path, stackalloc byte[64], type); - (actual.Length % 2).Should().Be(0, "Only even lengths"); - if (actual.RawSpan.SequenceEqual(expected) == false) - { - Assert.Fail( - $"Mismatch, expected was: {expected.AsSpan().ToHexString(false)} while actual {actual.RawSpan.ToHexString(false)}"); - } - } - } - - [TestCase(DataType.Account)] - [TestCase(DataType.StorageCell)] - public void Encode_fuzzing(DataType type) - { - const int count = 8; - - Span span = stackalloc byte[4]; - Span nibbles = stackalloc byte[count]; - Span working = stackalloc byte[count]; - - Dictionary hashes = new Dictionary(); - - for (var i = 0; i < 2000; i++) - { - BinaryPrimitives.WriteInt32LittleEndian(span, i); - var path = NibblePath.FromKey(span); - - for (var j = 0; j < count; j++) - { - nibbles[j] = path.GetAt(j); - } - - // nibbles contains all the nibbles - var cutoff = nibbles.LastIndexOfAnyExcept((byte)0) + 1; - var actualPath = NibblePath.FromRawNibbles(nibbles[..cutoff], working); - - if (actualPath.Length > 0) - { - // only Merkle can be at the root! - Unique(actualPath, type, hashes); - } - - Unique(actualPath, DataType.Merkle, hashes); - } - - return; - - void Unique(in NibblePath path, DataType type, Dictionary hashes) - { - Span destination = stackalloc byte[count]; - var hex = Encode(path, destination, type).RawSpan.ToHexString(false); - - var p = path.ToString(); - if (hashes.TryAdd(hex, p) == false) - { - Assert.Fail($"Hex of encoded path {hex} already exists for path {hashes[hex]} and cannot be added for path {p}"); - } - } - - } -} \ No newline at end of file diff --git a/src/Paprika.Tests/Utils/MetricsTests.cs b/src/Paprika.Tests/Utils/MetricsTests.cs index de2935ea..f5488fb0 100644 --- a/src/Paprika.Tests/Utils/MetricsTests.cs +++ b/src/Paprika.Tests/Utils/MetricsTests.cs @@ -12,8 +12,8 @@ public class MetricsTests { private const int Mb = 1024 * 1024; - [Explicit("Sometimes faulty reporting happens with HDR here")] [Test] + [Explicit("Sometimes metrics do not report as it's HDR reporting.")] public async Task Metrics_should_report() { using var metrics = new Metrics(); diff --git a/src/Paprika/Data/NibblePath.cs b/src/Paprika/Data/NibblePath.cs index c9d3ec94..28744b53 100644 --- a/src/Paprika/Data/NibblePath.cs +++ b/src/Paprika/Data/NibblePath.cs @@ -74,6 +74,14 @@ public static NibblePath FromRawNibbles(ReadOnlySpan nibbles, Span w return copy; } + /// + /// Creates the nibble path from preamble and raw slice + /// + public static NibblePath FromRaw(byte preamble, ReadOnlySpan slice) + { + return new NibblePath(slice, preamble & OddBit, preamble >> LengthShift); + } + /// /// /// @@ -139,24 +147,26 @@ public Span WriteTo(Span destination) return destination.Slice(0, length); } + public byte RawPreamble => (byte)((_odd & OddBit) | (Length << LengthShift)); + private int WriteImpl(Span destination) { var odd = _odd & OddBit; - var lenght = GetSpanLength(Length, _odd); + var length = GetSpanLength(Length, _odd); destination[0] = (byte)(odd | (Length << LengthShift)); - MemoryMarshal.CreateSpan(ref _span, lenght).CopyTo(destination.Slice(PreambleLength)); + MemoryMarshal.CreateSpan(ref _span, length).CopyTo(destination.Slice(PreambleLength)); // clearing the oldest nibble, if needed // yes, it can be branch free if (((odd + Length) & 1) == 1) { - ref var oldest = ref destination[lenght]; + ref var oldest = ref destination[length]; oldest = (byte)(oldest & 0b1111_0000); } - return lenght + PreambleLength; + return length + PreambleLength; } /// @@ -255,14 +265,9 @@ public NibblePath Append(scoped in NibblePath other, Span workingSet) /// /// Gets the raw underlying span behind the path, removing the odd encoding. /// - public ReadOnlySpan RawSpan - { - get - { - var lenght = GetSpanLength(Length, _odd); - return MemoryMarshal.CreateSpan(ref _span, lenght); - } - } + public ReadOnlySpan RawSpan => MemoryMarshal.CreateSpan(ref _span, RawSpanLength); + + public int RawSpanLength => GetSpanLength(Length, _odd); public static ReadOnlySpan ReadFrom(ReadOnlySpan source, out NibblePath nibblePath) { diff --git a/src/Paprika/Data/SlottedArray.cs b/src/Paprika/Data/SlottedArray.cs index 639a883f..77879a4c 100644 --- a/src/Paprika/Data/SlottedArray.cs +++ b/src/Paprika/Data/SlottedArray.cs @@ -34,7 +34,7 @@ public SlottedArray(Span buffer) _slots = MemoryMarshal.Cast(_data); } - public bool TrySet(in ReadOnlySpan key, ReadOnlySpan data, ushort? keyHash = default) + public bool TrySet(in NibblePath key, ReadOnlySpan data, ushort? keyHash = default) { var hash = keyHash ?? GetHash(key); @@ -86,20 +86,22 @@ public bool TrySet(in ReadOnlySpan key, ReadOnlySpan data, ushort? k int offset; - if (key.Length <= Slot.MaxSlotLengthKey) + if (key.RawPreamble <= Slot.MaxSlotPreamble) { - slot.KeyLength = key.Length; + slot.KeyPreamble = key.RawPreamble; offset = 0; } else { - slot.KeyLength = Slot.KeyLongerMarker; - dest[0] = (byte)key.Length; + slot.KeyPreamble = Slot.PreambleBiggerMarker; + dest[0] = key.RawPreamble; offset = 1; } - key.CopyTo(dest.Slice(offset)); - data.CopyTo(dest.Slice(offset + key.Length)); + var raw = key.RawSpan; + + raw.CopyTo(dest.Slice(offset)); + data.CopyTo(dest.Slice(offset + raw.Length)); // commit low and high _header.Low += Slot.Size; @@ -169,11 +171,11 @@ private Item Build() ref var slot = ref _map._slots[_index]; var span = _map.GetSlotPayload(ref slot); - var shift = slot.KeyLength == Slot.KeyLongerMarker ? KeyLengthLength : 0; - var keyLength = slot.KeyLength == Slot.KeyLongerMarker ? span[0] : slot.KeyLength; + var shift = slot.KeyPreamble == Slot.PreambleBiggerMarker ? KeyLengthLength : 0; + var preamble = slot.KeyPreamble == Slot.PreambleBiggerMarker ? span[0] : slot.KeyPreamble; - var key = span.Slice(shift, keyLength); - var data = span.Slice(shift + keyLength); + var key = NibblePath.FromRaw(preamble, span.Slice(shift)); + var data = span.Slice(shift + key.RawSpanLength); return new Item(key, data, _index); } @@ -184,18 +186,11 @@ public void Dispose() ArrayPool.Shared.Return(_bytes); } - public readonly ref struct Item + public readonly ref struct Item(NibblePath key, ReadOnlySpan rawData, int index) { - public int Index { get; } - public ReadOnlySpan Key { get; } - public ReadOnlySpan RawData { get; } - - public Item(ReadOnlySpan key, ReadOnlySpan rawData, int index) - { - Index = index; - Key = key; - RawData = rawData; - } + public int Index { get; } = index; + public NibblePath Key { get; } = key; + public ReadOnlySpan RawData { get; } = rawData; } // a shortcut to not allocate, just copy the enumerator @@ -230,7 +225,7 @@ public int MoveTo(in SlottedArray destination) /// /// Gets the aggregated count of entries per nibble. /// - public void GatherCountStatistics(Span buckets, NibbleSelector selector) + public void GatherCountStatistics(Span buckets) { Debug.Assert(buckets.Length == BucketCount); @@ -242,33 +237,35 @@ public void GatherCountStatistics(Span buckets, NibbleSelector selector) // extract only not deleted and these which have at least one nibble if (slot.IsDeleted == false) { - var payload = GetSlotPayload(ref slot); + var span = GetSlotPayload(ref slot); - var shift = slot.KeyLength == Slot.KeyLongerMarker ? KeyLengthLength : 0; - var keyLength = slot.KeyLength == Slot.KeyLongerMarker ? payload[0] : slot.KeyLength; + var shift = slot.KeyPreamble == Slot.PreambleBiggerMarker ? KeyLengthLength : 0; + var preamble = slot.KeyPreamble == Slot.PreambleBiggerMarker ? span[0] : slot.KeyPreamble; - var first = selector(payload.Slice(shift, keyLength)); - if (first < BucketCount) - { - buckets[first] += 1; - } + // TODO: empty if (preamble == 0 || preamble == 1) + var key = NibblePath.FromRaw(preamble, span.Slice(shift)); + + if (key.IsEmpty) + continue; + + buckets[key.FirstNibble] += 1; } } } private const int KeyLengthLength = 1; - private static int GetTotalSpaceRequired(in ReadOnlySpan key, ReadOnlySpan data) + private static int GetTotalSpaceRequired(in NibblePath key, ReadOnlySpan data) { - return (key.Length <= Slot.MaxSlotLengthKey ? 0 : KeyLengthLength) + - key.Length + data.Length; + return (key.RawPreamble <= Slot.MaxSlotPreamble ? 0 : KeyLengthLength) + + key.RawSpanLength + data.Length; } /// /// Warning! This does not set any tombstone so the reader won't be informed about a delete, /// just will miss the value. /// - public bool Delete(in ReadOnlySpan key) + public bool Delete(in NibblePath key) { if (TryGetImpl(key, GetHash(key), out _, out var index)) { @@ -317,7 +314,7 @@ private void Deframent() copyTo.Hash = copyFrom.Hash; copyTo.ItemAddress = high; - copyTo.KeyLength = copyFrom.KeyLength; + copyTo.KeyPreamble = copyFrom.KeyPreamble; copy._header.Low += Slot.Size; copy._header.High = (ushort)(copy._header.High + fromSpan.Length); @@ -358,7 +355,7 @@ private void CollectTombstones() } } - public bool TryGet(ReadOnlySpan key, out ReadOnlySpan data) + public bool TryGet(in NibblePath key, out ReadOnlySpan data) { if (TryGetImpl(key, GetHash(key), out var span, out _)) { @@ -372,7 +369,7 @@ public bool TryGet(ReadOnlySpan key, out ReadOnlySpan data) [OptimizationOpportunity(OptimizationType.CPU, "key encoding is delayed but it might be called twice, here + TrySet")] - private bool TryGetImpl(scoped in ReadOnlySpan key, ushort hash, out Span data, out int slotIndex) + private bool TryGetImpl(in NibblePath key, ushort hash, out Span data, out int slotIndex) { var to = _header.Low / Slot.Size; @@ -392,6 +389,8 @@ private bool TryGetImpl(scoped in ReadOnlySpan key, ushort hash, out Span< return false; } + var preamble = key.RawPreamble; + while (index != notFound) { // move offset to the given position @@ -406,16 +405,16 @@ private bool TryGetImpl(scoped in ReadOnlySpan key, ushort hash, out Span< { var actual = GetSlotPayload(ref slot); - // The StartsWith check assumes that all the keys have the same length. - var length = key.Length; - var shift = slot.KeyLength == Slot.KeyLongerMarker ? KeyLengthLength : 0; - var actualLength = slot.KeyLength == Slot.KeyLongerMarker ? actual[0] : slot.KeyLength; + var shift = slot.KeyPreamble == Slot.PreambleBiggerMarker ? KeyLengthLength : 0; + var actualPreamble = slot.KeyPreamble == Slot.PreambleBiggerMarker ? actual[0] : slot.KeyPreamble; - if (actualLength == length) + if (actualPreamble == preamble) { - if (actual.Slice(shift, length).SequenceEqual(key)) + var actualKey = NibblePath.FromRaw(actualPreamble, actual.Slice(shift)); + + if (actualKey.Equals(key)) { - data = actual.Slice(shift + length); + data = actual.Slice(shift + actualKey.RawSpanLength); slotIndex = i; return true; } @@ -487,12 +486,12 @@ public bool IsDeleted private const ushort KeyLengthMask = 0b1110_0000_0000_0000; private const ushort KeyLengthShift = 13; - public const int MaxSlotLengthKey = 6; - public const int KeyLongerMarker = 7; + public const int MaxSlotPreamble = 6; + public const int PreambleBiggerMarker = 7; - public int KeyLength + public byte KeyPreamble { - get => (Raw & KeyLengthMask) >> KeyLengthShift; + get => (byte)((Raw & KeyLengthMask) >> KeyLengthShift); set => Raw = (ushort)((Raw & ~KeyLengthMask) | (value << KeyLengthShift)); } @@ -521,42 +520,22 @@ public override string ToString() /// /// Highly optimized to eliminate bound checks and special cases /// - public static ushort GetHash(ReadOnlySpan key) + public static ushort GetHash(in NibblePath key) { - // First, special cases for short paths - if (key.Length <= 2) - { - if (key.Length == 2) - { - return Unsafe.ReadUnaligned(in key[0]); - } - - return key.Length == 0 ? (ushort)0 : MemoryMarshal.GetReference(key); - } + const int shift = NibblePath.NibbleShift; - // At least 3 bytes long, read start and end - var a = Unsafe.ReadUnaligned(in key[0]); - var b = Unsafe.ReadUnaligned(in key[^2]); + if (key.Length == 0) + return 0; - // A really simple hash. - return (ushort)(a ^ b); + // get nibbles at 0, 1/3, 2/3, last + return (ushort)(key.GetAt(0) | + (key.GetAt(key.Length / 3) << shift) | + (key.GetAt(key.Length * 2 / 3) << (shift * 2)) | + (key.GetAt(key.Length - 1) << (shift * 3))); } public override string ToString() => $"{nameof(Count)}: {Count}, {nameof(CapacityLeft)}: {CapacityLeft}"; - public IEnumerable MaterializeAllKeys() - { - var list = new List(); - foreach (var item in EnumerateAll()) - { - list.Add(item.Key.ToHexString(false)); - } - - list.Sort(StringComparer.InvariantCultureIgnoreCase); - - return list; - } - [StructLayout(LayoutKind.Explicit, Size = Size)] private struct Header { diff --git a/src/Paprika/Store/DataPage.cs b/src/Paprika/Store/DataPage.cs index 65a8c873..f39e374d 100644 --- a/src/Paprika/Store/DataPage.cs +++ b/src/Paprika/Store/DataPage.cs @@ -45,28 +45,35 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat // delete locally if (LeafCount <= MaxLeafCount) { - map.Delete(key.RawSpan); + map.Delete(key); for (var i = 0; i < MaxLeafCount; i++) { // TODO: consider checking whether the array contains the data first, // only then make it writable as it results in a COW - if (TryGetWritableLeaf(i, batch, out var leaf)) leaf.Delete(key.RawSpan); + if (TryGetWritableLeaf(i, batch, out var leaf)) leaf.Delete(key); } return page; } + if (key.IsEmpty) + { + // there's no lower level, delete in map + map.Delete(key); + return page; + } + var childPageAddress = Data.Buckets[key.FirstNibble]; if (childPageAddress.IsNull) { // there's no lower level, delete in map - map.Delete(key.RawSpan); + map.Delete(key); return page; } } // try write in map - if (map.TrySet(key.RawSpan, data)) + if (map.TrySet(key, data)) { return page; } @@ -86,7 +93,7 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat // move as many as possible to the first leaf and try to re-add var anyMoved = map.MoveTo(newest) > 0; - if (anyMoved && map.TrySet(key.RawSpan, data)) + if (anyMoved && map.TrySet(key, data)) { return page; } @@ -99,7 +106,7 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat TryGetWritableLeaf(LeafCount - 1, batch, out newest, true); map.MoveTo(newest); - if (map.TrySet(key.RawSpan, data)) + if (map.TrySet(key, data)) { return page; } @@ -130,13 +137,13 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat foreach (var item in leafMap.EnumerateAll()) { - Set(NibblePath.FromKey(item.Key), item.RawData, batch); + Set(item.Key, item.RawData, batch); } } foreach (var item in new SlottedArray(copy).EnumerateAll()) { - Set(NibblePath.FromKey(item.Key), item.RawData, batch); + Set(item.Key, item.RawData, batch); } ArrayPool.Shared.Return(bytes); @@ -174,24 +181,18 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat return Set(key, data, batch); } - private DataPage FlushDown(in SlottedArray map, byte nibble, DataPage destination, IBatchContext batch) + private static DataPage FlushDown(in SlottedArray map, byte nibble, DataPage destination, IBatchContext batch) { foreach (var item in map.EnumerateAll()) { - var key = NibblePath.FromKey(item.Key); + var key = item.Key; if (key.IsEmpty) // empty keys are left in page continue; - key = key.SliceFrom(TreeLevelOddity); // for odd levels, slice by 1 - if (key.IsEmpty) - continue; - if (key.FirstNibble != nibble) continue; var sliced = key.SliceFrom(1); - if (sliced.IsEmpty) - continue; destination = new DataPage(destination.Set(sliced, item.RawData, batch)); @@ -242,31 +243,13 @@ private bool TryGetWritableLeaf(int index, IBatchContext batch, out SlottedArray private static SlottedArray GetLeafSlottedArray(Page page) => new(new Span(page.Payload, Payload.Size)); - private int TreeLevelOddity => Header.Level % 2; - - - private byte FindMostFrequentNibble(SlottedArray map) + private static byte FindMostFrequentNibble(SlottedArray map) { const int count = SlottedArray.BucketCount; Span stats = stackalloc ushort[count]; - if (TreeLevelOddity == 0) - { - map.GatherCountStatistics(stats, static span => - { - var path = NibblePath.FromKey(span); - return path.Length > 0 ? path.FirstNibble : byte.MaxValue; - }); - } - else - { - map.GatherCountStatistics(stats, static span => - { - var path = NibblePath.FromKey(span); - return path.Length > 1 ? path.GetAt(1) : byte.MaxValue; - }); - } + map.GatherCountStatistics(stats); byte biggestIndex = 0; for (byte i = 1; i < count; i++) @@ -324,7 +307,7 @@ public bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadO var map = Map; // try regular map - if (map.TryGet(key.RawSpan, out result)) + if (map.TryGet(key, out result)) { return true; } @@ -335,7 +318,7 @@ public bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadO for (var i = LeafCount - 1; i >= 0; i--) { var leafMap = GetLeafSlottedArray(batch.GetAt(Data.Buckets[i])); - if (leafMap.TryGet(key.RawSpan, out result)) + if (leafMap.TryGet(key, out result)) return true; } diff --git a/src/Paprika/Store/FanOutPage.cs b/src/Paprika/Store/FanOutPage.cs index 66b43b5f..4a42592d 100644 --- a/src/Paprika/Store/FanOutPage.cs +++ b/src/Paprika/Store/FanOutPage.cs @@ -5,6 +5,13 @@ namespace Paprika.Store; +/// +/// The fan out page stores keys shorter than in its content, +/// and delegates other keys lengths to lower layers of the tree. +/// +/// This is useful to get a good fan out at the higher levels of the tree. +/// Unfortunately this impacts the caching behavior and may result in more pages being updated. +/// [method: DebuggerStepThrough] public readonly unsafe struct FanOutPage(Page page) : IPageWithData { @@ -12,14 +19,18 @@ public readonly unsafe struct FanOutPage(Page page) : IPageWithData private const int ConsumedNibbles = 2; - public ref PageHeader Header => ref page.Header; + private ref PageHeader Header => ref page.Header; private ref Payload Data => ref Unsafe.AsRef(page.Payload); [StructLayout(LayoutKind.Explicit, Size = Size)] private struct Payload { - private const int Size = FanOut * DbAddress.Size; + private const int Size = Page.PageSize - PageHeader.Size; + + private const int FanOutSize = FanOut * DbAddress.Size; + + private const int DataSize = Size - FanOutSize; /// /// The number of buckets to fan out to. @@ -32,16 +43,19 @@ private struct Payload [FieldOffset(0)] private DbAddress Address; public Span Addresses => MemoryMarshal.CreateSpan(ref Address, FanOut); + + [FieldOffset(FanOutSize)] private byte DataFirst; + + public Span Data => MemoryMarshal.CreateSpan(ref DataFirst, DataSize); } public bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadOnlySpan result) { batch.AssertRead(Header); - if (key.Length < ConsumedNibbles) + if (IsKeyLocal(key)) { - result = default; - return false; + return new SlottedArray(Data.Data).TryGet(key, out result); } var index = GetIndex(key); @@ -67,6 +81,16 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat return new FanOutPage(writable).Set(key, data, batch); } + if (IsKeyLocal(key)) + { + if (new SlottedArray(Data.Data).TrySet(key, data) == false) + { + ThrowNoSpaceInline(); + } + + return page; + } + var index = GetIndex(key); var sliced = key.SliceFrom(ConsumedNibbles); @@ -87,6 +111,11 @@ public Page Set(in NibblePath key, in ReadOnlySpan data, IBatchContext bat return page; } + private static bool IsKeyLocal(in NibblePath key) => key.Length < ConsumedNibbles; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ThrowNoSpaceInline() => throw new Exception("Could not set the data inline"); + public void Report(IReporter reporter, IPageResolver resolver, int level) { foreach (var bucket in Data.Addresses) diff --git a/src/Paprika/Store/IPageVisitor.cs b/src/Paprika/Store/IPageVisitor.cs index 2594acfc..3d6a6e19 100644 --- a/src/Paprika/Store/IPageVisitor.cs +++ b/src/Paprika/Store/IPageVisitor.cs @@ -7,4 +7,6 @@ public interface IPageVisitor void On(AbandonedPage page, DbAddress addr); void On(DataPage page, DbAddress addr); + + void On(FanOutPage page, DbAddress addr); } \ No newline at end of file diff --git a/src/Paprika/Store/Page.cs b/src/Paprika/Store/Page.cs index c1fdd70f..e7c83c67 100644 --- a/src/Paprika/Store/Page.cs +++ b/src/Paprika/Store/Page.cs @@ -19,6 +19,9 @@ public interface IPage public interface IPageWithData : IPage where TPage : struct, IPageWithData { + /// + /// Wraps the raw page as + /// static abstract TPage Wrap(Page page); bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadOnlySpan result); diff --git a/src/Paprika/Store/RootPage.cs b/src/Paprika/Store/RootPage.cs index 846c2f3a..ffb1a1b5 100644 --- a/src/Paprika/Store/RootPage.cs +++ b/src/Paprika/Store/RootPage.cs @@ -9,8 +9,20 @@ namespace Paprika.Store; /// /// Root page is a page that contains all the needed metadata from the point of view of the database. -/// It also includes the blockchain information like block hash or block number +/// It also includes the blockchain information like block hash or block number. /// +/// +/// Considerations for page types selected: +/// +/// State: +/// is that splits accounts into 256 buckets. +/// This makes the updates update more pages, but adds a nice fan out for fast searches. +/// Account ids: +/// is a of s. This gives 64k buckets on two levels. Searches should search no more than 3 levels of pages. +/// +/// Storage: +/// is a of s. This gives 64k buckets on two levels. +/// public readonly unsafe struct RootPage(Page root) : IPage { private const int StorageKeySize = Keccak.Size + Keccak.Size + 1; @@ -86,7 +98,7 @@ public void Accept(IPageVisitor visitor, IPageResolver resolver) { if (Data.StateRoot.IsNull == false) { - var data = new DataPage(resolver.GetAt(Data.StateRoot)); + var data = new FanOutPage(resolver.GetAt(Data.StateRoot)); visitor.On(data, Data.StateRoot); } @@ -103,8 +115,7 @@ public bool TryGet(scoped in Key key, IReadOnlyBatchContext batch, out ReadOnlyS return false; } - var encoded = Encode(key.Path, stackalloc byte[key.Path.MaxByteLength], key.Type); - return new DataPage(batch.GetAt(Data.StateRoot)).TryGet(encoded, batch, out result); + return new FanOutPage(batch.GetAt(Data.StateRoot)).TryGet(key.Path, batch, out result); } if (Data.Ids.TryGet(key.Path, batch, out var id) == false) @@ -113,8 +124,7 @@ public bool TryGet(scoped in Key key, IReadOnlyBatchContext batch, out ReadOnlyS return false; } - var encodedStorage = Encode(key.StoragePath, stackalloc byte[key.Path.MaxByteLength], key.Type); - var path = NibblePath.FromKey(id).Append(encodedStorage, stackalloc byte[StorageKeySize]); + var path = NibblePath.FromKey(id).Append(key.StoragePath, stackalloc byte[StorageKeySize]); return Data.Storage.TryGet(path, batch, out result); } @@ -131,63 +141,14 @@ public bool TryGet(scoped in Key key, IReadOnlyBatchContext batch, out ReadOnlyS /// public static NibblePath Encode(in NibblePath path, in Span destination, DataType type) { - var typeFlag = (byte)(type & DataType.Merkle); - - const byte oddEnd = 0x01; - const byte evenEnd = 0x00; - - // 2 lower bits are used, for odd|even and merkle. - // We can use 1 more bit for differentiation for even lengths. - // This leaves 1 bit to extract potential value. This means that it can compress 2 / 16, - // meaning 1/8 of even paths. - - const byte evenPacked = 0x04; - const byte packedShift = 3; - const byte maxPacked = 1; - - Debug.Assert(path.IsOdd == false, "Encoded paths should not be odd. They always start at 0"); - - if (path.IsEmpty) - return path; - - var raw = path.RawSpan; - - if (path.Length % 2 == 1) - { - // Odd case - raw.CopyTo(destination); - ref var last = ref destination[raw.Length - 1]; - last &= 0xF0; - last |= oddEnd; - last |= typeFlag; - - return NibblePath.FromKey(destination[..raw.Length]); - } - - // Even case - raw.CopyTo(destination); - var lastByte = raw[^1]; - var lastNibble = lastByte & NibblePath.NibbleMask; - var lastButOneNibble = lastByte & (NibblePath.NibbleMask << NibblePath.NibbleShift); - - if (lastNibble <= maxPacked) - { - // We can pack better - destination[raw.Length - 1] = - (byte)(lastButOneNibble | (lastNibble << packedShift) | evenPacked | typeFlag); - return NibblePath.FromKey(destination[..raw.Length]); - } - - destination[raw.Length] = (byte)(evenEnd | typeFlag); - return NibblePath.FromKey(destination[..(raw.Length + 1)]); + return path; } public void SetRaw(in Key key, IBatchContext batch, ReadOnlySpan rawData) { if (key.IsState) { - var encoded = Encode(key.Path, stackalloc byte[key.Path.MaxByteLength], key.Type); - SetAtRoot(batch, encoded, rawData, ref Data.StateRoot); + SetAtRoot(batch, key.Path, rawData, ref Data.StateRoot); } else { @@ -223,8 +184,7 @@ public void SetRaw(in Key key, IBatchContext batch, ReadOnlySpan rawData) } } - var encoded = Encode(key.StoragePath, stackalloc byte[key.Path.MaxByteLength], key.Type); - var path = id.Append(encoded, stackalloc byte[StorageKeySize]); + var path = id.Append(key.StoragePath, stackalloc byte[StorageKeySize]); Data.Storage.Set(path, rawData, batch); } @@ -236,8 +196,7 @@ public void Destroy(IBatchContext batch, in NibblePath account) Data.Ids.Set(account, ReadOnlySpan.Empty, batch); // Destroy the account entry - SetAtRoot(batch, Encode(account, stackalloc byte[account.MaxByteLength], DataType.Account), - ReadOnlySpan.Empty, ref Data.StateRoot); + SetAtRoot(batch, account, ReadOnlySpan.Empty, ref Data.StateRoot); // Remove the cached batch.IdCache.Remove(account.UnsafeAsKeccak); diff --git a/src/Paprika/Utils/Printer.cs b/src/Paprika/Utils/Printer.cs index 69eafd6e..3f5a55dd 100644 --- a/src/Paprika/Utils/Printer.cs +++ b/src/Paprika/Utils/Printer.cs @@ -107,6 +107,11 @@ public void On(DataPage page, DbAddress addr) _printable.Add(addr.Raw, p); } + public void On(FanOutPage page, DbAddress addr) + { + + } + public void Print(TextWriter writer) { var builders = Enumerable.Range(0, PageHeight)