From 8b91e46d65eb296ba96dc2004a847a9504feb68c Mon Sep 17 00:00:00 2001
From: Szymon Kulec
Date: Tue, 27 Feb 2024 16:17:26 +0100
Subject: [PATCH] Simple leaf page (#261)

* Simple LeafPage introduced
* smarter cow for TrySet
* cleanup
* counting right
* extension caching
* reporting
* bigger fanout
* format
* do not nest parent responses to make caches smaller
* cache extension when it is touched on delete
---
 src/Paprika.Tests/Chain/BlockchainTests.cs   |   2 +-
 src/Paprika.Tests/Data/SlottedArrayTests.cs  |  25 +
 src/Paprika.Tests/Merkle/AdditionalTests.cs  |   2 +-
 src/Paprika.Tests/Store/BasePageTests.cs     |  43 +-
 src/Paprika.Tests/Store/DataPageTests.cs     | 953 +++++++++---------
 src/Paprika.Tests/Store/DbTests.cs           |  40 +-
 src/Paprika/Chain/Blockchain.cs              |   4 +-
 src/Paprika/Data/SlottedArray.cs             |  54 +-
 src/Paprika/Merkle/CommitExtensions.cs       |   4 +-
 src/Paprika/Merkle/ComputeMerkleBehavior.cs  |  14 +-
 src/Paprika/Store/BatchContextBase.cs        |   9 +-
 src/Paprika/Store/DataPage.cs                | 288 ++----
 src/Paprika/Store/IBatchContext.cs           |   1 +
 src/Paprika/Store/LeafPage.cs                | 125 +++
 .../Store/PageManagers/PointerPageManager.cs |   9 +-
 src/Paprika/Store/PagedDb.cs                 |   4 +-
 src/Paprika/Store/RootPage.cs                |   2 +-
 src/Paprika/Utils/ReadOnlySpanOwner.cs       |  18 +-
 18 files changed, 883 insertions(+), 714 deletions(-)
 create mode 100644 src/Paprika/Store/LeafPage.cs

diff --git a/src/Paprika.Tests/Chain/BlockchainTests.cs b/src/Paprika.Tests/Chain/BlockchainTests.cs
index 0f213472..34f0ae93 100644
--- a/src/Paprika.Tests/Chain/BlockchainTests.cs
+++ b/src/Paprika.Tests/Chain/BlockchainTests.cs
@@ -322,7 +322,7 @@ public async Task Account_destruction_database_flushed()
         blockchain.Finalize(hash);
 
         // Poor man's await on finalization flushed
-        await Task.Delay(500);
+        await blockchain.WaitTillFlush(hash);
 
         using var block2 = blockchain.StartNew(hash);
 
diff --git a/src/Paprika.Tests/Data/SlottedArrayTests.cs b/src/Paprika.Tests/Data/SlottedArrayTests.cs
index 229d7041..2c6e5264 100644
--- a/src/Paprika.Tests/Data/SlottedArrayTests.cs
+++ b/src/Paprika.Tests/Data/SlottedArrayTests.cs
@@ -103,6 +103,25 @@ public void Update_in_situ()
         map.GetAssert(key1, Data2);
     }
 
+    [Test]
+    public void Report_has_space_properly()
+    {
+        const int dataSize = 1;
+        const int keySize = 0;
+        var key = NibblePath.Empty;
+        Span<byte> value = stackalloc byte[dataSize] { 13 };
+        Span<byte> valueTooBig = stackalloc byte[dataSize + 1];
+
+        Span<byte> span = stackalloc byte[SlottedArray.OneSlotArrayMinimalSize + dataSize + keySize];
+        var map = new SlottedArray(span);
+
+        map.SetAssert(key, value);
+
+        map.HasSpaceToUpdateExisting(key, ReadOnlySpan<byte>.Empty).Should().BeTrue();
+        map.HasSpaceToUpdateExisting(key, value).Should().BeTrue();
+        map.HasSpaceToUpdateExisting(key, valueTooBig).Should().BeFalse();
+    }
+
     [Test]
     public void Update_in_resize()
     {
@@ -195,6 +214,12 @@ void Unique(in ReadOnlySpan<byte> key)
 
 file static class FixedMapTestExtensions
 {
+    public static void SetAssert(this SlottedArray map, in NibblePath key, ReadOnlySpan<byte> data,
+        string? because = null)
+    {
+        map.TrySet(key, data).Should().BeTrue(because ?? "TrySet should succeed");
+    }
+
     public static void SetAssert(this SlottedArray map, in ReadOnlySpan<byte> key, ReadOnlySpan<byte> data,
         string? because = null)
     {
diff --git a/src/Paprika.Tests/Merkle/AdditionalTests.cs b/src/Paprika.Tests/Merkle/AdditionalTests.cs
index 506cbb8c..b0c9df98 100644
--- a/src/Paprika.Tests/Merkle/AdditionalTests.cs
+++ b/src/Paprika.Tests/Merkle/AdditionalTests.cs
@@ -17,7 +17,7 @@ public async Task Account_destruction_same_block()
         const int seed = 17;
         const int storageCount = 32 * 1024;
 
-        using var db = PagedDb.NativeMemoryDb(8 * 1024 * 1024, 2);
+        using var db = PagedDb.NativeMemoryDb(16 * 1024 * 1024, 2);
         var merkle = new ComputeMerkleBehavior(2, 2);
 
         await using var blockchain = new Blockchain(db, merkle);
 
diff --git a/src/Paprika.Tests/Store/BasePageTests.cs b/src/Paprika.Tests/Store/BasePageTests.cs
index 3267847a..eba26cb6 100644
--- a/src/Paprika.Tests/Store/BasePageTests.cs
+++ b/src/Paprika.Tests/Store/BasePageTests.cs
@@ -1,4 +1,5 @@
 using System.Runtime.InteropServices;
+using FluentAssertions;
 using Paprika.Crypto;
 using Paprika.Store;
 
@@ -8,59 +9,67 @@ public abstract class BasePageTests
 {
     protected static unsafe Page AllocPage()
     {
-        var memory = (byte*)NativeMemory.AlignedAlloc((UIntPtr)Page.PageSize, (UIntPtr)sizeof(long));
+        var memory = (byte*)NativeMemory.AlignedAlloc(Page.PageSize, sizeof(long));
         new Span<byte>(memory, Page.PageSize).Clear();
         return new Page(memory);
     }
 
-    internal class TestBatchContext : BatchContextBase
+    internal class TestBatchContext(uint batchId, Stack<DbAddress>? reusable = null) : BatchContextBase(batchId)
     {
         private readonly Dictionary<DbAddress, Page> _address2Page = new();
         private readonly Dictionary<UIntPtr, DbAddress> _page2Address = new();
+        private readonly Stack<DbAddress> _reusable = reusable ?? new Stack<DbAddress>();
+        private readonly HashSet<DbAddress> _toReuse = new();
 
         // data pages should start at non-null addresses
         // 0-N is taken by metadata pages
         private uint _pageCount = 1U;
 
-        public TestBatchContext(uint batchId) : base(batchId)
-        {
-            IdCache = new Dictionary<Keccak, uint>();
-        }
-
         public override Page GetAt(DbAddress address) => _address2Page[address];
 
         public override DbAddress GetAddress(Page page) => _page2Address[page.Raw];
 
         public override Page GetNewPage(out DbAddress addr, bool clear)
         {
-            var page = AllocPage();
+            Page page;
+            if (_reusable.TryPop(out addr))
+            {
+                page = GetAt(addr);
+            }
+            else
+            {
+                page = AllocPage();
+                addr = DbAddress.Page(_pageCount++);
+
+                _address2Page[addr] = page;
+                _page2Address[page.Raw] = addr;
+            }
+
             if (clear)
                 page.Clear();
 
             page.Header.BatchId = BatchId;
 
-            addr = DbAddress.Page(_pageCount++);
-
-            _address2Page[addr] = page;
-            _page2Address[page.Raw] = addr;
-
             return page;
         }
 
         // for now
         public override bool WasWritten(DbAddress addr) => true;
 
+
         public override void RegisterForFutureReuse(Page page)
         {
-            // NOOP
+            _toReuse.Add(GetAddress(page))
+                .Should()
+                .BeTrue("Page should not be registered as reusable before");
         }
 
-        public override Dictionary<Keccak, uint> IdCache { get; }
+        public override Dictionary<Keccak, uint> IdCache { get; } = new();
 
         public override string ToString() => $"Batch context used {_pageCount} pages to write the data";
 
         public TestBatchContext Next()
         {
-            var next = new TestBatchContext(BatchId + 1);
+            var next = new TestBatchContext(BatchId + 1, new Stack<DbAddress>(_toReuse));
 
             // remember the mapping
             foreach (var (addr, page) in _address2Page)
@@ -78,6 +87,8 @@ public TestBatchContext Next()
 
             return next;
         }
+
+        public uint PageCount => _pageCount;
     }
 
     internal static TestBatchContext NewBatch(uint batchId) => new(batchId);
diff --git a/src/Paprika.Tests/Store/DataPageTests.cs b/src/Paprika.Tests/Store/DataPageTests.cs
index 12f7739c..c78e0b70 100644
--- a/src/Paprika.Tests/Store/DataPageTests.cs
+++
b/src/Paprika.Tests/Store/DataPageTests.cs @@ -1,463 +1,490 @@ -// using System.Buffers.Binary; -// using FluentAssertions; -// using Nethermind.Int256; -// using NUnit.Framework; -// using Paprika.Crypto; -// using Paprika.Data; -// using Paprika.Store; -// using static Paprika.Tests.Values; -// -// namespace Paprika.Tests.Store; -// -// public class DataPageTests : BasePageTests -// { -// private const uint BatchId = 1; -// -// private static byte[] GetValue(int i) => new UInt256((uint)i).ToBigEndian(); -// -// private static Keccak GetKey(int i) -// { -// var keccak = Keccak.Zero; -// BinaryPrimitives.WriteInt32LittleEndian(keccak.BytesAsSpan, i); -// return keccak; -// } -// -// [Test] -// public void Set_then_Get() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// var value = GetValue(0); -// -// var updated = dataPage.SetAccount(Key0, value, batch); -// updated.ShouldHaveAccount(Key0, value, batch); -// } -// -// [Test] -// public void Update_key() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var value0 = GetValue(0); -// var value1 = GetValue(1); -// -// var dataPage = new DataPage(page); -// -// var updated = dataPage -// .SetAccount(Key0, value0, batch) -// .SetAccount(Key0, value1, batch); -// -// updated.ShouldHaveAccount(Key0, value1, batch); -// } -// -// [Test] -// public void Works_with_bucket_collision() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// -// var dataPage = new DataPage(page); -// var value1A = GetValue(0); -// var value1B = GetValue(1); -// -// var updated = dataPage -// .SetAccount(Key1A, value1A, batch) -// .SetAccount(Key1B, value1B, batch); -// -// updated.ShouldHaveAccount(Key1A, value1A, batch); -// updated.ShouldHaveAccount(Key1B, value1B, batch); -// } -// -// [Test] -// public void Page_overflows() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int count = 128 * 1024; -// const int seed = 13; -// -// var random = new Random(seed); -// for (var i = 0; i < count; i++) -// { -// dataPage = dataPage.SetAccount(random.NextKeccak(), GetValue(i), batch); -// } -// -// random = new Random(seed); -// for (var i = 0; i < count; i++) -// { -// dataPage.ShouldHaveAccount(random.NextKeccak(), GetValue(i), batch, i); -// } -// } -// -// [Test(Description = "The test for a page that has some accounts and their storages with 50-50 ratio")] -// public void Page_overflows_with_some_storage_and_some_accounts() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int count = 35; -// -// for (int i = 0; i < count; i++) -// { -// var key = GetKey(i); -// var address = key; -// var value = GetValue(i); -// -// dataPage = dataPage -// .SetAccount(key, value, batch) -// .SetStorage(key, address, value, batch); -// } -// -// for (int i = 0; i < count; i++) -// { -// var key = GetKey(i); -// var address = key; -// var value = GetValue(i); -// -// dataPage.ShouldHaveAccount(key, value, batch); -// dataPage.ShouldHaveStorage(key, address, value, batch); -// } -// } -// -// [Test(Description = -// "The scenario to test handling updates over multiple batches so that the pages are properly linked and used.")] -// public void Multiple_batches() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = 
NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int count = 32 * 1024; -// const int batchEvery = 32; -// -// for (int i = 0; i < count; i++) -// { -// var key = GetKey(i); -// -// if (i % batchEvery == 0) -// { -// batch = batch.Next(); -// } -// -// dataPage = dataPage.SetAccount(key, GetValue(i), batch); -// } -// -// for (int i = 0; i < count; i++) -// { -// var key = GetKey(i); -// -// dataPage.ShouldHaveAccount(key, GetValue(i), batch); -// } -// } -// -// [Test(Description = "Ensures that tree can hold entries with NibblePaths of various lengths")] -// public void Var_length_NibblePaths() -// { -// Span data = stackalloc byte[1] { 13 }; -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// // big enough to fill the page -// const int count = 200; -// -// // set the empty path which may happen on var-length scenarios -// var keccakKey = Key.Account(NibblePath.Empty); -// dataPage = dataPage.Set(new SetContext(NibblePath.Empty, data, batch)).Cast(); -// -// for (var i = 0; i < count; i++) -// { -// var key = GetKey(i); -// dataPage = dataPage.SetAccount(key, GetValue(i), batch); -// } -// -// // assert -// dataPage.TryGet(keccakKey, batch, out var value).Should().BeTrue(); -// value.SequenceEqual(data).Should().BeTrue(); -// -// for (int i = 0; i < count; i++) -// { -// var key = GetKey(i); -// var path = NibblePath.FromKey(key); -// dataPage.ShouldHaveAccount(key, GetValue(i), batch); -// } -// } -// -// [Test] -// public void Page_overflows_with_merkle() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int seed = 13; -// var rand = new Random(seed); -// -// const int count = 10_000; -// for (int i = 0; i < count; i++) -// { -// var account = NibblePath.FromKey(rand.NextKeccak()); -// -// var accountKey = Key.Raw(account, DataType.Account, NibblePath.Empty); -// var merkleKey = Key.Raw(account, DataType.Merkle, NibblePath.Empty); -// -// dataPage = dataPage.Set(new SetContext(accountKey, GetAccountValue(i), batch)).Cast(); -// dataPage = dataPage.Set(new SetContext(merkleKey, GetMerkleValue(i), batch)).Cast(); -// } -// -// rand = new Random(seed); -// -// for (int i = 0; i < count; i++) -// { -// var account = NibblePath.FromKey(rand.NextKeccak()); -// -// var accountKey = Key.Raw(account, DataType.Account, NibblePath.Empty); -// var merkleKey = Key.Raw(account, DataType.Merkle, NibblePath.Empty); -// -// dataPage.TryGet(accountKey, batch, out var actualAccountValue).Should().BeTrue(); -// actualAccountValue.SequenceEqual(GetAccountValue(i)).Should().BeTrue(); -// -// dataPage.TryGet(merkleKey, batch, out var actualMerkleValue).Should().BeTrue(); -// actualMerkleValue.SequenceEqual(GetMerkleValue(i)).Should().BeTrue(); -// } -// -// static byte[] GetAccountValue(int i) => BitConverter.GetBytes(i * 2 + 1); -// static byte[] GetMerkleValue(int i) => BitConverter.GetBytes(i * 2); -// } -// -// [TestCase(1, 1000, TestName = "Value at the beginning")] -// [TestCase(999, 1000, TestName = "Value at the end")] -// public void Delete(int deleteAt, int count) -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// var account = NibblePath.FromKey(GetKey(0)); -// -// const int seed = 13; -// var random = new Random(seed); -// -// for (var i = 0; i < count; i++) -// { -// var storagePath = 
NibblePath.FromKey(random.NextKeccak()); -// var merkleKey = Key.Raw(account, DataType.Merkle, storagePath); -// var value = GetValue(i); -// -// dataPage = dataPage.Set(new SetContext(merkleKey, value, batch)).Cast(); -// } -// -// // delete -// random = new Random(seed); -// for (var i = 0; i < deleteAt; i++) -// { -// // skip till set -// random.NextKeccak(); -// } -// -// { -// var storagePath = NibblePath.FromKey(random.NextKeccak()); -// var merkleKey = Key.Raw(account, DataType.Merkle, storagePath); -// dataPage = dataPage.Set(new SetContext(merkleKey, ReadOnlySpan.Empty, batch)).Cast(); -// } -// -// // assert -// random = new Random(seed); -// -// for (var i = 0; i < count; i++) -// { -// var storagePath = NibblePath.FromKey(random.NextKeccak()); -// var merkleKey = Key.Raw(account, DataType.Merkle, storagePath); -// dataPage.TryGet(merkleKey, batch, out var actual).Should().BeTrue(); -// var value = i == deleteAt ? ReadOnlySpan.Empty : GetValue(i); -// actual.SequenceEqual(value).Should().BeTrue($"Does not match for i: {i} and delete at: {deleteAt}"); -// } -// } -// -// [Test] -// [Ignore("This test should be removed or rewritten")] -// public void Small_prefix_tree_with_regular() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int count = 19; // this is the number until a prefix tree is extracted -// -// var account = Keccak.EmptyTreeHash; -// -// dataPage = dataPage -// .SetAccount(account, GetValue(0), batch) -// .SetMerkle(account, GetValue(1), batch); -// -// for (var i = 0; i < count; i++) -// { -// var storage = GetKey(i); -// -// dataPage = dataPage -// .SetStorage(account, storage, GetValue(i), batch) -// .SetMerkle(account, NibblePath.FromKey(storage), GetValue(i), batch); -// } -// -// // write 256 more to fill up the page for each nibble -// for (var i = 0; i < ushort.MaxValue; i++) -// { -// dataPage = dataPage.SetAccount(GetKey(i), GetValue(i), batch); -// } -// -// // assert -// dataPage.ShouldHaveAccount(account, GetValue(0), batch); -// dataPage.ShouldHaveMerkle(account, GetValue(1), batch); -// -// for (var i = 0; i < count; i++) -// { -// var storage = GetKey(i); -// -// dataPage.ShouldHaveStorage(account, storage, GetValue(i), batch); -// dataPage.ShouldHaveMerkle(account, NibblePath.FromKey(storage), GetValue(i), batch); -// } -// -// // write 256 more to fill up the page for each nibble -// for (var i = 0; i < ushort.MaxValue; i++) -// { -// dataPage.ShouldHaveAccount(GetKey(i), GetValue(i), batch); -// } -// } -// -// [Test] -// public void Massive_prefix_tree() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int count = 10_000; -// -// var account = Keccak.EmptyTreeHash; -// -// dataPage = dataPage -// .SetAccount(account, GetValue(0), batch) -// .SetMerkle(account, GetValue(1), batch); -// -// for (var i = 0; i < count; i++) -// { -// var storage = GetKey(i); -// dataPage = dataPage -// .SetStorage(account, storage, GetValue(i), batch) -// .SetMerkle(account, GetMerkleKey(storage, i), GetValue(i), batch); -// } -// -// // assert -// dataPage.ShouldHaveAccount(account, GetValue(0), batch); -// dataPage.ShouldHaveMerkle(account, GetValue(1), batch); -// -// for (var i = 0; i < count; i++) -// { -// var storage = GetKey(i); -// -// dataPage.ShouldHaveStorage(account, storage, GetValue(i), batch); -// dataPage.ShouldHaveMerkle(account, GetMerkleKey(storage, i), 
GetValue(i), batch); -// } -// -// return; -// -// static NibblePath GetMerkleKey(in Keccak storage, int i) -// { -// return NibblePath.FromKey(storage).SliceTo(Math.Min(i + 1, NibblePath.KeccakNibbleCount)); -// } -// } -// -// [Test] -// public void Different_at_start_keys() -// { -// var page = AllocPage(); -// page.Clear(); -// -// var batch = NewBatch(BatchId); -// var dataPage = new DataPage(page); -// -// const int count = 10_000; -// -// Span dest = stackalloc byte[sizeof(int)]; -// Span store = stackalloc byte[StoreKey.StorageKeySize]; -// -// const DataType compressedAccount = DataType.Account | DataType.CompressedAccount; -// const DataType compressedMerkle = DataType.Merkle | DataType.CompressedAccount; -// -// ReadOnlySpan accountValue = stackalloc byte[1] { (byte)compressedAccount }; -// ReadOnlySpan merkleValue = stackalloc byte[1] { (byte)compressedMerkle }; -// -// for (var i = 0; i < count; i++) -// { -// BinaryPrimitives.WriteInt32LittleEndian(dest, i); -// var path = NibblePath.FromKey(dest); -// -// // account -// { -// var accountKey = Key.Raw(path, compressedAccount, NibblePath.Empty); -// var accountStoreKey = StoreKey.Encode(accountKey, store); -// -// dataPage = new DataPage(dataPage.Set(NibblePath.FromKey(accountStoreKey.Payload), accountValue, batch)); -// } -// -// // merkle -// { -// var merkleKey = Key.Raw(path, compressedMerkle, NibblePath.Empty); -// var merkleStoreKey = StoreKey.Encode(merkleKey, store); -// -// dataPage = new DataPage(dataPage.Set(NibblePath.FromKey(merkleStoreKey.Payload), merkleValue, batch)); -// } -// } -// -// for (var i = 0; i < count; i++) -// { -// BinaryPrimitives.WriteInt32LittleEndian(dest, i); -// var path = NibblePath.FromKey(dest); -// -// // account -// { -// var accountKey = Key.Raw(path, compressedAccount, NibblePath.Empty); -// var accountStoreKey = StoreKey.Encode(accountKey, store); -// -// dataPage.TryGet(NibblePath.FromKey(accountStoreKey.Payload), batch, out var value).Should().BeTrue(); -// value.SequenceEqual(accountValue).Should().BeTrue(); -// } -// -// // merkle -// { -// var merkleKey = Key.Raw(path, compressedMerkle, NibblePath.Empty); -// var merkleStoreKey = StoreKey.Encode(merkleKey, store); -// -// dataPage.TryGet(NibblePath.FromKey(merkleStoreKey.Payload), batch, out var value).Should().BeTrue(); -// value.SequenceEqual(merkleValue).Should().BeTrue(); -// } -// } -// } -// } \ No newline at end of file +using System.Buffers.Binary; +using System.Diagnostics; +using FluentAssertions; +using Nethermind.Int256; +using NUnit.Framework; +using Paprika.Crypto; +using Paprika.Data; +using Paprika.Store; + +namespace Paprika.Tests.Store; + +public class DataPageTests : BasePageTests +{ + private const uint BatchId = 1; + + [DebuggerStepThrough] + private static byte[] GetValue(int i) => new UInt256((uint)i).ToBigEndian(); + + [Test] + public void Spinning_through_same_keys_should_use_limited_number_of_pages() + { + var batch = NewBatch(BatchId); + var page = batch.GetNewPage(out _, true); + + var data = new DataPage(page); + + const int spins = 2_000; + const int count = 1024; + + for (var spin = 0; spin < spins; spin++) + { + for (var i = 0; i < count; i++) + { + Keccak keccak = default; + BinaryPrimitives.WriteInt32LittleEndian(keccak.BytesAsSpan, i); + var path = NibblePath.FromKey(keccak); + + data = new DataPage(data.Set(path, GetValue(i), batch)); + } + + batch = batch.Next(); + } + + for (var j = 0; j < count; j++) + { + Keccak search = default; + 
BinaryPrimitives.WriteInt32LittleEndian(search.BytesAsSpan, j); + + data.TryGet(NibblePath.FromKey(search), batch, out var result) + .Should() + .BeTrue($"Failed to read {j}"); + + result.SequenceEqual(GetValue(j)) + .Should() + .BeTrue($"Failed to read value of {j}"); + } + + batch.PageCount.Should().BeLessThan(60); + } + + // private static Keccak GetKey(int i) + // { + // var keccak = Keccak.Zero; + // BinaryPrimitives.WriteInt32LittleEndian(keccak.BytesAsSpan, i); + // return keccak; + // } + // [Test] + // public void Update_key() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var value0 = GetValue(0); + // var value1 = GetValue(1); + // + // var dataPage = new DataPage(page); + // + // var updated = dataPage + // .SetAccount(Key0, value0, batch) + // .SetAccount(Key0, value1, batch); + // + // updated.ShouldHaveAccount(Key0, value1, batch); + // } + // + // [Test] + // public void Works_with_bucket_collision() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // + // var dataPage = new DataPage(page); + // var value1A = GetValue(0); + // var value1B = GetValue(1); + // + // var updated = dataPage + // .SetAccount(Key1A, value1A, batch) + // .SetAccount(Key1B, value1B, batch); + // + // updated.ShouldHaveAccount(Key1A, value1A, batch); + // updated.ShouldHaveAccount(Key1B, value1B, batch); + // } + // + // [Test] + // public void Page_overflows() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int count = 128 * 1024; + // const int seed = 13; + // + // var random = new Random(seed); + // for (var i = 0; i < count; i++) + // { + // dataPage = dataPage.SetAccount(random.NextKeccak(), GetValue(i), batch); + // } + // + // random = new Random(seed); + // for (var i = 0; i < count; i++) + // { + // dataPage.ShouldHaveAccount(random.NextKeccak(), GetValue(i), batch, i); + // } + // } + // + // [Test(Description = "The test for a page that has some accounts and their storages with 50-50 ratio")] + // public void Page_overflows_with_some_storage_and_some_accounts() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int count = 35; + // + // for (int i = 0; i < count; i++) + // { + // var key = GetKey(i); + // var address = key; + // var value = GetValue(i); + // + // dataPage = dataPage + // .SetAccount(key, value, batch) + // .SetStorage(key, address, value, batch); + // } + // + // for (int i = 0; i < count; i++) + // { + // var key = GetKey(i); + // var address = key; + // var value = GetValue(i); + // + // dataPage.ShouldHaveAccount(key, value, batch); + // dataPage.ShouldHaveStorage(key, address, value, batch); + // } + // } + // + // [Test(Description = + // "The scenario to test handling updates over multiple batches so that the pages are properly linked and used.")] + // public void Multiple_batches() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int count = 32 * 1024; + // const int batchEvery = 32; + // + // for (int i = 0; i < count; i++) + // { + // var key = GetKey(i); + // + // if (i % batchEvery == 0) + // { + // batch = batch.Next(); + // } + // + // dataPage = dataPage.SetAccount(key, GetValue(i), batch); + // } + // + // for (int i = 0; i < count; i++) + // { + // var 
key = GetKey(i); + // + // dataPage.ShouldHaveAccount(key, GetValue(i), batch); + // } + // } + // + // [Test(Description = "Ensures that tree can hold entries with NibblePaths of various lengths")] + // public void Var_length_NibblePaths() + // { + // Span data = stackalloc byte[1] { 13 }; + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // // big enough to fill the page + // const int count = 200; + // + // // set the empty path which may happen on var-length scenarios + // var keccakKey = Key.Account(NibblePath.Empty); + // dataPage = dataPage.Set(new SetContext(NibblePath.Empty, data, batch)).Cast(); + // + // for (var i = 0; i < count; i++) + // { + // var key = GetKey(i); + // dataPage = dataPage.SetAccount(key, GetValue(i), batch); + // } + // + // // assert + // dataPage.TryGet(keccakKey, batch, out var value).Should().BeTrue(); + // value.SequenceEqual(data).Should().BeTrue(); + // + // for (int i = 0; i < count; i++) + // { + // var key = GetKey(i); + // var path = NibblePath.FromKey(key); + // dataPage.ShouldHaveAccount(key, GetValue(i), batch); + // } + // } + // + // [Test] + // public void Page_overflows_with_merkle() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int seed = 13; + // var rand = new Random(seed); + // + // const int count = 10_000; + // for (int i = 0; i < count; i++) + // { + // var account = NibblePath.FromKey(rand.NextKeccak()); + // + // var accountKey = Key.Raw(account, DataType.Account, NibblePath.Empty); + // var merkleKey = Key.Raw(account, DataType.Merkle, NibblePath.Empty); + // + // dataPage = dataPage.Set(new SetContext(accountKey, GetAccountValue(i), batch)).Cast(); + // dataPage = dataPage.Set(new SetContext(merkleKey, GetMerkleValue(i), batch)).Cast(); + // } + // + // rand = new Random(seed); + // + // for (int i = 0; i < count; i++) + // { + // var account = NibblePath.FromKey(rand.NextKeccak()); + // + // var accountKey = Key.Raw(account, DataType.Account, NibblePath.Empty); + // var merkleKey = Key.Raw(account, DataType.Merkle, NibblePath.Empty); + // + // dataPage.TryGet(accountKey, batch, out var actualAccountValue).Should().BeTrue(); + // actualAccountValue.SequenceEqual(GetAccountValue(i)).Should().BeTrue(); + // + // dataPage.TryGet(merkleKey, batch, out var actualMerkleValue).Should().BeTrue(); + // actualMerkleValue.SequenceEqual(GetMerkleValue(i)).Should().BeTrue(); + // } + // + // static byte[] GetAccountValue(int i) => BitConverter.GetBytes(i * 2 + 1); + // static byte[] GetMerkleValue(int i) => BitConverter.GetBytes(i * 2); + // } + // + // [TestCase(1, 1000, TestName = "Value at the beginning")] + // [TestCase(999, 1000, TestName = "Value at the end")] + // public void Delete(int deleteAt, int count) + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // var account = NibblePath.FromKey(GetKey(0)); + // + // const int seed = 13; + // var random = new Random(seed); + // + // for (var i = 0; i < count; i++) + // { + // var storagePath = NibblePath.FromKey(random.NextKeccak()); + // var merkleKey = Key.Raw(account, DataType.Merkle, storagePath); + // var value = GetValue(i); + // + // dataPage = dataPage.Set(new SetContext(merkleKey, value, batch)).Cast(); + // } + // + // // delete + // random = new Random(seed); + // for (var i = 0; i < deleteAt; i++) 
+ // { + // // skip till set + // random.NextKeccak(); + // } + // + // { + // var storagePath = NibblePath.FromKey(random.NextKeccak()); + // var merkleKey = Key.Raw(account, DataType.Merkle, storagePath); + // dataPage = dataPage.Set(new SetContext(merkleKey, ReadOnlySpan.Empty, batch)).Cast(); + // } + // + // // assert + // random = new Random(seed); + // + // for (var i = 0; i < count; i++) + // { + // var storagePath = NibblePath.FromKey(random.NextKeccak()); + // var merkleKey = Key.Raw(account, DataType.Merkle, storagePath); + // dataPage.TryGet(merkleKey, batch, out var actual).Should().BeTrue(); + // var value = i == deleteAt ? ReadOnlySpan.Empty : GetValue(i); + // actual.SequenceEqual(value).Should().BeTrue($"Does not match for i: {i} and delete at: {deleteAt}"); + // } + // } + // + // [Test] + // [Ignore("This test should be removed or rewritten")] + // public void Small_prefix_tree_with_regular() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int count = 19; // this is the number until a prefix tree is extracted + // + // var account = Keccak.EmptyTreeHash; + // + // dataPage = dataPage + // .SetAccount(account, GetValue(0), batch) + // .SetMerkle(account, GetValue(1), batch); + // + // for (var i = 0; i < count; i++) + // { + // var storage = GetKey(i); + // + // dataPage = dataPage + // .SetStorage(account, storage, GetValue(i), batch) + // .SetMerkle(account, NibblePath.FromKey(storage), GetValue(i), batch); + // } + // + // // write 256 more to fill up the page for each nibble + // for (var i = 0; i < ushort.MaxValue; i++) + // { + // dataPage = dataPage.SetAccount(GetKey(i), GetValue(i), batch); + // } + // + // // assert + // dataPage.ShouldHaveAccount(account, GetValue(0), batch); + // dataPage.ShouldHaveMerkle(account, GetValue(1), batch); + // + // for (var i = 0; i < count; i++) + // { + // var storage = GetKey(i); + // + // dataPage.ShouldHaveStorage(account, storage, GetValue(i), batch); + // dataPage.ShouldHaveMerkle(account, NibblePath.FromKey(storage), GetValue(i), batch); + // } + // + // // write 256 more to fill up the page for each nibble + // for (var i = 0; i < ushort.MaxValue; i++) + // { + // dataPage.ShouldHaveAccount(GetKey(i), GetValue(i), batch); + // } + // } + // + // [Test] + // public void Massive_prefix_tree() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int count = 10_000; + // + // var account = Keccak.EmptyTreeHash; + // + // dataPage = dataPage + // .SetAccount(account, GetValue(0), batch) + // .SetMerkle(account, GetValue(1), batch); + // + // for (var i = 0; i < count; i++) + // { + // var storage = GetKey(i); + // dataPage = dataPage + // .SetStorage(account, storage, GetValue(i), batch) + // .SetMerkle(account, GetMerkleKey(storage, i), GetValue(i), batch); + // } + // + // // assert + // dataPage.ShouldHaveAccount(account, GetValue(0), batch); + // dataPage.ShouldHaveMerkle(account, GetValue(1), batch); + // + // for (var i = 0; i < count; i++) + // { + // var storage = GetKey(i); + // + // dataPage.ShouldHaveStorage(account, storage, GetValue(i), batch); + // dataPage.ShouldHaveMerkle(account, GetMerkleKey(storage, i), GetValue(i), batch); + // } + // + // return; + // + // static NibblePath GetMerkleKey(in Keccak storage, int i) + // { + // return NibblePath.FromKey(storage).SliceTo(Math.Min(i + 1, 
NibblePath.KeccakNibbleCount)); + // } + // } + // + // [Test] + // public void Different_at_start_keys() + // { + // var page = AllocPage(); + // page.Clear(); + // + // var batch = NewBatch(BatchId); + // var dataPage = new DataPage(page); + // + // const int count = 10_000; + // + // Span dest = stackalloc byte[sizeof(int)]; + // Span store = stackalloc byte[StoreKey.StorageKeySize]; + // + // const DataType compressedAccount = DataType.Account | DataType.CompressedAccount; + // const DataType compressedMerkle = DataType.Merkle | DataType.CompressedAccount; + // + // ReadOnlySpan accountValue = stackalloc byte[1] { (byte)compressedAccount }; + // ReadOnlySpan merkleValue = stackalloc byte[1] { (byte)compressedMerkle }; + // + // for (var i = 0; i < count; i++) + // { + // BinaryPrimitives.WriteInt32LittleEndian(dest, i); + // var path = NibblePath.FromKey(dest); + // + // // account + // { + // var accountKey = Key.Raw(path, compressedAccount, NibblePath.Empty); + // var accountStoreKey = StoreKey.Encode(accountKey, store); + // + // dataPage = new DataPage(dataPage.Set(NibblePath.FromKey(accountStoreKey.Payload), accountValue, batch)); + // } + // + // // merkle + // { + // var merkleKey = Key.Raw(path, compressedMerkle, NibblePath.Empty); + // var merkleStoreKey = StoreKey.Encode(merkleKey, store); + // + // dataPage = new DataPage(dataPage.Set(NibblePath.FromKey(merkleStoreKey.Payload), merkleValue, batch)); + // } + // } + // + // for (var i = 0; i < count; i++) + // { + // BinaryPrimitives.WriteInt32LittleEndian(dest, i); + // var path = NibblePath.FromKey(dest); + // + // // account + // { + // var accountKey = Key.Raw(path, compressedAccount, NibblePath.Empty); + // var accountStoreKey = StoreKey.Encode(accountKey, store); + // + // dataPage.TryGet(NibblePath.FromKey(accountStoreKey.Payload), batch, out var value).Should().BeTrue(); + // value.SequenceEqual(accountValue).Should().BeTrue(); + // } + // + // // merkle + // { + // var merkleKey = Key.Raw(path, compressedMerkle, NibblePath.Empty); + // var merkleStoreKey = StoreKey.Encode(merkleKey, store); + // + // dataPage.TryGet(NibblePath.FromKey(merkleStoreKey.Payload), batch, out var value).Should().BeTrue(); + // value.SequenceEqual(merkleValue).Should().BeTrue(); + // } + // } + // } +} \ No newline at end of file diff --git a/src/Paprika.Tests/Store/DbTests.cs b/src/Paprika.Tests/Store/DbTests.cs index fee93f8a..79846340 100644 --- a/src/Paprika.Tests/Store/DbTests.cs +++ b/src/Paprika.Tests/Store/DbTests.cs @@ -14,6 +14,7 @@ public class DbTests private const int MB = 1024 * 1024; private const int MB16 = 16 * MB; private const int MB64 = 64 * MB; + private const int MB128 = 128 * MB; private const int MB256 = 256 * MB; [Test] @@ -193,7 +194,7 @@ public async Task Spin_large() const int size = MB256; using var db = PagedDb.NativeMemoryDb(size); - const int batches = 100; + const int batches = 50; const int storageSlots = 20_000; const int storageKeyLength = 32; @@ -245,6 +246,43 @@ Keccak GetStorageAddress(int i) } } + [Test] + public async Task Uniform_buckets_spin() + { + var account = Keccak.EmptyTreeHash; + + const int size = MB16; + using var db = PagedDb.NativeMemoryDb(size); + + const int batches = 2_000; + const int storageSlots = 256; + + var value = new byte[32]; + + var random = new Random(13); + random.NextBytes(value); + + for (var i = 0; i < batches; i++) + { + using var batch = db.BeginNextBatch(); + + for (var slot = 0; slot < storageSlots; slot++) + { + batch.SetStorage(account, GetStorageAddress(slot), 
value);
+            }
+
+            await batch.Commit(CommitOptions.FlushDataAndRoot);
+        }
+
+        return;
+
+        Keccak GetStorageAddress(int i)
+        {
+            Keccak result = default;
+            BinaryPrimitives.WriteInt32LittleEndian(result.BytesAsSpan, i);
+            return result;
+        }
+    }
+
     private static void AssertPageMetadataAssigned(PagedDb db)
     {
diff --git a/src/Paprika/Chain/Blockchain.cs b/src/Paprika/Chain/Blockchain.cs
index d58b46c0..6f84bf64 100644
--- a/src/Paprika/Chain/Blockchain.cs
+++ b/src/Paprika/Chain/Blockchain.cs
@@ -792,8 +792,8 @@ public ReadOnlySpanOwnerWithMetadata<byte> Get(scoped in Key key)
                 return new ReadOnlySpanOwnerWithMetadata<byte>(new ReadOnlySpanOwner<byte>(result, this), 0);
             }
 
-            // Return as nested to show that it's beyond level 0.
-            return parent.Get(key).Nest();
+            // Don't nest, as reaching to the parent should be easy.
+            return parent.Get(key);
         }
 
         public void Set(in Key key, in ReadOnlySpan<byte> payload, EntryType type)
diff --git a/src/Paprika/Data/SlottedArray.cs b/src/Paprika/Data/SlottedArray.cs
index 77879a4c..510f03c5 100644
--- a/src/Paprika/Data/SlottedArray.cs
+++ b/src/Paprika/Data/SlottedArray.cs
@@ -21,6 +21,11 @@ namespace Paprika.Data;
 /// </summary>
 public readonly ref struct SlottedArray
 {
+    /// <summary>
+    /// The size of the metadata required to store a single slot.
+    /// </summary>
+    public const int OneSlotArrayMinimalSize = Header.Size + Slot.Size;
+
     private readonly ref Header _header;
     private readonly Span<byte> _data;
     private readonly Span<Slot> _slots;
@@ -115,7 +120,11 @@ public bool TrySet(in NibblePath key, ReadOnlySpan<byte> data, ushort? keyHash =
     /// </summary>
     public int Count => _header.Low / Slot.Size;
 
-    public int CapacityLeft => _data.Length - _header.Taken;
+    /// <summary>
+    /// Returns the capacity left in the map.
+    /// It includes the bytes of deleted slots that can be reclaimed when a defragmentation happens.
+    /// </summary>
+    public int CapacityLeft => _data.Length - _header.Taken + _header.Deleted;
 
     public Enumerator EnumerateAll() => new(this);
 
@@ -255,6 +264,9 @@ public void GatherCountStatistics(Span<ushort> buckets)
 
     private const int KeyLengthLength = 1;
 
+    public static int EstimateNeededCapacity(in NibblePath key, ReadOnlySpan<byte> data) =>
+        GetTotalSpaceRequired(key, data) + Slot.Size;
+
     private static int GetTotalSpaceRequired(in NibblePath key, ReadOnlySpan<byte> data)
     {
         return (key.RawPreamble <= Slot.MaxSlotPreamble ? 0 : KeyLengthLength) +
@@ -281,8 +293,14 @@ public bool Delete(in NibblePath key)
     private void DeleteImpl(int index)
    {
         // mark as deleted first
-        _slots[index].IsDeleted = true;
-        _header.Deleted++;
+        ref var slot = ref _slots[index];
+        slot.IsDeleted = true;
+
+        var size = (ushort)(GetSlotLength(ref slot) + Slot.Size);
+
+        Debug.Assert(_header.Deleted + size <= _data.Length, "Deleted marker breached size");
+
+        _header.Deleted += size;
 
         // always try to compact after delete
         CollectTombstones();
@@ -341,14 +359,19 @@ private void CollectTombstones()
             // undo writing low
             _header.Low -= Slot.Size;
 
+            ref var slot = ref _slots[index];
+
             // undo writing high
-            var slice = GetSlotPayload(ref _slots[index]);
+            var slice = GetSlotPayload(ref slot);
             var total = slice.Length;
             _header.High = (ushort)(_header.High - total);
 
             // cleanup
-            _slots[index] = default;
-            _header.Deleted--;
+            Debug.Assert(_header.Deleted >= total + Slot.Size, "Deleted marker breached size");
+
+            _header.Deleted -= (ushort)(total + Slot.Size);
+
+            slot = default;
 
             // move back by one to see if it's deleted as well
             index--;
@@ -367,6 +390,17 @@ public bool TryGet(in NibblePath key, out ReadOnlySpan<byte> data)
         return false;
     }
 
+    public bool HasSpaceToUpdateExisting(in NibblePath key, in ReadOnlySpan<byte> data)
+    {
+        if (!TryGetImpl(key, GetHash(key), out _, out var index))
+        {
+            return false;
+        }
+
+        var requiredWithoutSlotLength = GetTotalSpaceRequired(key, data);
+        return requiredWithoutSlotLength <= GetSlotLength(ref _slots[index]);
+    }
+
     [OptimizationOpportunity(OptimizationType.CPU,
         "key encoding is delayed but it might be called twice, here + TrySet")]
     private bool TryGetImpl(in NibblePath key, ushort hash, out Span<byte> data, out int slotIndex)
@@ -445,15 +479,17 @@ private bool TryGetImpl(in NibblePath key, ushort hash, out Span<byte> data, out
     /// <summary>
     /// Gets the payload pointed to by the given slot without the length prefix.
     /// </summary>
-    private Span<byte> GetSlotPayload(ref Slot slot)
+    private Span<byte> GetSlotPayload(ref Slot slot) => _data.Slice(slot.ItemAddress, GetSlotLength(ref slot));
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private ushort GetSlotLength(ref Slot slot)
     {
         // assert whether the slot has a previous, if not use data.length
         var previousSlotAddress = Unsafe.IsAddressLessThan(ref _slots[0], ref slot)
             ? Unsafe.Add(ref slot, -1).ItemAddress
             : _data.Length;
 
-        var length = previousSlotAddress - slot.ItemAddress;
-        return _data.Slice(slot.ItemAddress, length);
+        return (ushort)(previousSlotAddress - slot.ItemAddress);
     }
 
     [StructLayout(LayoutKind.Explicit, Size = Size)]
diff --git a/src/Paprika/Merkle/CommitExtensions.cs b/src/Paprika/Merkle/CommitExtensions.cs
index 7267b83f..9dd81dfe 100644
--- a/src/Paprika/Merkle/CommitExtensions.cs
+++ b/src/Paprika/Merkle/CommitExtensions.cs
@@ -45,10 +45,10 @@ public static void SetBranch(this ICommit commit, in Key key, NibbleSet.Readonly
         commit.Set(key, branch.WriteTo(stackalloc byte[branch.MaxByteLength]), rlp, type);
     }
 
-    public static void SetExtension(this ICommit commit, in Key key, in NibblePath path)
+    public static void SetExtension(this ICommit commit, in Key key, in NibblePath path, EntryType type = EntryType.Persistent)
     {
         var extension = new Node.Extension(path);
-        commit.Set(key, extension.WriteTo(stackalloc byte[extension.MaxByteLength]));
+        commit.Set(key, extension.WriteTo(stackalloc byte[extension.MaxByteLength]), type);
     }
 
     public static void DeleteKey(this ICommit commit, in Key key) => commit.Set(key, ReadOnlySpan<byte>.Empty);
diff --git a/src/Paprika/Merkle/ComputeMerkleBehavior.cs b/src/Paprika/Merkle/ComputeMerkleBehavior.cs
index 42813fde..0b205f1f 100644
--- a/src/Paprika/Merkle/ComputeMerkleBehavior.cs
+++ b/src/Paprika/Merkle/ComputeMerkleBehavior.cs
@@ -326,9 +326,9 @@ public ComputeContext(ICommit commit, TrieType trieType, ComputeHint hint, Cache
 
     private KeccakOrRlp Compute(scoped in Key key, scoped in ComputeContext ctx)
     {
-        // As leafs are not stored in the database, hint to lookup again on missing.
         using var owner = ctx.Commit.Get(key);
 
+        // The computation might be done for a node that was not traversed and might require a cache
         if (ctx.Budget.ShouldCache(owner, out var entryType))
         {
             ctx.Commit.Set(key, owner.Span, entryType);
@@ -385,6 +385,7 @@ private KeccakOrRlp EncodeLeafByPath(scoped in Key key, scoped in ComputeContext
     }
 #endif
 
+        // leaf data might be coming from the db, potentially cache it
         if (ctx.Budget.ShouldCache(leafData, out var entryType))
         {
             ctx.Commit.Set(leafKey, leafData.Span, entryType);
@@ -775,6 +776,11 @@ private static DeleteStatus Delete(in NibblePath path, int at, ICommit commit, C
 
         if (status == DeleteStatus.NodeTypePreserved)
         {
+            if (budget.ShouldCache(owner, out var entryType))
+            {
+                commit.SetExtension(key, ext.Path, entryType);
+            }
+
             // The node has not changed its type
             return DeleteStatus.NodeTypePreserved;
         }
@@ -1047,6 +1053,12 @@ private static void MarkPathDirty(in NibblePath path, ICommit commit, CacheBudge
                 {
                     // the path overlaps with what is there, move forward
                     i += ext.Path.Length - 1;
+
+                    if (budget.ShouldCache(owner, out var entryType))
+                    {
+                        commit.SetExtension(key, ext.Path, entryType);
+                    }
+
                     continue;
                 }
 
diff --git a/src/Paprika/Store/BatchContextBase.cs b/src/Paprika/Store/BatchContextBase.cs
index 7aef3a4f..5e88e0b3 100644
--- a/src/Paprika/Store/BatchContextBase.cs
+++ b/src/Paprika/Store/BatchContextBase.cs
@@ -5,14 +5,9 @@ namespace Paprika.Store;
 
 /// <summary>
 /// The base class for all context implementations.
 /// </summary>
-abstract class BatchContextBase : IBatchContext
+abstract class BatchContextBase(uint batchId) : IBatchContext
 {
-    protected BatchContextBase(uint batchId)
-    {
-        BatchId = batchId;
-    }
-
-    public uint BatchId { get; }
+    public uint BatchId { get; } = batchId;
 
     public abstract Page GetAt(DbAddress address);
 
diff --git a/src/Paprika/Store/DataPage.cs b/src/Paprika/Store/DataPage.cs
index f39e374d..41d66e96 100644
--- a/src/Paprika/Store/DataPage.cs
+++ b/src/Paprika/Store/DataPage.cs
@@ -1,5 +1,4 @@
-using System.Buffers;
-using System.Diagnostics;
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using Paprika.Data;
@@ -19,6 +18,8 @@ namespace Paprika.Store;
 [method: DebuggerStepThrough]
 public readonly unsafe struct DataPage(Page page) : IPageWithData<DataPage>
 {
+    private const int ConsumedNibbles = 1;
+
     public static DataPage Wrap(Page page) => new(page);
 
     private const int BucketCount = 16;
@@ -33,7 +34,6 @@ public Page Set(in NibblePath key, in ReadOnlySpan<byte> data, IBatchContext bat
     {
         // the page is from another batch, meaning, it's readonly. Copy
         var writable = batch.GetWritableCopy(page);
-
         return new DataPage(writable).Set(key, data, batch);
     }
 
@@ -42,207 +42,150 @@ public Page Set(in NibblePath key, in ReadOnlySpan<byte> data, IBatchContext bat
         if (isDelete)
         {
-            // delete locally
-            if (LeafCount <= MaxLeafCount)
+            // If it's a delete and the key is empty or there's no child page, delete in-situ
+            if (key.IsEmpty || Data.Buckets[key.FirstNibble].IsNull)
             {
-                map.Delete(key);
-                for (var i = 0; i < MaxLeafCount; i++)
-                {
-                    // TODO: consider checking whether the array contains the data first,
-                    // only then make it writable as it results in a COW
-                    if (TryGetWritableLeaf(i, batch, out var leaf)) leaf.Delete(key);
-                }
-
-                return page;
-            }
-
-            if (key.IsEmpty)
-            {
-                // there's no lower level, delete in map
+                // An empty key can be deleted only in-situ
                 map.Delete(key);
                 return page;
             }
-
-            var childPageAddress = Data.Buckets[key.FirstNibble];
-            if (childPageAddress.IsNull)
-            {
-                // there's no lower level, delete in map
-                map.Delete(key);
-                return page;
-            }
         }
 
-        // try write in map
+        // Try write in map
         if (map.TrySet(key, data))
         {
             return page;
         }
 
-        // if no Descendants, create first leaf
-        if (LeafCount == 0)
-        {
-            TryGetWritableLeaf(0, batch, out var leaf, true);
-            this.LeafCount = 1;
-        }
-
-        if (LeafCount <= MaxLeafCount)
+        // Page is full, flush down
+        if (FlushDownToExistingLeafs(map, batch))
         {
-            // try get the newest
-            TryGetWritableLeaf(LeafCount - 1, batch, out var newest);
-
-            // move as many as possible to the first leaf and try to re-add
-            var anyMoved = map.MoveTo(newest) > 0;
-
-            if (anyMoved && map.TrySet(key, data))
+            // Flush down succeeded, try to set again
+            if (map.TrySet(key, data))
             {
                 return page;
             }
-
-            this.LeafCount += 1;
-
-            if (LeafCount <= MaxLeafCount)
-            {
-                // still within leafs count
-                TryGetWritableLeaf(LeafCount - 1, batch, out newest, true);
-
-                map.MoveTo(newest);
-                if (map.TrySet(key, data))
-                {
-                    return page;
-                }
-
-                Debug.Fail("Shall never enter here as new entries are copied to the map");
-                return page;
-            }
-
-            // copy leafs and clear the buckets as they will be used by child pages now
-            Span<DbAddress> leafs = stackalloc DbAddress[MaxLeafCount];
-            Data.Buckets.Slice(0, MaxLeafCount).CopyTo(leafs);
-            Data.Buckets.Clear();
-
-            // need to deep copy the page, first memoize the map which has the newest data
-            var bytes = ArrayPool<byte>.Shared.Rent(Data.DataSpan.Length);
-            var copy = bytes.AsSpan(0, Data.DataSpan.Length);
-            Data.DataSpan.CopyTo(copy);
-
-            // clear the map
-            Data.DataSpan.Clear();
-
-            // as oldest go first, iterate in the same direction
-            foreach (var leaf in leafs)
-            {
-                var leafPage = batch.GetAt(leaf);
-                batch.RegisterForFutureReuse(leafPage);
-                var leafMap = GetLeafSlottedArray(leafPage);
-
-                foreach (var item in leafMap.EnumerateAll())
-                {
-                    Set(item.Key, item.RawData, batch);
-                }
-            }
-
-            foreach (var item in new SlottedArray(copy).EnumerateAll())
-            {
-                Set(item.Key, item.RawData, batch);
-            }
-
-            ArrayPool<byte>.Shared.Return(bytes);
-
-            // set the actual data
-            return Set(key, data, batch);
         }
 
+        // None of the existing leafs was able to accept the write. Proceed with a regular flush.
+        // Find the most frequent nibble
         var nibble = FindMostFrequentNibble(map);
 
-        // try get the child page
+        // Try get the child page
         ref var address = ref Data.Buckets[nibble];
         Page child;
 
         if (address.IsNull)
         {
-            // create child as the same type as the parent
-            child = batch.GetNewPage(out Data.Buckets[nibble], true);
-            child.Header.PageType = Header.PageType;
+            // Create the child as a leaf page
+            child = batch.GetNewPage(out address, true);
+            child.Header.PageType = PageType.Leaf;
             child.Header.Level = (byte)(Header.Level + 1);
         }
         else
         {
-            // the child page is not-null, retrieve it
+            // The child page is not null, retrieve it
             child = batch.GetAt(address);
         }
 
-        var dataPage = new DataPage(child);
+        child = FlushDown(map, nibble, child, batch);
+        address = batch.GetAddress(child);
 
-        dataPage = FlushDown(map, nibble, dataPage, batch);
-        address = batch.GetAddress(dataPage.AsPage());
         // The page has some of the values flushed down, try to add again.
         return Set(key, data, batch);
     }
 
-    private static DataPage FlushDown(in SlottedArray map, byte nibble, DataPage destination, IBatchContext batch)
+    /// <summary>
+    /// Tries to flush down data to the existing leafs; it will never create new leafs
+    /// or transform leafs into data pages.
+    /// </summary>
+    private bool FlushDownToExistingLeafs(in SlottedArray map, IBatchContext batch)
     {
+        var leafCount = 0;
+        Span<LeafPage> leafs = stackalloc LeafPage[BucketCount];
+
+        for (var i = 0; i < BucketCount; i++)
+        {
+            var addr = Data.Buckets[i];
+            if (addr.IsNull == false)
+            {
+                var child = batch.GetAt(addr);
+                if (child.Header.PageType == PageType.Leaf)
+                {
+                    leafs[i] = new LeafPage(child);
+                    leafCount++;
+                }
+            }
+        }
+
+        if (leafCount == 0)
+        {
+            return false;
+        }
+
+        var flushCount = 0;
+
+        // Try flush down to leafs first
         foreach (var item in map.EnumerateAll())
         {
             var key = item.Key;
-            if (key.IsEmpty) // empty keys are left in page
+            if (key.IsEmpty)
+            {
                 continue;
+            }
 
-            if (key.FirstNibble != nibble)
+            var first = key.FirstNibble;
+            ref var leaf = ref leafs[first];
+
+            if (leaf.IsNull)
+            {
                 continue;
+            }
 
-            var sliced = key.SliceFrom(1);
+            // the key is non-empty and the leaf is not null
+            var sliced = key.SliceFrom(ConsumedNibbles);
 
-            destination = new DataPage(destination.Set(sliced, item.RawData, batch));
+            var (cow, success) = leaf.TrySet(sliced, item.RawData, batch);
 
-            // use the special delete for the item that is much faster than map.Delete(item.Key);
-            map.Delete(item);
+            // save the cow
+            leaf = cow;
+            Data.Buckets[first] = batch.GetAddress(cow.AsPage());
+
+            if (success)
+            {
+                map.Delete(item);
+                flushCount++;
+            }
         }
 
-        return destination;
+        return flushCount > 0;
     }
 
-    private ref byte LeafCount => ref Header.Metadata;
-    private const byte MaxLeafCount = 6;
-
-    private bool TryGetWritableLeaf(int index, IBatchContext batch, out SlottedArray leaf,
-        bool allocateOnMissing = false)
+    private static Page FlushDown(in SlottedArray map, byte nibble, Page destination, IBatchContext batch)
     {
-        ref var addr = ref Data.Buckets[index];
+        foreach (var item in map.EnumerateAll())
+        {
+            var key = item.Key;
+            if (key.IsEmpty) // empty keys are left in page
+                continue;
 
-        Page page;
+            if (key.FirstNibble != nibble)
+                continue;
 
-        if (addr.IsNull)
-        {
-            if (!allocateOnMissing)
-            {
-                leaf = default;
-                return false;
-            }
+            var sliced = key.SliceFrom(1);
 
-            page = batch.GetNewPage(out addr, true);
-            page.Header.PageType = PageType.Leaf;
-            page.Header.Level = 0;
-        }
-        else
-        {
-            page = batch.GetAt(addr);
-        }
+            destination = destination.Header.PageType == PageType.Leaf
+                ? new LeafPage(destination).Set(sliced, item.RawData, batch)
+                : new DataPage(destination).Set(sliced, item.RawData, batch);
 
-        // ensure writable
-        if (page.Header.BatchId != batch.BatchId)
-        {
-            page = batch.GetWritableCopy(page);
-            addr = batch.GetAddress(page);
+            // Use the special delete for the item that is much faster than map.Delete(item.Key);
+            map.Delete(item);
         }
 
-        leaf = GetLeafSlottedArray(page);
-        return true;
+        return destination;
     }
 
-    private static SlottedArray GetLeafSlottedArray(Page page) => new(new Span<byte>(page.Payload, Payload.Size));
-
     private static byte FindMostFrequentNibble(SlottedArray map)
     {
         const int count = SlottedArray.BucketCount;
@@ -312,20 +255,6 @@ public bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadO
             return true;
         }
 
-        if (LeafCount is > 0 and <= MaxLeafCount)
-        {
-            // start with the oldest
-            for (var i = LeafCount - 1; i >= 0; i--)
-            {
-                var leafMap = GetLeafSlottedArray(batch.GetAt(Data.Buckets[i]));
-                if (leafMap.TryGet(key, out result))
-                    return true;
-            }
-
-            result = default;
-            return false;
-        }
-
         if (key.IsEmpty) // empty keys are left in page
         {
             return false;
         }
@@ -337,8 +266,11 @@ public bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadO
 
         // non-null page jump, follow it!
         if (bucket.IsNull == false)
        {
-            var child = new DataPage(batch.GetAt(bucket));
-            return child.TryGet(key.SliceFrom(1), batch, out result);
+            var sliced = key.SliceFrom(1);
+            var child = batch.GetAt(bucket);
+            return child.Header.PageType == PageType.Leaf
+                ? new LeafPage(child).TryGet(sliced, batch, out result)
+                : new DataPage(child).TryGet(sliced, batch, out result);
         }
 
         result = default;
@@ -351,46 +283,24 @@ public void Report(IReporter reporter, IPageResolver resolver, int level)
     {
         var emptyBuckets = 0;
 
-        if (LeafCount <= MaxLeafCount)
+        foreach (var bucket in Data.Buckets)
         {
-            foreach (var leaf in Data.Buckets.Slice(0, LeafCount))
+            if (bucket.IsNull)
             {
-                var page = resolver.GetAt(leaf);
-                var leafMap = GetLeafSlottedArray(page);
-
-                // foreach (var item in leafMap.EnumerateAll())
-                // {
-                //     //reporter.ReportItem(new StoreKey(item.Key), item.RawData);
-                // }
-
-                reporter.ReportDataUsage(page.Header.PageType, level + 1, 0, leafMap.Count,
-                    leafMap.CapacityLeft);
+                emptyBuckets++;
             }
-
-            emptyBuckets = BucketCount - LeafCount;
-        }
-        else
-        {
-            foreach (var bucket in Data.Buckets)
+            else
             {
-                if (bucket.IsNull)
-                {
-                    emptyBuckets++;
-                }
+                var child = resolver.GetAt(bucket);
+                if (child.Header.PageType == PageType.Leaf)
+                    new LeafPage(child).Report(reporter, resolver, level + 1);
                 else
-                {
-                    new DataPage(resolver.GetAt(bucket)).Report(reporter, resolver, level + 1);
-                }
+                    new DataPage(child).Report(reporter, resolver, level + 1);
             }
         }
 
         var slotted = new SlottedArray(Data.DataSpan);
 
-        // foreach (var item in slotted.EnumerateAll())
-        // {
-        //     // reporter.ReportItem(new StoreKey(item.Key), item.RawData);
-        // }
-
         reporter.ReportDataUsage(Header.PageType, level, BucketCount - emptyBuckets, slotted.Count,
             slotted.CapacityLeft);
     }
diff --git a/src/Paprika/Store/IBatchContext.cs b/src/Paprika/Store/IBatchContext.cs
index b5c02da8..ccec90b7 100644
--- a/src/Paprika/Store/IBatchContext.cs
+++ b/src/Paprika/Store/IBatchContext.cs
@@ -1,5 +1,6 @@
 using System.Diagnostics;
 using Paprika.Crypto;
+using Paprika.Utils;
 
 namespace Paprika.Store;
 
diff --git a/src/Paprika/Store/LeafPage.cs b/src/Paprika/Store/LeafPage.cs
new file mode 100644
index 00000000..fc4c3468
--- /dev/null
+++ b/src/Paprika/Store/LeafPage.cs
@@ -0,0 +1,125 @@
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Paprika.Data;
+
+namespace Paprika.Store;
+
+/// <summary>
+/// Represents the lowest level of the Paprika tree. No buckets, no nothing, just data.
+/// </summary>
+[method: DebuggerStepThrough]
+public readonly unsafe struct LeafPage(Page page) : IPageWithData<LeafPage>
+{
+    public static LeafPage Wrap(Page page) => new(page);
+
+    public bool IsNull => page.Raw == UIntPtr.Zero;
+
+    private ref PageHeader Header => ref page.Header;
+
+    private ref Payload Data => ref Unsafe.AsRef<Payload>(page.Payload);
+
+    public Page Set(in NibblePath key, in ReadOnlySpan<byte> data, IBatchContext batch)
+    {
+        if (Header.BatchId != batch.BatchId)
+        {
+            // the page is from another batch, meaning, it's readonly. Copy
+            var writable = batch.GetWritableCopy(page);
+            return new LeafPage(writable).Set(key, data, batch);
+        }
+
+        if (data.IsEmpty)
+        {
+            // Deletes are in-situ
+            Map.Delete(key);
+            return page;
+        }
+
+        // Try write in map
+        if (Map.TrySet(key, data))
+        {
+            return page;
+        }
+
+        // Register this page for reuse as its data will be copied to the data page.
+        batch.RegisterForFutureReuse(page);
+
+        // Not enough space, transform into a data page.
+        var @new = batch.GetNewPage(out _, true);
+
+        ref var header = ref @new.Header;
+        header.PageType = PageType.Standard;
+        header.Level = page.Header.Level; // same level
+
+        var dataPage = new DataPage(@new);
+
+        foreach (var item in Map.EnumerateAll())
+        {
+            dataPage = new DataPage(dataPage.Set(item.Key, item.RawData, batch));
+        }
+
+        // Set this value and return the data page
+        return dataPage.Set(key, data, batch);
+    }
+
+    public (LeafPage page, bool) TrySet(in NibblePath key, in ReadOnlySpan<byte> data, IBatchContext batch)
+    {
+        var map = Map;
+
+        // Check whether the value should be set in this leaf. Make this as simple and as fast as possible,
+        // starting with the simplest checks:
+        // 1. if data is empty, it's a delete
+        // 2. if there's capacity left in the map, just write it
+        // 3. if the data is an update and can be put in the map in place
+        var shouldTrySet =
+            data.IsEmpty ||
+            SlottedArray.EstimateNeededCapacity(key, data) <= map.CapacityLeft ||
+            map.HasSpaceToUpdateExisting(key, data);
+
+        if (shouldTrySet == false)
+        {
+            return (new LeafPage(page), false);
+        }
+
+        if (Header.BatchId != batch.BatchId)
+        {
+            // The page is from another batch, meaning, it's readonly. COW
+            // It could be useful to check whether the map will accept the write first, before doing COW,
+            // but this would result in a check for each TrySet. This should be implemented in map.
+            var writable = batch.GetWritableCopy(page);
+            return new LeafPage(writable).TrySet(key, data, batch);
+        }
+
+        return (new LeafPage(page), Map.TrySet(key, data));
+    }
+
+    [StructLayout(LayoutKind.Explicit, Size = Size)]
+    private struct Payload
+    {
+        private const int Size = Page.PageSize - PageHeader.Size;
+
+        /// <summary>
+        /// The first item of the map of frames, to allow taking a ref to it.
+        /// </summary>
+        [FieldOffset(0)] private byte DataStart;
+
+        /// <summary>
+        /// Writable area.
+        /// </summary>
+        public Span<byte> DataSpan => MemoryMarshal.CreateSpan(ref DataStart, Size);
+    }
+
+    public bool TryGet(scoped NibblePath key, IReadOnlyBatchContext batch, out ReadOnlySpan<byte> result)
+    {
+        batch.AssertRead(Header);
+        return Map.TryGet(key, out result);
+    }
+
+    private SlottedArray Map => new(Data.DataSpan);
+
+    public void Report(IReporter reporter, IPageResolver resolver, int level)
+    {
+        var slotted = new SlottedArray(Data.DataSpan);
+        reporter.ReportDataUsage(Header.PageType, level, 0, slotted.Count, slotted.CapacityLeft);
+    }
+}
\ No newline at end of file
diff --git a/src/Paprika/Store/PageManagers/PointerPageManager.cs b/src/Paprika/Store/PageManagers/PointerPageManager.cs
index c177833d..32aeb967 100644
--- a/src/Paprika/Store/PageManagers/PointerPageManager.cs
+++ b/src/Paprika/Store/PageManagers/PointerPageManager.cs
@@ -1,13 +1,10 @@
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
+using System.Runtime.CompilerServices;
 
 namespace Paprika.Store.PageManagers;
 
-public abstract unsafe class PointerPageManager : IPageManager
+public abstract unsafe class PointerPageManager(long size) : IPageManager
 {
-    public int MaxPage { get; }
-
-    protected PointerPageManager(long size) => MaxPage = (int)(size / Page.PageSize);
+    public int MaxPage { get; } = (int)(size / Page.PageSize);
 
     protected abstract void* Ptr { get; }
 
diff --git a/src/Paprika/Store/PagedDb.cs b/src/Paprika/Store/PagedDb.cs
index 05d0ae2e..a1784cf5 100644
--- a/src/Paprika/Store/PagedDb.cs
+++ b/src/Paprika/Store/PagedDb.cs
@@ -406,10 +406,10 @@ public void Report(IReporter state, IReporter storage)
     {
         if (root.Data.StateRoot.IsNull == false)
         {
-            new DataPage(GetAt(root.Data.StateRoot)).Report(state, this, 1);
+            new FanOutPage(GetAt(root.Data.StateRoot)).Report(state, this, 0);
         }
 
-        root.Data.Storage.Report(state, this, 1);
+        root.Data.Storage.Report(storage, this, 0);
     }
 
     public uint BatchId => root.Header.BatchId;
diff --git a/src/Paprika/Store/RootPage.cs b/src/Paprika/Store/RootPage.cs
index c1e79db4..a8b58864 100644
--- a/src/Paprika/Store/RootPage.cs
+++ b/src/Paprika/Store/RootPage.cs
@@ -67,7 +67,7 @@ public struct Payload
         [FieldOffset(DbAddress.Size * 2 + sizeof(uint) + Metadata.Size)]
         private DbAddress StoragePayload;
 
-        public FanOutList, StandardType> Storage => new(MemoryMarshal.CreateSpan(ref StoragePayload, FanOutList.FanOut));
+        public FanOutList>>, StandardType> Storage => new(MemoryMarshal.CreateSpan(ref StoragePayload, FanOutList.FanOut));
 
         /// <summary>
         /// Identifiers
diff --git a/src/Paprika/Utils/ReadOnlySpanOwner.cs b/src/Paprika/Utils/ReadOnlySpanOwner.cs
index 8720059f..5722526e 100644
--- a/src/Paprika/Utils/ReadOnlySpanOwner.cs
+++ b/src/Paprika/Utils/ReadOnlySpanOwner.cs
@@ -3,27 +3,19 @@
 /// <summary>
 /// Provides a <see cref="ReadOnlySpan{T}"/> under ownership.
 /// </summary>
-/// <typeparam name="T"></typeparam>
-public readonly ref struct ReadOnlySpanOwner<T>
+public readonly ref struct ReadOnlySpanOwner<T>(ReadOnlySpan<T> span, IDisposable? owner)
 {
-    public readonly ReadOnlySpan<T> Span;
-    private readonly IDisposable? _owner;
-
-    public ReadOnlySpanOwner(ReadOnlySpan<T> span, IDisposable? owner)
-    {
-        Span = span;
-        _owner = owner;
-    }
+    public readonly ReadOnlySpan<T> Span = span;
 
     public bool IsEmpty => Span.IsEmpty;
 
     /// <summary>
     /// Disposes the owner provided as <see cref="IDisposable"/> once.
     /// </summary>
-    public void Dispose() => _owner?.Dispose();
+    public void Dispose() => owner?.Dispose();
 
     /// <summary>
-    /// Answers whether this span is owned and provided by <paramref name="owner"/>.
+    /// Answers whether this span is owned and provided by <paramref name="owner1"/>.
     /// </summary>
-    public bool IsOwnedBy(object owner) => ReferenceEquals(owner, _owner);
+    public bool IsOwnedBy(object owner1) => ReferenceEquals(owner1, owner);
 }
\ No newline at end of file
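
Notes (illustrative C# sketches; nothing below is part of the diff, and any constant or helper name that does not appear in the patch above is made up for the example):

1. SlottedArray now tracks deleted bytes instead of a count of deleted slots (plausibly the "counting right" bullet of the commit message), which is why CapacityLeft can include space that is only reclaimable by a later defragmentation. A minimal, self-contained model of that accounting, runnable as a .NET top-level program:

    const int DataLength = 100;        // size of the data area, illustrative
    const int SlotSize = 4;            // stands in for Slot.Size, illustrative
    int low = 0, high = 0, deleted = 0;

    void Set(int payloadBytes) { low += SlotSize; high += payloadBytes; }
    void Delete(int payloadBytes) => deleted += payloadBytes + SlotSize;  // bytes, not a slot count

    int Taken() => low + high;
    int CapacityLeft() => DataLength - Taken() + deleted;  // deleted bytes count as reclaimable

    Set(10); Set(20);                     // Taken = 2 * 4 + 30 = 38, CapacityLeft = 62
    Delete(10);                           // deleted = 10 + 4 = 14
    Console.WriteLine(CapacityLeft());    // 76: freed bytes show up before any defragmentation runs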
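2. Usage of the new HasSpaceToUpdateExisting, mirrored from the Report_has_space_properly test above: an in-place update is only possible when the re-encoded key and data fit in the bytes the existing slot already occupies, so shrinking and same-size writes pass while growing ones fail:

    Span<byte> span = stackalloc byte[SlottedArray.OneSlotArrayMinimalSize + 1];
    var map = new SlottedArray(span);
    map.TrySet(NibblePath.Empty, stackalloc byte[] { 13 });

    map.HasSpaceToUpdateExisting(NibblePath.Empty, ReadOnlySpan<byte>.Empty);  // true: shrink in place
    map.HasSpaceToUpdateExisting(NibblePath.Empty, stackalloc byte[] { 42 });  // true: same size
    map.HasSpaceToUpdateExisting(NibblePath.Empty, stackalloc byte[2]);        // false: would outgrow the slot

This check, together with EstimateNeededCapacity, is what lets LeafPage.TrySet decide before calling GetWritableCopy, so a read-only leaf is not COW-ed for a write that cannot succeed (the "smarter cow for TrySet" bullet).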
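3. When no existing leaf absorbs the overflow, DataPage.Set still flushes the bucket with the most frequent first nibble. A standalone version of that selection written against raw keys (the real FindMostFrequentNibble works on the SlottedArray directly, not on key arrays):

    // assumes implicit usings (System.Collections.Generic)
    static byte FindMostFrequentNibble(IEnumerable<byte[]> keys)
    {
        var counts = new int[16];
        foreach (var key in keys)
            counts[key[0] >> 4]++;             // high nibble of the first byte
        byte best = 0;
        for (byte i = 1; i < counts.Length; i++)
            if (counts[i] > counts[best]) best = i;
        return best;
    }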
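4. The TestBatchContext changes close the recycling loop that Spinning_through_same_keys_should_use_limited_number_of_pages asserts on: a page registered for future reuse in batch N becomes allocatable again in batch N + 1, so repeatedly rewriting the same keys stabilizes the page count instead of growing it. A toy model of that round-trip (uint stands in for DbAddress):

    var reusable = new Stack<uint>();
    var toReuse = new HashSet<uint>();
    uint pageCount = 1;

    uint GetNewPage() => reusable.Count > 0 ? reusable.Pop() : pageCount++;
    void RegisterForFutureReuse(uint addr) => toReuse.Add(addr);
    void NextBatch() { foreach (var a in toReuse) reusable.Push(a); toReuse.Clear(); }

    var page = GetNewPage();              // fresh page 1
    RegisterForFutureReuse(page);
    NextBatch();                          // batch boundary: freed pages join the free stack
    Console.WriteLine(GetNewPage());      // 1: recycled rather than freshly allocated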