From 2fe3a29766dcdcc572dcd1baa7cdeb65ab7e9899 Mon Sep 17 00:00:00 2001 From: Szymon Kulec Date: Fri, 23 Feb 2024 09:41:59 +0100 Subject: [PATCH 1/2] ReadOnlyBatch caches ids (#250) * ReadOnlyBatch caches ids * limit the cache size --- src/Paprika/Store/BatchContextBase.cs | 2 +- src/Paprika/Store/IBatchContext.cs | 4 +- src/Paprika/Store/PagedDb.cs | 8 ++- src/Paprika/Store/RootPage.cs | 70 ++++++++++++++++++--------- 4 files changed, 57 insertions(+), 27 deletions(-) diff --git a/src/Paprika/Store/BatchContextBase.cs b/src/Paprika/Store/BatchContextBase.cs index 13598525..7aef3a4f 100644 --- a/src/Paprika/Store/BatchContextBase.cs +++ b/src/Paprika/Store/BatchContextBase.cs @@ -49,7 +49,7 @@ public Page GetWritableCopy(Page page) public abstract void RegisterForFutureReuse(Page page); - public abstract Dictionary IdCache { get; } + public abstract IDictionary IdCache { get; } /// /// Assigns the batch identifier to a given page, marking it writable by this batch. diff --git a/src/Paprika/Store/IBatchContext.cs b/src/Paprika/Store/IBatchContext.cs index c131839b..b5c02da8 100644 --- a/src/Paprika/Store/IBatchContext.cs +++ b/src/Paprika/Store/IBatchContext.cs @@ -33,8 +33,6 @@ public interface IBatchContext : IReadOnlyBatchContext /// void RegisterForFutureReuse(Page page); - Dictionary IdCache { get; } - /// /// Assigns the batch identifier to a given page, marking it writable by this batch. /// @@ -55,6 +53,8 @@ public interface IReadOnlyBatchContext : IPageResolver /// Gets the current id. /// uint BatchId { get; } + + IDictionary IdCache { get; } } public static class ReadOnlyBatchContextExtensions diff --git a/src/Paprika/Store/PagedDb.cs b/src/Paprika/Store/PagedDb.cs index 1ed17117..95cca42b 100644 --- a/src/Paprika/Store/PagedDb.cs +++ b/src/Paprika/Store/PagedDb.cs @@ -1,4 +1,5 @@ -using System.Diagnostics; +using System.Collections.Concurrent; +using System.Diagnostics; using System.Diagnostics.Metrics; using System.Runtime.InteropServices; using Paprika.Chain; @@ -356,6 +357,9 @@ private DbAddress SetNewRoot(RootPage root) private class ReadOnlyBatch(PagedDb db, RootPage root, string name) : IReportingReadOnlyBatch, IReadOnlyBatchContext { + private readonly ConcurrentDictionary _idCache = new(Environment.ProcessorCount, + RootPage.IdCacheLimit); + public RootPage Root => root; private long _reads; @@ -393,6 +397,8 @@ public void Report(IReporter state, IReporter storage) public uint BatchId => root.Header.BatchId; + public IDictionary IdCache => _idCache; + public Page GetAt(DbAddress address) => db._manager.GetAt(address); public override string ToString() => $"{nameof(ReadOnlyBatch)}, Name: {name}, BatchId: {BatchId}"; diff --git a/src/Paprika/Store/RootPage.cs b/src/Paprika/Store/RootPage.cs index ffb1a1b5..8bcc10e3 100644 --- a/src/Paprika/Store/RootPage.cs +++ b/src/Paprika/Store/RootPage.cs @@ -105,6 +105,11 @@ public void Accept(IPageVisitor visitor, IPageResolver resolver) Data.AbandonedList.Accept(visitor, resolver); } + /// + /// How many id entries should be cached per readonly batch. + /// + public const int IdCacheLimit = 2_000; + public bool TryGet(scoped in Key key, IReadOnlyBatchContext batch, out ReadOnlySpan result) { if (key.IsState) @@ -118,10 +123,39 @@ public bool TryGet(scoped in Key key, IReadOnlyBatchContext batch, out ReadOnlyS return new FanOutPage(batch.GetAt(Data.StateRoot)).TryGet(key.Path, batch, out result); } - if (Data.Ids.TryGet(key.Path, batch, out var id) == false) + Span idSpan = stackalloc byte[sizeof(uint)]; + + ReadOnlySpan id; + var cache = batch.IdCache; + var keccak = key.Path.UnsafeAsKeccak; + + if (cache.TryGetValue(keccak, out var cachedId)) { - result = default; - return false; + if (cachedId == 0) + { + result = default; + return false; + } + + WriteId(idSpan, cachedId); + id = idSpan; + } + else + { + if (Data.Ids.TryGet(key.Path, batch, out id)) + { + if (cache.Count < IdCacheLimit) + { + cache[keccak] = ReadId(id); + } + } + else + { + // Not found, for now, not remember misses, remember miss + // cache[keccak] = 0; + result = default; + return false; + } } var path = NibblePath.FromKey(id).Append(key.StoragePath, stackalloc byte[StorageKeySize]); @@ -129,20 +163,9 @@ public bool TryGet(scoped in Key key, IReadOnlyBatchContext batch, out ReadOnlyS return Data.Storage.TryGet(path, batch, out result); } - /// - /// Encodes the path in a way that makes it byte-aligned and unique, but also sort: - /// - /// - empty path is left empty - /// - odd-length path is padded with a single nibble with value of 0x01 - /// - even-length path is padded with a single byte (2 nibbles with value of 0x00) - /// - /// To ensure that and / - /// of the same length can coexist, the merkle marker is added as well. - /// - public static NibblePath Encode(in NibblePath path, in Span destination, DataType type) - { - return path; - } + private static uint ReadId(ReadOnlySpan id) => BinaryPrimitives.ReadUInt32LittleEndian(id); + private static void WriteId(Span idSpan, uint cachedId) => BinaryPrimitives.WriteUInt32LittleEndian(idSpan, cachedId); + public void SetRaw(in Key key, IBatchContext batch, ReadOnlySpan rawData) { @@ -155,9 +178,11 @@ public void SetRaw(in Key key, IBatchContext batch, ReadOnlySpan rawData) scoped NibblePath id; Span idSpan = stackalloc byte[sizeof(uint)]; - if (batch.IdCache.TryGetValue(key.Path.UnsafeAsKeccak, out var cachedId)) + var keccak = key.Path.UnsafeAsKeccak; + + if (batch.IdCache.TryGetValue(keccak, out var cachedId)) { - BinaryPrimitives.WriteUInt32LittleEndian(idSpan, cachedId); + WriteId(idSpan, cachedId); id = NibblePath.FromKey(idSpan); } else @@ -166,10 +191,10 @@ public void SetRaw(in Key key, IBatchContext batch, ReadOnlySpan rawData) if (Data.Ids.TryGet(key.Path, batch, out var existingId) == false) { Data.AccountCounter++; - BinaryPrimitives.WriteUInt32LittleEndian(idSpan, Data.AccountCounter); + WriteId(idSpan, Data.AccountCounter); // memoize in cache - batch.IdCache[key.Path.UnsafeAsKeccak] = Data.AccountCounter; + batch.IdCache[keccak] = Data.AccountCounter; // update root Data.Ids.Set(key.Path, idSpan, batch); @@ -179,13 +204,12 @@ public void SetRaw(in Key key, IBatchContext batch, ReadOnlySpan rawData) else { // memoize in cache - batch.IdCache[key.Path.UnsafeAsKeccak] = BinaryPrimitives.ReadUInt32LittleEndian(existingId); + batch.IdCache[keccak] = ReadId(existingId); id = NibblePath.FromKey(existingId); } } var path = id.Append(key.StoragePath, stackalloc byte[StorageKeySize]); - Data.Storage.Set(path, rawData, batch); } } From 93b2eabf1eaedf29b22c5af9a7621aa182757646 Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Fri, 23 Feb 2024 08:46:13 +0000 Subject: [PATCH 2/2] Better hash (#251) * Better hash * fix --- src/Paprika.Tests/Data/KeyTests.cs | 17 ++++++- src/Paprika/Data/Key.cs | 12 +++-- src/Paprika/Data/NibblePath.cs | 73 ++++++++++++++++++------------ 3 files changed, 68 insertions(+), 34 deletions(-) diff --git a/src/Paprika.Tests/Data/KeyTests.cs b/src/Paprika.Tests/Data/KeyTests.cs index c70590c5..d6c265e7 100644 --- a/src/Paprika.Tests/Data/KeyTests.cs +++ b/src/Paprika.Tests/Data/KeyTests.cs @@ -97,6 +97,18 @@ public void LongHash() NibblePath.FromKey(Keccak.EmptyTreeHash).SliceTo(i))); } + for (var x = 0; x < 16; x++) + { + var nibbles = NibblePath.Parse("0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde" + x.ToString("x").ToLowerInvariant()); + Unique(Key.Raw(aPath, DataType.Merkle, nibbles)); + + nibbles = NibblePath.Parse(x.ToString("x").ToLowerInvariant() + "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde"); + for (var i = 1; i < 32; i++) + { + Unique(Key.Raw(aPath, DataType.Merkle, nibbles.SliceTo(i))); + } + } + // Additional colliding ones Unique(Key.Merkle(NibblePath.Parse("DAE3"))); Unique(Key.Merkle(NibblePath.Parse("251C"))); @@ -118,7 +130,8 @@ void Unique(in Key key) if (hashes.TryAdd(hash, hex) == false) { var existing = hashes[hash]; - Assert.Fail($"The hash for {hex} is the same as for {existing}"); + if (existing != hex) + Assert.Fail($"The hash for {hex} is the same as for {existing}"); } } } @@ -130,4 +143,4 @@ private static void ReadWriteAssert(in Key expected) Key.ReadFrom(written, out var actual); actual.Equals(expected).Should().BeTrue(); } -} \ No newline at end of file +} diff --git a/src/Paprika/Data/Key.cs b/src/Paprika/Data/Key.cs index cb7deb74..1d2bd636 100644 --- a/src/Paprika/Data/Key.cs +++ b/src/Paprika/Data/Key.cs @@ -1,5 +1,6 @@ using System.Diagnostics; using System.IO.Hashing; +using System.Numerics; using System.Runtime.CompilerServices; using Paprika.Crypto; using Paprika.Store; @@ -99,14 +100,17 @@ public static ReadOnlySpan ReadFrom(ReadOnlySpan source, out Key key [SkipLocalsInit] public override int GetHashCode() { - return (int)Type ^ Path.GetHashCode() ^ StoragePath.GetHashCode(); + return (int)BitOperations.Crc32C((uint)Path.GetHashCode(), (uint)StoragePath.GetHashCode()) + (byte)Type; } [SkipLocalsInit] public ulong GetHashCodeULong() { - return unchecked((ulong)(((long)Path.GetHashCode() << 32) | - (long)(StoragePath.GetHashCode() ^ (byte)Type))); + var pathHash = Path.GetHashCode(); + var storageHash = StoragePath.GetHashCode(); + + ulong hash = BitOperations.Crc32C((uint)pathHash, (uint)storageHash) + (byte)Type; + return (((ulong)(uint)pathHash) << 32 | (uint)storageHash) ^ (hash << 32 | hash); } public override string ToString() @@ -120,4 +124,4 @@ public override string ToString() /// The predicate over a key. /// public delegate bool Predicate(in Key key); -} \ No newline at end of file +} diff --git a/src/Paprika/Data/NibblePath.cs b/src/Paprika/Data/NibblePath.cs index 28744b53..36abbcaa 100644 --- a/src/Paprika/Data/NibblePath.cs +++ b/src/Paprika/Data/NibblePath.cs @@ -1,5 +1,7 @@ -using System.Diagnostics; +using System; +using System.Diagnostics; using System.Globalization; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using Paprika.Crypto; @@ -528,13 +530,13 @@ public override int GetHashCode() { ref var span = ref _span; - int hash = Length << 24; - var length = Length; + uint hash = (uint)Length << 24; + nuint length = Length; if (_odd == OddBit) { // mix in first half - hash ^= (_span & 0x0F) << 20; + hash |= (uint)(_span & 0x0F) << 20; span = ref Unsafe.Add(ref span, 1); length -= 1; } @@ -542,11 +544,7 @@ public override int GetHashCode() if (length % 2 == 1) { // mix in - unchecked - { - hash ^= GetAt(length - 1) << 16; - } - + hash |= (uint)GetAt((int)length - 1) << 16; length -= 1; } @@ -554,37 +552,56 @@ public override int GetHashCode() length /= 2; // make it byte - // 4 bytes - var intLoop = Math.DivRem(length, sizeof(int), out var remainder); - for (var i = 0; i < intLoop; i++) + // 8 bytes + if (length >= sizeof(long)) { - unchecked + nuint offset = 0; + nuint longLoop = length - sizeof(long); + if (longLoop != 0) { - var v = Unsafe.ReadUnaligned(ref span); - hash ^= v; - span = ref Unsafe.Add(ref span, sizeof(int)); + do + { + hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned(ref Unsafe.Add(ref span, offset))); + offset += sizeof(long); + } while (longLoop > offset); } + + // Do final hash as sizeof(long) from end rather than start + hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned(ref Unsafe.Add(ref span, longLoop))); + + return (int)hash; } - // 2 bytes - if (remainder >= sizeof(short)) + // 4 bytes + if (length >= sizeof(int)) { - unchecked + hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned(ref span)); + length -= sizeof(int); + if (length > 0) { - var v = Unsafe.ReadUnaligned(ref span); - hash ^= v; - span = ref Unsafe.Add(ref span, sizeof(short)); - remainder -= sizeof(short); + // Do final hash as sizeof(long) from end rather than start + hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned(ref Unsafe.Add(ref span, length))); } + + return (int)hash; } - // 1 byte - if (remainder > 0) + // 2 bytes + if (length >= sizeof(short)) { - hash ^= span; + hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned(ref span)); + length -= sizeof(short); + if (length > 0) + { + // Do final hash as sizeof(long) from end rather than start + hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned(ref Unsafe.Add(ref span, length))); + } + + return (int)hash; } - return hash; + // 1 byte + return (int)BitOperations.Crc32C(hash, span); } } @@ -601,4 +618,4 @@ public bool HasOnlyZeroes() return true; } -} \ No newline at end of file +}