diff --git a/src/Paprika.Benchmarks/BitMapFilterBenchmarks.cs b/src/Paprika.Benchmarks/BitMapFilterBenchmarks.cs
new file mode 100644
index 00000000..4371b120
--- /dev/null
+++ b/src/Paprika.Benchmarks/BitMapFilterBenchmarks.cs
@@ -0,0 +1,82 @@
+using System.Runtime.InteropServices;
+using BenchmarkDotNet.Attributes;
+using Paprika.Data;
+using Paprika.Store;
+
+namespace Paprika.Benchmarks;
+
+[DisassemblyDiagnoser]
+[MemoryDiagnoser]
+public class BitMapFilterBenchmarks
+{
+    private readonly Page[] _pages1A = AlignedAlloc(1);
+    private readonly Page[] _pages1B = AlignedAlloc(1);
+
+    private readonly Page[] _pages2A = AlignedAlloc(2);
+    private readonly Page[] _pages2B = AlignedAlloc(2);
+
+    private readonly Page[] _pages128A = AlignedAlloc(128);
+    private readonly Page[] _pages128B = AlignedAlloc(128);
+
+    [Benchmark(OperationsPerInvoke = 4)]
+    public void Or_BitMapFilter_Of1()
+    {
+        var a = new BitMapFilter<BitMapFilter.Of1>(new BitMapFilter.Of1(_pages1A[0]));
+        var b = new BitMapFilter<BitMapFilter.Of1>(new BitMapFilter.Of1(_pages1B[0]));
+
+        a.OrWith(b);
+        a.OrWith(b);
+        a.OrWith(b);
+        a.OrWith(b);
+    }
+
+    [Benchmark(OperationsPerInvoke = 4)]
+    public void Or_BitMapFilter_Of2()
+    {
+        var a = new BitMapFilter<BitMapFilter.Of2>(new BitMapFilter.Of2(_pages2A[0], _pages2A[1]));
+        var b = new BitMapFilter<BitMapFilter.Of2>(new BitMapFilter.Of2(_pages2B[0], _pages2B[1]));
+
+        a.OrWith(b);
+        a.OrWith(b);
+        a.OrWith(b);
+        a.OrWith(b);
+    }
+
+    [Benchmark(OperationsPerInvoke = 4)]
+    public void Or_BitMapFilter_OfN_128()
+    {
+        var a = new BitMapFilter<BitMapFilter.OfN>(new BitMapFilter.OfN(_pages128A));
+        var b = new BitMapFilter<BitMapFilter.OfN>(new BitMapFilter.OfN(_pages128B));
+
+        a.OrWith(b);
+        a.OrWith(b);
+        a.OrWith(b);
+        a.OrWith(b);
+    }
+
+    [Benchmark(OperationsPerInvoke = 4)]
+    public int MayContainAny_BitMapFilter_OfN_128()
+    {
+        var a = new BitMapFilter<BitMapFilter.OfN>(new BitMapFilter.OfN(_pages128A));
+
+        return (a.MayContainAny(13, 17) ? 1 : 0) +
+               (a.MayContainAny(2342, 2345) ? 1 : 0) +
+               (a.MayContainAny(3453453, 8789345) ? 1 : 0) +
+               (a.MayContainAny(2346345, 432509) ? 1 : 0);
+    }
+
+    private static unsafe Page[] AlignedAlloc(int pageCount)
+    {
+        var pages = new Page[pageCount];
+
+        for (int i = 0; i < pageCount; i++)
+        {
+            pages[i] = new Page((byte*)NativeMemory.AlignedAlloc(Page.PageSize, Page.PageSize));
+
+            // make data more interesting
+            pages[i].Span.Fill((byte)(1 << (i & 7)));
+        }
+
+        return pages;
+    }
+}
\ No newline at end of file
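For completeness, the suite runs through the standard BenchmarkDotNet entry point; a minimal sketch, assuming Paprika.Benchmarks is an executable project:

using BenchmarkDotNet.Running;
using Paprika.Benchmarks;

// runs all [Benchmark] methods of the class above with the attached diagnosers
BenchmarkRunner.Run<BitMapFilterBenchmarks>();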
diff --git a/src/Paprika.Tests/Data/BitMapFilterTests.cs b/src/Paprika.Tests/Data/BitMapFilterTests.cs
new file mode 100644
index 00000000..27649e9e
--- /dev/null
+++ b/src/Paprika.Tests/Data/BitMapFilterTests.cs
@@ -0,0 +1,75 @@
+using FluentAssertions;
+using Paprika.Chain;
+using Paprika.Data;
+
+namespace Paprika.Tests.Data;
+
+public abstract class BitMapFilterTests<TAccessor> : IDisposable
+    where TAccessor : struct, BitMapFilter.IAccessor<TAccessor>
+{
+    private readonly BufferPool _pool = new(32);
+
+    protected abstract BitMapFilter<TAccessor> Build(BufferPool pool);
+
+    [Test]
+    public void Non_colliding_set()
+    {
+        var filter = Build(_pool);
+
+        var count = (ulong)filter.BucketCount;
+
+        for (ulong i = 0; i < count; i++)
+        {
+            filter[i].Should().BeFalse();
+            filter[i] = true;
+            filter[i].Should().BeTrue();
+        }
+
+        filter.Return(_pool);
+    }
+
+    [Test]
+    public void Or_with()
+    {
+        var filter1 = Build(_pool);
+        var filter2 = Build(_pool);
+
+        const ulong v1 = 1;
+        const ulong v2 = 4213798855897314219;
+
+        filter1.Add(v1);
+        filter2.Add(v2);
+
+        filter1.OrWith(filter2);
+
+        filter1.MayContain(v1).Should().BeTrue();
+        filter1.MayContain(v2).Should().BeTrue();
+        filter1.MayContainAny(v1, v2).Should().BeTrue();
+
+        filter1.Return(_pool);
+        filter2.Return(_pool);
+    }
+
+    public void Dispose() => _pool.Dispose();
+}
+
+[TestFixture]
+public class BitMapFilterTestsOf1 : BitMapFilterTests<BitMapFilter.Of1>
+{
+    protected override BitMapFilter<BitMapFilter.Of1> Build(BufferPool pool) => BitMapFilter.CreateOf1(pool);
+}
+
+[TestFixture]
+public class BitMapFilterTestsOf2 : BitMapFilterTests<BitMapFilter.Of2>
+{
+    protected override BitMapFilter<BitMapFilter.Of2> Build(BufferPool pool) => BitMapFilter.CreateOf2(pool);
+}
+
+[TestFixture]
+public class BitMapFilterTestsOf4 : BitMapFilterTests<BitMapFilter.OfN>
+{
+    protected override BitMapFilter<BitMapFilter.OfN> Build(BufferPool pool) => BitMapFilter.CreateOfN(pool, 4);
+}
+
+
+
diff --git a/src/Paprika.Tests/Data/PageExtensionsTests.cs b/src/Paprika.Tests/Data/PageExtensionsTests.cs
new file mode 100644
index 00000000..da1aa5a3
--- /dev/null
+++ b/src/Paprika.Tests/Data/PageExtensionsTests.cs
@@ -0,0 +1,27 @@
+using System.Runtime.InteropServices;
+using FluentAssertions;
+using Paprika.Data;
+using Paprika.Store;
+
+namespace Paprika.Tests.Data;
+
+public class PageExtensionsTests
+{
+    [Test]
+    public void Or_with()
+    {
+        var page0 = Page.DevOnlyNativeAlloc();
+        page0.Clear();
+
+        var page1 = Page.DevOnlyNativeAlloc();
+        const byte fill = 0xFF;
+        page1.Span.Fill(fill); // fill the whole page with 0xFF
+
+        const int notFound = -1;
+        page0.Span.IndexOfAnyExcept((byte)0).Should().Be(notFound);
+
+        page0.OrWith(page1);
+
+        page0.Span.IndexOfAnyExcept(fill).Should().Be(notFound);
+    }
+}
\ No newline at end of file
diff --git a/src/Paprika.Tests/Store/AbandonedTests.cs b/src/Paprika.Tests/Store/AbandonedTests.cs
index 5dfa3e84..7d7c5bc3 100644
--- a/src/Paprika.Tests/Store/AbandonedTests.cs
+++ b/src/Paprika.Tests/Store/AbandonedTests.cs
@@ -199,6 +199,8 @@ public async Task Work_proper_bookkeeping_when_lots_of_reads()
         using var block = db.BeginNextBatch();
         block.SetAccount(account, value);
+        block.VerifyDbPagesOnCommit();
+
         await block.Commit(CommitOptions.FlushDataAndRoot);
     }
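A side note on the contract the filter tests above pin down: a BitMapFilter trades accuracy for speed, so Add() guarantees a later MayContain() returns true (no false negatives), while a true answer for a value that was never added is allowed (a false positive when two hashes share a bucket). A minimal sketch of that contract, with a pool sized like the one in the tests:

using System.Diagnostics;
using Paprika.Chain;
using Paprika.Data;

using var pool = new BufferPool(32);
var filter = BitMapFilter.CreateOf1(pool);

filter.Add(42);
Debug.Assert(filter.MayContain(42)); // never a false negative once added
_ = filter.MayContain(43);           // may be true (false positive) or false
filter.Return(pool);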
diff --git a/src/Paprika.Tests/Store/DbAddressSetTests.cs b/src/Paprika.Tests/Store/DbAddressSetTests.cs
new file mode 100644
index 00000000..dad200c5
--- /dev/null
+++ b/src/Paprika.Tests/Store/DbAddressSetTests.cs
@@ -0,0 +1,41 @@
+using FluentAssertions;
+using Paprika.Store;
+
+namespace Paprika.Tests.Store;
+
+public class DbAddressSetTests
+{
+    [Test]
+    public void Set_and_unset()
+    {
+        const uint max = 1000;
+
+        var set = new DbAddressSet(DbAddress.Page(max));
+
+        for (uint i = 0; i < max; i++)
+        {
+            var addr = DbAddress.Page(i);
+            set[addr].Should().BeTrue();
+            set[addr] = false;
+        }
+
+        for (uint i = 0; i < max; i++)
+        {
+            var addr = DbAddress.Page(i);
+            set[addr].Should().BeFalse();
+        }
+    }
+
+    [Test]
+    public void Set_enumeration()
+    {
+        const uint max = 5;
+        var set = new DbAddressSet(DbAddress.Page(max));
+
+        set[DbAddress.Page(0)] = false;
+        set[DbAddress.Page(2)] = false;
+        set[DbAddress.Page(4)] = false;
+
+        set.EnumerateSet().ToArray().Should().BeEquivalentTo([DbAddress.Page(1), DbAddress.Page(3)]);
+    }
+}
\ No newline at end of file
diff --git a/src/Paprika.Tests/Store/DbTests.cs b/src/Paprika.Tests/Store/DbTests.cs
index 8652a18e..4839fcb9 100644
--- a/src/Paprika.Tests/Store/DbTests.cs
+++ b/src/Paprika.Tests/Store/DbTests.cs
@@ -114,6 +114,8 @@ public async Task Readonly_transaction_block_till_they_are_released()
             // write current
             block.SetAccount(Key0, GetValue(i));
+
+            block.VerifyDbPagesOnCommit();
             await block.Commit(CommitOptions.FlushDataOnly);
 
             read.ShouldHaveAccount(Key0, GetValue(start));
diff --git a/src/Paprika/Chain/Blockchain.cs b/src/Paprika/Chain/Blockchain.cs
index 080a12e5..e2185d31 100644
--- a/src/Paprika/Chain/Blockchain.cs
+++ b/src/Paprika/Chain/Blockchain.cs
@@ -11,6 +11,7 @@
 using Paprika.Merkle;
 using Paprika.Store;
 using Paprika.Utils;
+using BitFilter = Paprika.Data.BitMapFilter<Paprika.Data.BitMapFilter.OfN>;
 
 namespace Paprika.Chain;
@@ -26,6 +27,11 @@ public class Blockchain : IAsyncDisposable
     // allocate 1024 pages (4MB) at once
     private readonly BufferPool _pool;
 
+    /// <summary>
+    /// 512 KB gives 4 million buckets.
+    /// </summary>
+    private const int BitMapFilterSizePerBlock = 512 * 1024 / Page.PageSize;
+
     private readonly object _blockLock = new();
     private readonly Dictionary<uint, List<CommittedBlockState>> _blocksByNumber = new();
     private readonly Dictionary<Keccak, CommittedBlockState> _blocksByHash = new();
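The arithmetic behind the new constant, spelled out (assuming Paprika's usual 4096-byte page):

// 512 * 1024 bytes / 4096 bytes per page = 128 pages rented per filter
// 128 pages * 4096 bytes * 8 bits = 4_194_304 one-bit buckets — the "4 million buckets"
const int PagesPerFilter = 512 * 1024 / 4096; // 128
const int Buckets = PagesPerFilter * 4096 * 8; // 4,194,304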
@@ -340,7 +346,9 @@ public IRawState StartRaw()
     public IReadOnlyWorldState StartReadOnly(Keccak keccak)
     {
         var (batch, ancestors) = BuildBlockDataDependencies(keccak);
-        return new ReadOnlyState(keccak, new ReadOnlyBatchCountingRefs(batch), ancestors);
+        var filter = CreateAncestorsFilter(ancestors);
+
+        return new ReadOnlyState(keccak, new ReadOnlyBatchCountingRefs(batch), ancestors, filter, _pool);
     }
 
     public IReadOnlyWorldState StartReadOnlyLatestFromDb()
@@ -471,17 +479,17 @@ static void ThrowFinalizedBlockMissing()
         }
     }
 
+    private BitFilter CreateBitFilter() => BitMapFilter.CreateOfN(_pool, BitMapFilterSizePerBlock);
+
     /// <summary>
     /// Represents a block that is a result of ExecutionPayload.
     /// </summary>
     private class BlockState : RefCountingDisposable, IWorldState, ICommit, IProvideDescription, IStateStats
     {
-        [ThreadStatic] private static HashSet<ulong>? s_bloomCache;
-
         /// <summary>
-        /// A simple bloom filter to assert whether the given key was set in a given block, used to speed up getting the keys.
+        /// A simple set filter to assert whether the given key was set in a given block, used to speed up getting the keys.
         /// </summary>
-        private HashSet<ulong> _bloom;
+        private readonly BitFilter _filter;
 
         private readonly Dictionary<Keccak, int>? _stats;
@@ -492,6 +500,7 @@ private class BlockState : RefCountingDisposable, IWorldState, ICommit, IProvideDescription, IStateStats
         private readonly ReadOnlyBatchCountingRefs _batch;
         private readonly CommittedBlockState[] _ancestors;
+        private readonly BitFilter _ancestorsFilter;
 
         private readonly Blockchain _blockchain;
@@ -528,11 +537,13 @@ public BlockState(Keccak parentStateRoot, IReadOnlyBatch batch, CommittedBlockState[] ancestors, Blockchain blockchain)
             _ancestors = ancestors;
 
+            // build the combined ancestors filter
+            _ancestorsFilter = blockchain.CreateAncestorsFilter(ancestors);
+
             _blockchain = blockchain;
 
             ParentHash = parentStateRoot;
 
-            _bloom = Interlocked.Exchange(ref s_bloomCache, null) ?? new HashSet<ulong>();
+            _filter = _blockchain.CreateBitFilter();
 
             _destroyed = null;
            _stats = new Dictionary<Keccak, int>();
@@ -639,20 +650,20 @@ private static void ReportCacheUsage(in CacheBudget.Options budget, CacheBudget
             BlockNumber = blockNumber;
 
-            var xor = new Xor8(_bloom);
+            var filter = _blockchain.CreateBitFilter();
 
             // clean no longer used fields
             var data = new PooledSpanDictionary(Pool, false);
 
             // use append for faster copies as state and storage won't overwrite each other
-            _state.CopyTo(data, OmitUseOnce, true);
-            _storage.CopyTo(data, OmitUseOnce, true);
+            _state.CopyTo(data, OmitUseOnce, filter, true);
+            _storage.CopyTo(data, OmitUseOnce, filter, true);
 
             // TODO: apply InspectBeforeApply here to reduce memory usage?
-            _preCommit.CopyTo(data, OmitUseOnce);
+            _preCommit.CopyTo(data, OmitUseOnce, filter);
 
             // Creation acquires the lease
-            return new CommittedBlockState(xor, _destroyed, _blockchain, data, hash,
+            return new CommittedBlockState(filter, _destroyed, _blockchain, data, hash,
                 ParentHash,
                 blockNumber,
                 raw);
@@ -674,7 +685,7 @@ static void ThrowSameState()
         public void Reset()
         {
             _hash = ParentHash;
-            _bloom.Clear();
+            _filter.Clear();
             _destroyed = null;
 
             CreateDictionaries();
@@ -767,7 +778,7 @@ public void BlockFurtherPrefetching()
 
             foreach (var key in _cached)
             {
-                parent._bloom.Add(key);
+                parent._filter.Add(key);
             }
         }
@@ -950,7 +961,7 @@ private void SetImpl(in Key key, in ReadOnlySpan<byte> payload, EntryType type,
             _hash = null;
 
             var hash = GetHash(key);
-            _bloom.Add(hash);
+            _filter.Add(hash);
 
             var k = key.WriteTo(stackalloc byte[key.MaxByteLength]);
 
             dict.Set(k, hash, payload, (byte)type);
@@ -964,7 +975,7 @@ private void SetImpl(in Key key, in ReadOnlySpan<byte> payload0, in ReadOnlySpan<byte> payload1,
             _hash = null;
 
             var hash = GetHash(key);
-            _bloom.Add(hash);
+            _filter.Add(hash);
 
             var k = key.WriteTo(stackalloc byte[key.MaxByteLength]);
@@ -1105,20 +1116,23 @@ private ReadOnlySpanOwnerWithMetadata<byte> TryGet(scoped in Key key, scoped ReadOnlySpan<byte> keyWritten,
         }
 
         private ReadOnlySpanOwnerWithMetadata<byte> TryGetAncestors(scoped in Key key,
-            scoped ReadOnlySpan<byte> keyWritten, ulong bloom)
+            scoped ReadOnlySpan<byte> keyWritten, ulong keyHash)
         {
-            ushort depth = 1;
-
             var destroyedHash = CommittedBlockState.GetDestroyedHash(key);
 
-            // walk all the blocks locally
-            foreach (var ancestor in _ancestors)
+            if (_ancestorsFilter.MayContainAny(keyHash, destroyedHash))
             {
-                var owner = ancestor.TryGetLocal(key, keyWritten, bloom, destroyedHash, out var succeeded);
-                if (succeeded)
-                    return owner.WithDepth(depth);
+                ushort depth = 1;
 
-                depth++;
+                // Walk through the ancestors only if the filter shows that they may contain the value
+                foreach (var ancestor in _ancestors)
+                {
+                    var owner = ancestor.TryGetLocal(key, keyWritten, keyHash, destroyedHash, out var succeeded);
+                    if (succeeded)
+                        return owner.WithDepth(depth);
+
+                    depth++;
+                }
             }
 
             return TryGetDatabase(key);
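The shape of this change is worth distilling: one or-combined filter now answers for the whole ancestor chain, so a guaranteed miss costs a single probe instead of one per-ancestor filter check. A self-contained sketch of the gating pattern, with a HashSet<ulong> standing in for the combined bit filter and a list of dictionaries standing in for the ancestor chain (illustrative types only, not the Paprika signatures):

using System.Collections.Generic;

static class AncestorGate
{
    public static string? TryGetThroughAncestors(
        HashSet<ulong> combinedFilter, List<Dictionary<ulong, string>> ancestors, ulong keyHash)
    {
        // One probe of the or-combined filter answers for the whole chain.
        if (!combinedFilter.Contains(keyHash))
            return null; // guaranteed miss: skip every ancestor, go straight to the database

        foreach (var ancestor in ancestors)
            if (ancestor.TryGetValue(keyHash, out var value))
                return value; // found in some ancestor

        return null; // false positive: fall through to the database as before
    }
}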
@@ -1147,7 +1161,7 @@ private ReadOnlySpanOwnerWithMetadata<byte> TryGetDatabase(scoped in Key key)
 
         private ReadOnlySpanOwner<byte> TryGetLocal(scoped in Key key, scoped ReadOnlySpan<byte> keyWritten, ulong bloom,
             out bool succeeded)
         {
-            var mayHave = _bloom.Contains(bloom);
+            var mayHave = _filter.MayContain(bloom);
 
             // check if the change is in the block
             if (!mayHave)
@@ -1222,28 +1236,14 @@ protected override void CleanUp()
             _batch.Dispose();
             _xorMissed.Dispose();
             _prefetcher?.Dispose();
+            _filter.Return(Pool);
+            _ancestorsFilter.Return(Pool);
 
             // release all the ancestors
             foreach (var ancestor in _ancestors)
             {
                 ancestor.Dispose();
             }
-
-            ReturnCacheToPool();
-            return;
-
-            void ReturnCacheToPool()
-            {
-                var bloom = _bloom;
-                _bloom = null!;
-                ref var cache = ref s_bloomCache;
-                if (cache is null)
-                {
-                    // Return the cache to be reused
-                    bloom.Clear();
-                    cache = bloom;
-                }
-            }
         }
 
         public override string ToString() =>
@@ -1281,18 +1281,13 @@ private class CommittedBlockState : RefCountingDisposable
         /// <summary>
         /// A faster filter constructed on block commit.
         /// </summary>
-        private readonly Xor8 _xor;
+        public readonly BitFilter Filter;
 
         /// <summary>
         /// Stores information about contracts that should have their previous incarnations destroyed.
         /// </summary>
         private readonly HashSet<Keccak>? _destroyed;
 
-        /// <summary>
-        /// Stores the xor filter of <see cref="_destroyed"/> if any.
-        /// </summary>
-        private readonly Xor8? _destroyedXor;
-
         private readonly Blockchain _blockchain;
@@ -1302,17 +1297,22 @@ private class CommittedBlockState : RefCountingDisposable
         private readonly bool _raw;
         private bool _discardable;
 
-        private readonly DelayedMetrics.DelayedCounter<long, DelayedMetrics.LongIncrement> _xorMissed;
+        private readonly DelayedMetrics.DelayedCounter<long, DelayedMetrics.LongIncrement> _filterMissed;
 
-        public CommittedBlockState(Xor8 xor, HashSet<Keccak>? destroyed, Blockchain blockchain,
+        public CommittedBlockState(BitFilter filter, HashSet<Keccak>? destroyed, Blockchain blockchain,
             PooledSpanDictionary committed, Keccak hash,
             Keccak parentHash,
             uint blockNumber, bool raw)
         {
-            _xor = xor;
+            Filter = filter;
             _destroyed = destroyed;
-            _destroyedXor = _destroyed != null
-                ? new Xor8(new HashSet<ulong>(_destroyed.Select(k => GetDestroyedHash(k))))
-                : null;
+
+            if (destroyed != null)
+            {
+                foreach (var account in destroyed)
+                {
+                    filter.Add(GetDestroyedHash(account));
+                }
+            }
 
             _blockchain = blockchain;
             _committed = committed;
@@ -1321,7 +1321,7 @@ public CommittedBlockState(Xor8 xor, HashSet<Keccak>? destroyed, Blockchain blockchain,
             ParentHash = parentHash;
             BlockNumber = blockNumber;
 
-            _xorMissed = _blockchain._bloomMissedReads.Delay();
+            _filterMissed = _blockchain._bloomMissedReads.Delay();
         }
 
         public Keccak ParentHash { get; }
@@ -1330,8 +1330,7 @@ public CommittedBlockState(Xor8 xor, HashSet<Keccak>? destroyed, Blockchain blockchain,
 
         public Keccak Hash { get; }
 
-        public const ulong NonDestroyable = 0;
-        public const ulong Destroyable = 1;
+        private const ulong NonDestroyable = 0;
 
         public static ulong GetDestroyedHash(in Key key)
         {
@@ -1342,11 +1341,12 @@ public static ulong GetDestroyedHash(in Key key)
             if (path.Length != NibblePath.KeccakNibbleCount)
                 return NonDestroyable;
 
-            // Return ulonged hash.
+            // Return ulong hash.
             return GetDestroyedHash(path.UnsafeAsKeccak);
         }
 
-        private static ulong GetDestroyedHash(in Keccak keccak) => keccak.GetHashCodeUlong() | Destroyable;
+        private static uint GetDestroyedHash(in Keccak keccak) =>
+            BitOperations.Crc32C((uint)keccak.GetHashCode(), 0xDEADBEEF);
 
         /// <summary>
         /// Tries to get the key only from this block, acquiring no lease as it assumes that the lease is taken.
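The destroyed-account scheme changes shape here: previously a destroyed key was tagged by or-ing in the Destroyable bit and tracked in a separate Xor8 filter; now the CRC32C-derived hash is simply added to the same BitFilter (see the constructor above), with NonDestroyable == 0 still meaning "not a whole-account key". Note that the uint result widens implicitly to ulong at the call sites. A small illustration with an arbitrary stand-in value:

using System.Numerics;

uint keccakHashCode = 0x1234ABCD; // stand-in for keccak.GetHashCode()
// 0xDEADBEEF only decorrelates this hash from the regular key hash stored in the same filter
uint destroyedHash = BitOperations.Crc32C(keccakHashCode, 0xDEADBEEF);
ulong asStored = destroyedHash;   // implicit widening to match the ulong-based filter API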
@@ -1354,7 +1354,7 @@ public static ulong GetDestroyedHash(in Key key)
         public ReadOnlySpanOwner<byte> TryGetLocal(scoped in Key key, scoped ReadOnlySpan<byte> keyWritten,
             ulong bloom, ulong destroyedHash, out bool succeeded)
         {
-            var mayHave = _xor.MayContain(bloom);
+            var mayHave = Filter.MayContain(bloom);
 
             // check if the change is in the block
             if (!mayHave)
@@ -1379,7 +1379,7 @@ public ReadOnlySpanOwner<byte> TryGetLocal(scoped in Key key, scoped ReadOnlySpan<byte> keyWritten,
                 return new ReadOnlySpanOwner<byte>(span, this);
             }
 
-            _xorMissed.Add(1);
+            _filterMissed.Add(1);
 
             // if destroyed, return false as no previous one will contain it
             if (IsAccountDestroyed(key, destroyedHash))
@@ -1395,22 +1395,17 @@ public ReadOnlySpanOwner<byte> TryGetLocal(scoped in Key key, scoped ReadOnlySpan<byte> keyWritten,
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private bool IsAccountDestroyed(scoped in Key key, ulong destroyed)
         {
-            if (_destroyedXor == null || destroyed == NonDestroyable)
+            if (destroyed == NonDestroyable || _destroyed == null)
                 return false;
 
-            return CheckDestroyed(in key, destroyed);
-
-            [MethodImpl(MethodImplOptions.NoInlining)]
-            bool CheckDestroyed(in Key key, ulong destroyed)
-            {
-                return _destroyedXor.MayContain(destroyed) && _destroyed!.Contains(key.Path.UnsafeAsKeccak);
-            }
+            return Filter.MayContain(destroyed) && _destroyed.Contains(key.Path.UnsafeAsKeccak);
         }
 
         protected override void CleanUp()
         {
-            _xorMissed.Dispose();
+            _filterMissed.Dispose();
             _committed.Dispose();
+            Filter.Return(_blockchain._pool);
 
             if (_raw == false && _discardable == false)
             {
@@ -1474,6 +1469,8 @@ private class ReadOnlyState : RefCountingDisposable, IReadOnlyWorldState
     {
         private readonly ReadOnlyBatchCountingRefs _batch;
         private readonly CommittedBlockState[] _ancestors;
+        private readonly BitFilter? _ancestorsFilter;
+        private readonly BufferPool? _pool;
 
         public ReadOnlyState(ReadOnlyBatchCountingRefs batch)
         {
@@ -1489,6 +1486,15 @@ public ReadOnlyState(Keccak stateRoot, ReadOnlyBatchCountingRefs batch, CommittedBlockState[] ancestors)
             Hash = stateRoot;
         }
 
+        public ReadOnlyState(Keccak stateRoot, ReadOnlyBatchCountingRefs batch, CommittedBlockState[] ancestors,
+            BitFilter ancestorsFilter, BufferPool pool)
+        {
+            _batch = batch;
+            _ancestors = ancestors;
+            _ancestorsFilter = ancestorsFilter;
+            _pool = pool;
+            Hash = stateRoot;
+        }
+
         public uint BlockNumber { get; private set; }
 
         public Keccak Hash { get; }
@@ -1539,21 +1545,26 @@ public ReadOnlySpanOwnerWithMetadata<byte> Get(scoped in Key key)
         /// chain.
         /// </summary>
         private ReadOnlySpanOwnerWithMetadata<byte> TryGet(scoped in Key key, scoped ReadOnlySpan<byte> keyWritten,
-            ulong bloom,
-            out bool succeeded)
+            ulong keyHash, out bool succeeded)
         {
-            ushort depth = 1;
+            if (_ancestors.Length > 0)
+            {
+                var destroyedHash = CommittedBlockState.GetDestroyedHash(key);
 
-            var destroyedHash = CommittedBlockState.GetDestroyedHash(key);
+                if (_ancestorsFilter == null || _ancestorsFilter.GetValueOrDefault().MayContainAny(keyHash, destroyedHash))
+                {
+                    ushort depth = 1;
 
-            // walk all the blocks locally
-            foreach (var ancestor in _ancestors)
-            {
-                var owner = ancestor.TryGetLocal(key, keyWritten, bloom, destroyedHash, out succeeded);
-                if (succeeded)
-                    return owner.WithDepth(depth);
+                    // Walk through the ancestors only if the filter shows that they may contain the value
+                    foreach (var ancestor in _ancestors)
+                    {
+                        var owner = ancestor.TryGetLocal(key, keyWritten, keyHash, destroyedHash, out succeeded);
+                        if (succeeded)
+                            return owner.WithDepth(depth);
 
-                depth++;
+                        depth++;
+                    }
+                }
             }
 
             if (_batch.TryGet(key, out var span))
@@ -1578,6 +1589,8 @@ protected override void CleanUp()
             {
                 ancestor.Dispose();
             }
+
+            _ancestorsFilter?.Return(_pool!);
         }
 
         public override string ToString() =>
@@ -1924,7 +1937,24 @@ public void Dispose()
             {
                 state.Dispose();
             }
+            _readers.Clear();
         }
     }
-}
+
+    /// <summary>
+    /// Creates the combined <see cref="BitFilter"/> by or-ing the filters of all <paramref name="ancestors"/>.
+    /// </summary>
+    /// <param name="ancestors">The ancestors whose filters are combined.</param>
+    /// <returns>The combined filter.</returns>
+    private BitFilter CreateAncestorsFilter(CommittedBlockState[] ancestors)
+    {
+        var filter = CreateBitFilter();
+        foreach (var ancestor in ancestors)
+        {
+            filter.OrWith(ancestor.Filter);
+        }
+
+        return filter;
+    }
+}
\ No newline at end of file
diff --git a/src/Paprika/Chain/PooledSpanDictionary.cs b/src/Paprika/Chain/PooledSpanDictionary.cs
index b0b69853..7feff4e4 100644
--- a/src/Paprika/Chain/PooledSpanDictionary.cs
+++ b/src/Paprika/Chain/PooledSpanDictionary.cs
@@ -144,7 +144,20 @@ public void CopyTo(PooledSpanDictionary destination, Predicate<byte> metadataWhere, bool append = false)
         {
             if (metadataWhere(kvp.Metadata))
             {
-                Key.ReadType(kvp.Key);
                 destination.SetImpl(kvp.Key, kvp.Hash, kvp.Value, ReadOnlySpan<byte>.Empty, kvp.Metadata, append);
             }
         }
     }
+
+    public void CopyTo<TAccessor>(PooledSpanDictionary destination, Predicate<byte> metadataWhere,
+        in BitMapFilter<TAccessor> filter, bool append = false)
+        where TAccessor : struct, BitMapFilter.IAccessor<TAccessor>
+    {
+        foreach (var kvp in this)
+        {
+            if (metadataWhere(kvp.Metadata))
+            {
+                Key.ReadFrom(kvp.Key, out var key);
+                filter.Add(Blockchain.GetHash(key));
+                destination.SetImpl(kvp.Key, kvp.Hash, kvp.Value, ReadOnlySpan<byte>.Empty, kvp.Metadata, append);
+            }
+        }
+    }
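The new CopyTo overload is what lets the commit path above drop the separate `new Xor8(_bloom)` step: the filter is populated while entries are copied to their long-lived dictionary, so one pass does both. A sketch of the copy-and-index pattern under simplified types (dictionaries and a HashSet stand in for the pooled dictionary and the bit filter):

using System.Collections.Generic;

static void CopyAndIndex(
    Dictionary<ulong, byte[]> source, Dictionary<ulong, byte[]> destination, HashSet<ulong> filter)
{
    foreach (var (hash, value) in source)
    {
        filter.Add(hash);          // index on the way through
        destination[hash] = value; // copy as before
    }
}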
diff --git a/src/Paprika/Data/BitMapFilter.cs b/src/Paprika/Data/BitMapFilter.cs
new file mode 100644
index 00000000..62450ca7
--- /dev/null
+++ b/src/Paprika/Data/BitMapFilter.cs
@@ -0,0 +1,251 @@
+using System.Diagnostics;
+using System.Diagnostics.Contracts;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Paprika.Chain;
+using Paprika.Store;
+
+namespace Paprika.Data;
+
+public static class BitMapFilter
+{
+    private const int BitsPerByte = 8;
+    private const int SlotsPerPage = Page.PageSize / sizeof(int);
+    private const int SlotsPerPageMask = SlotsPerPage - 1;
+
+    public static BitMapFilter<Of1> CreateOf1(BufferPool pool) => new(new Of1(pool.Rent(true)));
+
+    public static BitMapFilter<Of2> CreateOf2(BufferPool pool) => new(new Of2(pool.Rent(true), pool.Rent(true)));
+
+    public static BitMapFilter<OfN> CreateOfN(BufferPool pool, int n)
+    {
+        var pages = new Page[n];
+        for (var i = 0; i < n; i++)
+        {
+            pages[i] = pool.Rent(true);
+        }
+
+        return new BitMapFilter<OfN>(new OfN(pages));
+    }
+
+    public interface IAccessor<TAccessor>
+        where TAccessor : struct, IAccessor<TAccessor>
+    {
+        [Pure]
+        ref int GetSlot(uint hash);
+
+        [Pure]
+        void Clear();
+
+        [Pure]
+        void Return(BufferPool pool);
+
+        int BucketCount { get; }
+
+        [Pure]
+        void OrWith(in TAccessor other);
+    }
+
+    public readonly struct Of1(Page page) : IAccessor<Of1>
+    {
+        private readonly Page _page = page;
+
+        public unsafe ref int GetSlot(uint hash) =>
+            ref Unsafe.Add(ref Unsafe.AsRef<int>(_page.Raw.ToPointer()), hash & SlotsPerPageMask);
+
+        public void Clear() => _page.Clear();
+
+        public void Return(BufferPool pool) => pool.Return(_page);
+
+        public int BucketCount => Page.PageSize * BitsPerByte;
+
+        public void OrWith(in Of1 other) => _page.OrWith(other._page);
+    }
+
+    public readonly struct Of2 : IAccessor<Of2>
+    {
+        private readonly Page _page0;
+        private readonly Page _page1;
+
+        public Of2(Page page0, Page page1)
+        {
+            _page0 = page0;
+            _page1 = page1;
+        }
+
+        public unsafe ref int GetSlot(uint hash)
+        {
+            const int pageMask = 1;
+            const int pageMaskShift = 1;
+
+            var page = (hash & pageMask) != pageMask ? _page0 : _page1;
+            var index = (hash >> pageMaskShift) & SlotsPerPageMask;
+
+            return ref Unsafe.Add(ref Unsafe.AsRef<int>(page.Raw.ToPointer()), index);
+        }
+
+        public void Clear()
+        {
+            _page0.Clear();
+            _page1.Clear();
+        }
+
+        public void Return(BufferPool pool)
+        {
+            pool.Return(_page0);
+            pool.Return(_page1);
+        }
+
+        public int BucketCount => Page.PageSize * BitsPerByte * 2;
+
+        public void OrWith(in Of2 other)
+        {
+            _page0.OrWith(other._page0);
+            _page1.OrWith(other._page1);
+        }
+    }
+
+    public readonly struct OfN : IAccessor<OfN>
+    {
+        private readonly Page[] _pages;
+        private readonly byte _pageMask;
+        private readonly byte _pageMaskShift;
+
+        public OfN(Page[] pages)
+        {
+            _pages = pages;
+            Debug.Assert(BitOperations.IsPow2(pages.Length));
+            _pageMask = (byte)(pages.Length - 1);
+            _pageMaskShift = (byte)BitOperations.Log2((uint)pages.Length);
+        }
+
+        public unsafe ref int GetSlot(uint hash)
+        {
+            var page = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_pages), (int)(hash & _pageMask));
+            var index = (hash >> _pageMaskShift) & SlotsPerPageMask;
+
+            return ref Unsafe.Add(ref Unsafe.AsRef<int>(page.Raw.ToPointer()), index);
+        }
+
+        public void Clear()
+        {
+            foreach (var page in _pages)
+            {
+                page.Clear();
+            }
+        }
+
+        public void Return(BufferPool pool)
+        {
+            foreach (var page in _pages)
+            {
+                pool.Return(page);
+            }
+        }
+
+        public void OrWith(in OfN other)
+        {
+            var count = PageCount;
+
+            Debug.Assert(other.PageCount == count);
+
+            ref var a = ref MemoryMarshal.GetArrayDataReference(_pages);
+            ref var b = ref MemoryMarshal.GetArrayDataReference(other._pages);
+
+            for (var i = 0; i < count; i++)
+            {
+                Unsafe.Add(ref a, i).OrWith(Unsafe.Add(ref b, i));
+            }
+        }
+
+        public int BucketCount => Page.PageSize * BitsPerByte * PageCount;
+
+        private int PageCount => _pageMask + 1;
+    }
+}
+
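All three accessors share one addressing scheme with the generic struct below: the 64-bit hash is folded to 32 bits, the low 5 bits pick a bit inside an int slot, and the remaining bits pick the page and the slot within it. A worked example for OfN with 128 pages, using the constants above (SlotsPerPageMask == 1023 for a 4096-byte page):

ulong hash = 0xA5A5_5A5A_DEAD_BEEF;
uint mixed = (uint)((hash >> 32) ^ hash);   // Mix: xor-fold the two 32-bit halves
int bit = 1 << (int)(mixed & 31);           // GetBitMask: bit within the int slot
uint slot = mixed >> 5;                     // BitsPerIntShift: slot-granular hash passed to GetSlot
uint page = slot & 127;                     // OfN: page index (_pageMask = 128 - 1)
uint index = (slot >> 7) & 1023;            // OfN: int slot within the page (_pageMaskShift = log2(128))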
+/// <summary>
+/// Represents a simple bitmap based filter for hashes.
+/// </summary>
+public readonly struct BitMapFilter<TAccessor>
+    where TAccessor : struct, BitMapFilter.IAccessor<TAccessor>
+{
+    private readonly TAccessor _accessor;
+    private const int BitsPerIntShift = 5;
+    private const int BitsPerInt = 1 << BitsPerIntShift;
+    private const int BitMask = BitsPerInt - 1;
+
+    /// <summary>
+    /// Creates the filter on top of the given accessor.
+    /// </summary>
+    public BitMapFilter(TAccessor accessor)
+    {
+        _accessor = accessor;
+    }
+
+    public void Add(ulong hash) => this[hash] = true;
+
+    public bool MayContain(ulong hash) => this[hash];
+
+    /// <summary>
+    /// Checks whether the filter may contain any of the given hashes.
+    /// </summary>
+    [SkipLocalsInit]
+    public bool MayContainAny(ulong hash0, ulong hash1)
+    {
+        var mixed0 = Mix(hash0);
+        var slot0 = _accessor.GetSlot(mixed0 >> BitsPerIntShift);
+        var mixed1 = Mix(hash1);
+        var slot1 = _accessor.GetSlot(mixed1 >> BitsPerIntShift);
+
+        return ((slot0 & GetBitMask(mixed0)) | (slot1 & GetBitMask(mixed1))) != 0;
+    }
+
+    public void Clear() => _accessor.Clear();
+
+    [SkipLocalsInit]
+    public bool this[ulong hash]
+    {
+        get
+        {
+            var mixed = Mix(hash);
+            var mask = GetBitMask(mixed);
+            var slot = _accessor.GetSlot(mixed >> BitsPerIntShift);
+            return (slot & mask) == mask;
+        }
+        set
+        {
+            var mixed = Mix(hash);
+            var mask = GetBitMask(mixed);
+            ref var slot = ref _accessor.GetSlot(mixed >> BitsPerIntShift);
+
+            if (value)
+            {
+                slot |= mask;
+            }
+            else
+            {
+                slot &= ~mask;
+            }
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static int GetBitMask(uint mixed) => 1 << (int)(mixed & BitMask);
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static uint Mix(ulong hash) => (uint)((hash >> 32) ^ hash);
+
+    /// <summary>
+    /// Ors this filter with the <paramref name="other"/> one and stores the result in this filter.
+    /// </summary>
+    /// <param name="other">The filter to combine with.</param>
+    public void OrWith(in BitMapFilter<TAccessor> other)
+    {
+        _accessor.OrWith(other._accessor);
+    }
+
+    public int BucketCount => _accessor.BucketCount;
+
+    public void Return(BufferPool pool) => _accessor.Return(pool);
+}
\ No newline at end of file
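Putting the pieces together, a hypothetical end-to-end use of the new type (the pool size is arbitrary here; OfN requires a power-of-two page count):

using System.Diagnostics;
using Paprika.Chain;
using Paprika.Data;

var pool = new BufferPool(256);
var a = BitMapFilter.CreateOfN(pool, 128);
var b = BitMapFilter.CreateOfN(pool, 128);

a.Add(13);
b.Add(17);
a.OrWith(b); // a now answers positively for the contents of both filters

Debug.Assert(a.MayContain(13) && a.MayContain(17));

a.Return(pool); // pages go back to the pool; the filter must not be used afterwards
b.Return(pool);
pool.Dispose();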
diff --git a/src/Paprika/Data/PageExtensions.cs b/src/Paprika/Data/PageExtensions.cs
new file mode 100644
index 00000000..5a787cdb
--- /dev/null
+++ b/src/Paprika/Data/PageExtensions.cs
@@ -0,0 +1,72 @@
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using Paprika.Store;
+
+namespace Paprika.Data;
+
+/// <summary>
+/// Provides extensions for <see cref="Page"/> that are data related.
+/// </summary>
+public static class PageExtensions
+{
+    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+    public static unsafe void OrWith(this Page @this, Page other)
+    {
+        const int bitsPerByte = 8;
+
+        ref var a = ref Unsafe.AsRef<byte>(@this.Raw.ToPointer());
+        ref var b = ref Unsafe.AsRef<byte>(other.Raw.ToPointer());
+
+        if (Vector512.IsHardwareAccelerated)
+        {
+            const int size = 512 / bitsPerByte;
+
+            for (UIntPtr i = 0; i < Page.PageSize; i += size)
+            {
+                var va = Vector512.LoadUnsafe(ref a, i);
+                var vb = Vector512.LoadUnsafe(ref b, i);
+                var vc = Vector512.BitwiseOr(va, vb);
+
+                vc.StoreUnsafe(ref a, i);
+            }
+        }
+        else if (Vector256.IsHardwareAccelerated)
+        {
+            const int size = 256 / bitsPerByte;
+
+            for (UIntPtr i = 0; i < Page.PageSize; i += size)
+            {
+                var va = Vector256.LoadUnsafe(ref a, i);
+                var vb = Vector256.LoadUnsafe(ref b, i);
+                var vc = Vector256.BitwiseOr(va, vb);
+
+                vc.StoreUnsafe(ref a, i);
+            }
+        }
+        else if (Vector128.IsHardwareAccelerated)
+        {
+            const int size = 128 / bitsPerByte;
+
+            for (UIntPtr i = 0; i < Page.PageSize; i += size)
+            {
+                var va = Vector128.LoadUnsafe(ref a, i);
+                var vb = Vector128.LoadUnsafe(ref b, i);
+                var vc = Vector128.BitwiseOr(va, vb);
+
+                vc.StoreUnsafe(ref a, i);
+            }
+        }
+        else
+        {
+            const int size = sizeof(long);
+
+            for (var i = 0; i < Page.PageSize / size; i++)
+            {
+                ref var va = ref Unsafe.As<byte, long>(ref Unsafe.Add(ref a, i * size));
+                var vb = Unsafe.As<byte, long>(ref Unsafe.Add(ref b, i * size));
+
+                va |= vb;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Paprika/IBatch.cs b/src/Paprika/IBatch.cs
index de15078b..138c2919 100644
--- a/src/Paprika/IBatch.cs
+++ b/src/Paprika/IBatch.cs
@@ -31,6 +31,11 @@ public interface IBatch : IReadOnlyBatch
     /// Gets the low levels stats of the given batch.
     /// </summary>
     IBatchStats? Stats { get; }
+
+    /// <summary>
+    /// Performs a time-consuming verification when <see cref="Commit"/> is called, checking that all the pages are reachable.
+    /// </summary>
+    void VerifyDbPagesOnCommit();
 }
 
 public interface IBatchStats
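The new member is opt-in and is exercised by the amended tests earlier in this diff; the typical shape of a use, with db/account/value standing in for test fixtures:

using var batch = db.BeginNextBatch();
batch.SetAccount(account, value);
batch.VerifyDbPagesOnCommit();                   // arm the check; time-consuming, test/debug only
await batch.Commit(CommitOptions.FlushDataOnly); // now throws if any db page is unreachable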
diff --git a/src/Paprika/Store/AbandonedList.cs b/src/Paprika/Store/AbandonedList.cs
index ecb980ae..64ae11f1 100644
--- a/src/Paprika/Store/AbandonedList.cs
+++ b/src/Paprika/Store/AbandonedList.cs
@@ -178,8 +178,12 @@ private void Register(DbAddress head, IBatchContext batch)
             }
         }
 
+        // 1. Attach the previously existing abandoned as a tail to the current one
         new AbandonedPage(batch.GetAt(head)).AttachTail(Addresses[maxAt], batch);
+        // 2. Update the batch id
         BatchIds[maxAt] = batch.BatchId;
+        // 3. Set the address properly to the new head that has been chained up
+        Addresses[maxAt] = head;
     }
     else
     {
@@ -197,18 +201,24 @@ private void Register(DbAddress head, IBatchContext batch)
 
     public void Accept(IPageVisitor visitor, IPageResolver resolver)
     {
+        TryAcceptAbandoned(visitor, resolver, Current);
+
         foreach (var addr in Addresses)
        {
-            if (addr.IsNull == false)
-            {
-                var abandoned = new AbandonedPage(resolver.GetAt(addr));
-                visitor.On(abandoned, addr);
-
-                abandoned.Accept(visitor, resolver);
-            }
+            TryAcceptAbandoned(visitor, resolver, addr);
         }
     }
 
+    private static void TryAcceptAbandoned(IPageVisitor visitor, IPageResolver resolver, DbAddress addr)
+    {
+        if (addr.IsNull)
+            return;
+
+        var abandoned = new AbandonedPage(resolver.GetAt(addr));
+        visitor.On(abandoned, addr);
+        abandoned.Accept(visitor, resolver);
+    }
+
     [Pure]
     public long GatherTotalAbandoned(IPageResolver resolver)
     {
diff --git a/src/Paprika/Store/AbandonedPage.cs b/src/Paprika/Store/AbandonedPage.cs
index f19819cf..1ca5afae 100644
--- a/src/Paprika/Store/AbandonedPage.cs
+++ b/src/Paprika/Store/AbandonedPage.cs
@@ -63,6 +63,26 @@ public void Accept(IPageVisitor visitor, IPageResolver resolver)
         }
     }
 
+    public IEnumerable<DbAddress> Enumerate()
+    {
+        for (var i = 0; i < Data.Count; i++)
+        {
+            var top = Data.Abandoned[i];
+            if ((top & PackedFlag) == PackedFlag)
+            {
+                // is packed, return this one and the next
+                var addr = top & ~PackedFlag;
+                yield return new DbAddress(addr);
+                yield return new DbAddress(addr + PackedDiff);
+            }
+            else
+            {
+                // not packed, just return
+                yield return new DbAddress(top);
+            }
+        }
+    }
+
     public bool TryPeek(out DbAddress addr, out bool hasMoreThanPeeked)
     {
         if (Data.Count == 0)
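Enumerate() mirrors the packed encoding of abandoned addresses: a stored value with PackedFlag set stands for two addresses, the base and the base offset by PackedDiff. A worked sketch with made-up constant values (the real PackedFlag/PackedDiff are defined elsewhere in AbandonedPage):

// Hypothetical constants for illustration only.
const uint PackedFlag = 0x8000_0000;
const uint PackedDiff = 1;

uint top = 0x8000_0010;        // flag set: a packed pair
uint addr = top & ~PackedFlag; // 0x10
// yields addr (16) and addr + PackedDiff (17); an entry without the flag yields just `top`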
diff --git a/src/Paprika/Store/DbAddressSet.cs b/src/Paprika/Store/DbAddressSet.cs
new file mode 100644
index 00000000..4dc969bd
--- /dev/null
+++ b/src/Paprika/Store/DbAddressSet.cs
@@ -0,0 +1,134 @@
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Paprika.Chain;
+
+namespace Paprika.Store;
+
+/// <summary>
+/// A dense representation of db addresses.
+/// </summary>
+public class DbAddressSet : IDisposable
+{
+    private readonly DbAddress _max;
+
+    private static readonly BufferPool _pool = new(128, false);
+
+    private readonly BitSet[] _bitSets;
+
+    private const int BitsPerByte = 8;
+    private const int AddressesPerPage = Page.PageSize * BitsPerByte;
+    private const int MemoryPerPage = AddressesPerPage * Page.PageSize;
+
+    public DbAddressSet(DbAddress max)
+    {
+        _max = max;
+
+        var pageCount = max / AddressesPerPage + 1;
+
+        _bitSets = new BitSet[pageCount];
+        for (var i = 0; i < pageCount; i++)
+        {
+            var page = _pool.Rent(false);
+            var set = new BitSet(page);
+            _bitSets[i] = set;
+            set.SetAll();
+        }
+    }
+
+    public IEnumerable<DbAddress> EnumerateSet()
+    {
+        for (var i = 0; i < _bitSets.Length; i++)
+        {
+            var set = _bitSets[i];
+            foreach (var index in set.EnumerateSet())
+            {
+                var addr = DbAddress.Page((uint)(i * AddressesPerPage + index));
+                if (addr >= _max)
+                    yield break;
+
+                yield return addr;
+            }
+        }
+    }
+
+    public bool this[DbAddress addr]
+    {
+        get
+        {
+            var (pageNo, i) = Math.DivRem(addr.Raw, AddressesPerPage);
+            return _bitSets[pageNo][(int)i];
+        }
+        set
+        {
+            var (pageNo, i) = Math.DivRem(addr.Raw, AddressesPerPage);
+            _bitSets[pageNo][(int)i] = value;
+        }
+    }
+
+    private readonly struct BitSet(Page page)
+    {
+        public bool this[int i]
+        {
+            get
+            {
+                ref var slot = ref GetSlot(i, out var bitMask);
+                return (slot & bitMask) == bitMask;
+            }
+            set
+            {
+                ref var slot = ref GetSlot(i, out var bitMask);
+                if (value)
+                {
+                    slot = (byte)(slot | bitMask);
+                }
+                else
+                {
+                    slot = (byte)(slot & ~bitMask);
+                }
+            }
+        }
+
+        public bool AnySet => page.Span.IndexOfAnyExcept((byte)0) != -1;
+
+        public IEnumerable<int> EnumerateSet()
+        {
+            if (page.Span.IndexOfAnyExcept((byte)0) == -1)
+            {
+                yield break;
+            }
+
+            for (int i = 0; i < AddressesPerPage; i++)
+            {
+                if (this[i])
+                {
+                    yield return i;
+                }
+            }
+        }
+
+        private unsafe ref byte GetSlot(int i, out int bitMask)
+        {
+            Debug.Assert(i < AddressesPerPage);
+
+            var (atByte, atBit) = Math.DivRem(i, BitsPerByte);
+
+            Debug.Assert(atBit < BitsPerByte);
+            bitMask = 1 << atBit;
+
+            return ref Unsafe.Add(ref Unsafe.AsRef<byte>(page.Raw.ToPointer()), atByte);
+        }
+
+        public void SetAll()
+        {
+            MemoryMarshal.Cast<byte, ulong>(page.Span).Fill(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        }
+
+        public void Return(BufferPool pool) => pool.Return(page);
+    }
+
+    public void Dispose()
+    {
+        foreach (var set in _bitSets)
+        {
+            set.Return(_pool);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Paprika/Store/IPageVisitor.cs b/src/Paprika/Store/IPageVisitor.cs
index e834021c..b4f6293a 100644
--- a/src/Paprika/Store/IPageVisitor.cs
+++ b/src/Paprika/Store/IPageVisitor.cs
@@ -16,4 +16,18 @@ IDisposable On<TNext>(StorageFanOutPage<TNext> page, DbAddress addr)
     IDisposable On(LeafOverflowPage page, DbAddress addr);
 
     IDisposable On(Merkle.StateRootPage data, DbAddress addr);
+}
+
+public sealed class Disposable : IDisposable
+{
+    private Disposable()
+    {
+    }
+
+    public static readonly IDisposable Instance = new Disposable();
+
+    public void Dispose()
+    {
+
+    }
 }
\ No newline at end of file
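DbAddressSet enables a classic mark-and-sweep: every address starts as set, reachable addresses are cleared, and whatever remains set is unaccounted for. This is the pattern the MissingPagesVisitor below builds on; a small sketch:

using Paprika.Store;

using var set = new DbAddressSet(DbAddress.Page(100)); // all 100 addresses start as true

foreach (var reachable in new[] { DbAddress.Page(0), DbAddress.Page(1) })
    set[reachable] = false;                            // mark reachable

foreach (var leaked in set.EnumerateSet())             // sweep: whatever is still true
    Console.WriteLine($"potentially leaked page: {leaked}");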
diff --git a/src/Paprika/Store/PagedDb.cs b/src/Paprika/Store/PagedDb.cs
index dd1c3c95..2bad3b08 100644
--- a/src/Paprika/Store/PagedDb.cs
+++ b/src/Paprika/Store/PagedDb.cs
@@ -523,7 +523,7 @@ class Batch : BatchContextBase, IBatch
         private readonly PagedDb _db;
         private readonly RootPage _root;
         private readonly uint _reusePagesOlderThanBatchId;
-
+        private bool _verify = false;
         private bool _disposed;
 
         private readonly Context _ctx;
@@ -623,8 +623,16 @@ public async ValueTask Commit(CommitOptions options)
             // memoize the abandoned so that it's preserved for future uses
             MemoizeAbandoned();
 
+            if (_verify)
+            {
+                using var missing = new MissingPagesVisitor(_root, _db._historyDepth);
+                _root.Accept(missing, this);
+                missing.EnsureNoMissing(this);
+            }
+
             // report metrics
-            _db.ReportPageCountPerCommit(_written.Count, _metrics.PagesReused, _metrics.PagesAllocated, _abandoned.Count);
+            _db.ReportPageCountPerCommit(_written.Count, _metrics.PagesReused, _metrics.PagesAllocated,
+                _abandoned.Count);
 
             _db.ReportReads(_metrics.Reads);
             _db.ReportWrites(_metrics.Writes);
@@ -654,6 +662,11 @@ public async ValueTask Commit(CommitOptions options)
 
         IBatchStats? IBatch.Stats => Stats;
 
+        public void VerifyDbPagesOnCommit()
+        {
+            _verify = true;
+        }
+
         [DebuggerStepThrough]
         public override Page GetAt(DbAddress address)
         {
@@ -742,7 +755,8 @@ public override void RegisterForFutureReuse(Page page)
 
 #if TRACKING_REUSED_PAGES
             // register at this batch
-            ref var batchId = ref CollectionsMarshal.GetValueRefOrAddDefault(_db._registeredForReuse, addr, out var exists);
+            ref var batchId =
+                ref CollectionsMarshal.GetValueRefOrAddDefault(_db._registeredForReuse, addr, out var exists);
             if (exists)
             {
                 throw new Exception(
@@ -822,6 +836,68 @@ public void Clear()
 
     public void ForceFlush() => _manager.ForceFlush();
 }
 
-public interface IReportingReadOnlyBatch : IReporting, IReadOnlyBatch
+internal class MissingPagesVisitor : IPageVisitor, IDisposable
 {
+    private readonly DbAddressSet _pages;
+
+    public MissingPagesVisitor(RootPage page, byte historyDepth)
+    {
+        _pages = new(page.Data.NextFreePage);
+
+        // Mark all the roots
+        for (uint i = 0; i < historyDepth; i++)
+        {
+            Mark(DbAddress.Page(i));
+        }
+    }
+
+    public IDisposable On(RootPage page, DbAddress addr) => Mark(addr);
+
+    public IDisposable On(AbandonedPage page, DbAddress addr)
+    {
+        foreach (var abandoned in page.Enumerate())
+        {
+            Mark(abandoned);
+        }
+
+        return Mark(addr);
+    }
+
+    public IDisposable On(DataPage page, DbAddress addr) => Mark(addr);
+
+    public IDisposable On(FanOutPage page, DbAddress addr) => Mark(addr);
+
+    public IDisposable On(LeafPage page, DbAddress addr) => Mark(addr);
+
+    public IDisposable On<TNext>(StorageFanOutPage<TNext> page, DbAddress addr)
+        where TNext : struct, IPageWithData<TNext>
+        => Mark(addr);
+
+    public IDisposable On(LeafOverflowPage page, DbAddress addr) => Mark(addr);
+
+    public IDisposable On(Merkle.StateRootPage data, DbAddress addr) => Mark(addr);
+
+    private IDisposable Mark(DbAddress addr)
+    {
+        _pages[addr] = false;
+        return Disposable.Instance;
+    }
+
+    public void Dispose() => _pages.Dispose();
+
+    public void EnsureNoMissing(IReadOnlyBatchContext batch)
+    {
+        foreach (var addr in _pages.EnumerateSet())
+        {
+            var page = batch.GetAt(addr);
+            throw new Exception(
+                $"The page at {addr} is not reachable from the tree nor from the set of abandoned pages. " +
+                $"It is highly likely a leak. The page is {page.Header.PageType} and was last written in batch {page.Header.BatchId} " +
+                $"while the current batch is {batch.BatchId}.");
+        }
+    }
+}
+
+public interface IReportingReadOnlyBatch : IReporting, IReadOnlyBatch
+{
+}
\ No newline at end of file
diff --git a/src/Paprika/Store/RootPage.cs b/src/Paprika/Store/RootPage.cs
index 288a5811..d98e5043 100644
--- a/src/Paprika/Store/RootPage.cs
+++ b/src/Paprika/Store/RootPage.cs
@@ -101,9 +101,9 @@ public void Accept(IPageVisitor visitor, IPageResolver resolver)
             new Merkle.StateRootPage(resolver.GetAt(stateRoot)).Accept(visitor, resolver, stateRoot);
         }
 
+        Data.Ids.Accept(visitor, resolver);
         Data.Storage.Accept(visitor, resolver);
-
-        // Data.AbandonedList.Accept(visitor, resolver);
+        Data.AbandonedList.Accept(visitor, resolver);
     }
 
     /// <summary>
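With AbandonedList.Accept re-enabled and Data.Ids included, RootPage.Accept now walks every page class that legitimately holds an address, which is what makes the leak check above sound: any page the visitor never reaches is reported by MissingPagesVisitor. As a closing illustration, a minimal visitor in the same shape, here just counting visited pages (the On overload set is taken from the IPageVisitor interface in this diff):

using Paprika.Store;

internal class CountingVisitor : IPageVisitor
{
    public int Count { get; private set; }

    private IDisposable Visit()
    {
        Count++;
        return Disposable.Instance;
    }

    public IDisposable On(RootPage page, DbAddress addr) => Visit();
    public IDisposable On(AbandonedPage page, DbAddress addr) => Visit();
    public IDisposable On(DataPage page, DbAddress addr) => Visit();
    public IDisposable On(FanOutPage page, DbAddress addr) => Visit();
    public IDisposable On(LeafPage page, DbAddress addr) => Visit();
    public IDisposable On<TNext>(StorageFanOutPage<TNext> page, DbAddress addr)
        where TNext : struct, IPageWithData<TNext> => Visit();
    public IDisposable On(LeafOverflowPage page, DbAddress addr) => Visit();
    public IDisposable On(Merkle.StateRootPage data, DbAddress addr) => Visit();
}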