Skip to content

Commit

Permalink
refactor: switch to streamvbyte coding with arm64 support
Browse files Browse the repository at this point in the history
  • Loading branch information
variar committed Nov 18, 2024
1 parent 7abb414 commit 48fdbf2
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 62 deletions.
27 changes: 3 additions & 24 deletions 3rdparty/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(_TMP_CPM_USE_LOCAL_PACKAGES ${CPM_USE_LOCAL_PACKAGES})
cpmaddpackage("gh:simdutf/[email protected]")
cpmaddpackage("gh:foonathan/[email protected]")
cpmaddpackage("gh:RoaringBitmap/[email protected]")
cpmaddpackage("gh:lemire/[email protected]")

if(APPLE)
cpmaddpackage(
Expand Down Expand Up @@ -364,29 +365,6 @@ if(KLOGG_USE_SENTRY)
endif()
endif(KLOGG_USE_SENTRY)

cpmaddpackage(
NAME
simdcomp
GITHUB_REPOSITORY
lemire/simdcomp
GIT_TAG
009c67807670d16f8984c0534aef0e630e5465a4
DOWNLOAD_ONLY
YES
)
if(simdcomp_ADDED)
add_library(simdcomp STATIC
${simdcomp_SOURCE_DIR}/src/avxbitpacking.c
${simdcomp_SOURCE_DIR}/src/simdfor.c
${simdcomp_SOURCE_DIR}/src/simdcomputil.c
${simdcomp_SOURCE_DIR}/src/simdbitpacking.c
${simdcomp_SOURCE_DIR}/src/simdintegratedbitpacking.c
${simdcomp_SOURCE_DIR}/src/simdpackedsearch.c
${simdcomp_SOURCE_DIR}/src/simdpackedselect.c
)
target_include_directories(simdcomp PUBLIC ${simdcomp_SOURCE_DIR}/include)
endif()

set(klogg_cpm_targets
xxhash
Catch2
Expand All @@ -408,7 +386,8 @@ set(klogg_cpm_targets
crashpad_compat
crashpad_util
mini_chromium
simdcomp
streamvbyte
FastPFor
)
foreach(target ${klogg_cpm_targets})
if(TARGET ${target})
Expand Down
2 changes: 1 addition & 1 deletion src/logdata/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ target_link_libraries(
kdtoolbox
robin_hood
simdutf
simdcomp
streamvbyte
klogg_mimalloc_wrapper
)

Expand Down
8 changes: 2 additions & 6 deletions src/logdata/include/compressedlinestorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,12 @@ class CompressedLinePositionStorage {
void uncompress_last_block();
struct BlockMetadata {
OffsetInFile firstLineOffset{};
uint8_t packetBitWidth{};
size_t packetStorageOffset{};
};

klogg::vector<BlockMetadata> blocks_;

struct alignas( 16 ) AlignedStorage {
std::array<uint8_t, 16> d;
};
klogg::vector<AlignedStorage> packedLinesStorage_;
klogg::vector<uint8_t> packedLinesStorage_;
size_t packedLinesStorageUsedSize_ = 0;

klogg::vector<OffsetInFile> currentLinesBlock_;
klogg::vector<uint32_t> currentLinesBlockShifted_;
Expand Down
50 changes: 19 additions & 31 deletions src/logdata/src/compressedlinestorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#include "linetypes.h"
#include "log.h"

#include <simdcomp.h>
#include <streamvbyte.h>
#include <streamvbytedelta.h>

// Number of 32-bit shifted line offsets handled per compressed block: it is the
// element count passed to the streamvbyte delta encode/decode calls below and
// the size of the temporary arrays that blocks are decoded into.
static constexpr size_t SimdIndexBlockSize = 128;

Expand Down Expand Up @@ -93,16 +94,16 @@ void CompressedLinePositionStorage::compress_current_block()
{
BlockMetadata& block = blocks_.emplace_back();
block.firstLineOffset = currentLinesBlock_.front();
block.packetBitWidth
= static_cast<uint8_t>( simdmaxbitsd1( 0, currentLinesBlockShifted_.data() ) );

const size_t packedLinesSize = block.packetBitWidth;
packedLinesStorage_.resize( packedLinesStorage_.size() + packedLinesSize );
block.packetStorageOffset = packedLinesStorage_.size() - packedLinesSize;
const size_t packedLinesSize = streamvbyte_max_compressedbytes( SimdIndexBlockSize );
packedLinesStorage_.resize( packedLinesStorageUsedSize_ + packedLinesSize );
block.packetStorageOffset = packedLinesStorageUsedSize_;

simdpackd1( 0, currentLinesBlockShifted_.data(),
(__m128i*)( packedLinesStorage_.data() + block.packetStorageOffset ),
block.packetBitWidth );
const size_t packedBytes
= streamvbyte_delta_encode( currentLinesBlockShifted_.data(), SimdIndexBlockSize,
packedLinesStorage_.data() + block.packetStorageOffset, 0 );

packedLinesStorageUsedSize_ += packedBytes;

currentLinesBlock_.clear();
currentLinesBlockShifted_.clear();
Expand All @@ -125,18 +126,8 @@ OffsetInFile CompressedLinePositionStorage::at( LineNumber index ) const

const BlockMetadata& block = blocks_[ blockIndex ];
std::array<uint32_t, SimdIndexBlockSize> unpackedBlock;
if ( canUseSimdSelect_ ) {
unpackedBlock[ indexInBlock ] = simdselectd1(
0,
reinterpret_cast<const __m128i*>( &packedLinesStorage_[ block.packetStorageOffset ] ),
block.packetBitWidth, static_cast<int>( indexInBlock ) );
}
else {
simdunpackd1(
0,
reinterpret_cast<const __m128i*>( &packedLinesStorage_[ block.packetStorageOffset ] ),
unpackedBlock.data(), block.packetBitWidth );
}
streamvbyte_delta_decode( &packedLinesStorage_[ block.packetStorageOffset ],
unpackedBlock.data(), SimdIndexBlockSize, 0 );

return block.firstLineOffset + OffsetInFile( unpackedBlock[ indexInBlock ] );
}
Expand All @@ -155,9 +146,8 @@ void CompressedLinePositionStorage::uncompress_last_block()
currentLinesBlockShifted_.resize( SimdIndexBlockSize );
const BlockMetadata& block = blocks_.back();

simdunpackd1(
0, reinterpret_cast<const __m128i*>( &packedLinesStorage_[ block.packetStorageOffset ] ),
currentLinesBlockShifted_.data(), block.packetBitWidth );
streamvbyte_delta_decode( &packedLinesStorage_[ block.packetStorageOffset ],
currentLinesBlockShifted_.data(), SimdIndexBlockSize, 0 );

std::transform( currentLinesBlockShifted_.begin(), currentLinesBlockShifted_.end(),
currentLinesBlock_.begin(), [ &block ]( uint32_t pos ) {
Expand Down Expand Up @@ -208,19 +198,17 @@ klogg::vector<OffsetInFile> CompressedLinePositionStorage::range( LineNumber fir
result.reserve( count.get() );

if ( firstBlockIndex == blocks_.size() ) {
std::copy( currentLinesBlock_.begin() + static_cast<int64_t>(indexInFirstBlock),
currentLinesBlock_.begin() + static_cast<int64_t>(indexInLastBlock + 1),
std::copy( currentLinesBlock_.begin() + static_cast<int64_t>( indexInFirstBlock ),
currentLinesBlock_.begin() + static_cast<int64_t>( indexInLastBlock + 1 ),
std::back_inserter( result ) );
}
else {
size_t lastBlockToUnpack = std::min( lastBlockIndex, blocks_.size() - 1 );
for ( size_t blockIndex = firstBlockIndex; blockIndex <= lastBlockToUnpack; ++blockIndex ) {
const BlockMetadata& block = blocks_[ blockIndex ];
std::array<uint32_t, SimdIndexBlockSize> unpackedBlock;
simdunpackd1( 0,
reinterpret_cast<const __m128i*>(
&packedLinesStorage_[ block.packetStorageOffset ] ),
unpackedBlock.data(), block.packetBitWidth );
streamvbyte_delta_decode( &packedLinesStorage_[ block.packetStorageOffset ],
unpackedBlock.data(), SimdIndexBlockSize, 0 );
const size_t copyFromIndex = blockIndex == firstBlockIndex ? indexInFirstBlock : 0u;
const size_t copyToIndex
= blockIndex == lastBlockIndex ? indexInLastBlock + 1 : unpackedBlock.size();
Expand All @@ -234,7 +222,7 @@ klogg::vector<OffsetInFile> CompressedLinePositionStorage::range( LineNumber fir

if ( lastBlockIndex == blocks_.size() ) {
std::copy( currentLinesBlock_.begin(),
currentLinesBlock_.begin() + static_cast<int64_t>(indexInLastBlock + 1),
currentLinesBlock_.begin() + static_cast<int64_t>( indexInLastBlock + 1 ),
std::back_inserter( result ) );
}
}
Expand Down

0 comments on commit 48fdbf2

Please sign in to comment.