Skip to content

Commit

Permalink
packed row group range read
Browse files Browse the repository at this point in the history
Signed-off-by: shaoting-huang <[email protected]>
  • Loading branch information
shaoting-huang committed Dec 27, 2024
1 parent b968ad4 commit 8b9f281
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions cpp/include/milvus-storage/packed/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,36 +41,28 @@ using RowOffsetMinHeap =

class PackedRecordBatchReader : public arrow::RecordBatchReader {
public:
// Test only
// Read a file with default buffer size
//
// Constructs a reader over a single file.
//   fs          - Arrow filesystem handle, taken by non-const reference
//                 (presumably used to open `path` — confirm in the definition).
//   path        - location of the one file to read.
//   schema      - Arrow schema passed to the reader (presumably the schema of
//                 the batches it produces — confirm in the definition).
//   buffer_size - read buffer size; DEFAULT_READ_BUFFER_SIZE is used when the
//                 argument is omitted (unit not visible here — TODO confirm).
PackedRecordBatchReader(arrow::fs::FileSystem& fs,
const std::string& path,
const std::shared_ptr<arrow::Schema> schema,
const int64_t buffer_size = DEFAULT_READ_BUFFER_SIZE);

// Test only
// Read a range of row groups from a file with default buffer size
//
// Same as the single-file constructor, restricted to a row-group range.
//   fs              - Arrow filesystem handle, taken by non-const reference.
//   path            - location of the one file to read.
//   schema          - Arrow schema passed to the reader.
//   start_row_group - first row group of the range.
//   end_row_group   - end of the range.
//                     NOTE(review): whether this bound is inclusive or
//                     exclusive is not visible in this header — confirm
//                     against the definition before relying on it.
//   buffer_size     - read buffer size; DEFAULT_READ_BUFFER_SIZE when omitted.
PackedRecordBatchReader(arrow::fs::FileSystem& fs,
const std::string& path,
const std::shared_ptr<arrow::Schema> schema,
const size_t start_row_group,
const size_t end_row_group,
const int64_t buffer_size = DEFAULT_READ_BUFFER_SIZE);

// Read all files with default buffer size
//
// Constructs a reader over several files at once.
//   fs             - Arrow filesystem handle, taken by non-const reference.
//   paths          - locations of the files to read.
//   schema         - Arrow schema passed to the reader.
//   column_offsets - ColumnOffset entries (type declared elsewhere);
//                    presumably they locate each schema column within the
//                    files in `paths` — confirm against ColumnOffset's
//                    definition.
//   needed_columns - set of integer column identifiers; presumably the
//                    indices of the columns to materialize — TODO confirm.
//   buffer_size    - read buffer size; DEFAULT_READ_BUFFER_SIZE when omitted.
PackedRecordBatchReader(arrow::fs::FileSystem& fs,
const std::vector<std::string>& paths,
const std::shared_ptr<arrow::Schema> schema,
const std::vector<ColumnOffset>& column_offsets,
const std::set<int>& needed_columns,
const int64_t buffer_size = DEFAULT_READ_BUFFER_SIZE);

// Constructs a reader over several files, with row-group bounds supplied as
// two vectors.
//   fs               - Arrow filesystem handle, taken by non-const reference.
//   paths            - locations of the files to read.
//   schema           - Arrow schema passed to the reader.
//   column_offsets   - ColumnOffset entries (type declared elsewhere).
//   needed_columns   - set of integer column identifiers (presumably column
//                      indices — TODO confirm).
//   start_row_groups - starting row groups; presumably one entry per element
//                      of `paths` — confirm against the definition.
//   end_row_groups   - ending row groups, presumably parallel to
//                      `start_row_groups`.
//                      NOTE(review): inclusive/exclusive semantics are not
//                      visible in this header.
//   buffer_size      - read buffer size; DEFAULT_READ_BUFFER_SIZE when
//                      omitted.
PackedRecordBatchReader(arrow::fs::FileSystem& fs,
const std::vector<std::string>& paths,
const std::shared_ptr<arrow::Schema> schema,
const std::vector<ColumnOffset>& column_offsets,
const std::set<int>& needed_columns,
const std::vector<size_t>& start_row_groups,
const std::vector<size_t>& end_row_groups,
const int64_t buffer_size = DEFAULT_READ_BUFFER_SIZE);

// Returns the Arrow schema associated with this reader.
// Overrides arrow::RecordBatchReader::schema().
std::shared_ptr<arrow::Schema> schema() const override;

// Reads the next record batch into `*batch` and reports success or failure
// through the returned arrow::Status.
// Overrides arrow::RecordBatchReader::ReadNext(); the base-class contract sets
// `*batch` to null at end of stream — presumably this override does the same
// (confirm against the definition).
arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch>* batch) override;
Expand All @@ -80,6 +72,14 @@ class PackedRecordBatchReader : public arrow::RecordBatchReader {
// Closes the reader and reports the outcome as an arrow::Status.
// Overrides arrow::RecordBatchReader::Close().
arrow::Status Close() override;

private:
// Private constructor: multiple files plus row-group bounds given as two
// vectors. Being private, it is callable only from within the class (or its
// friends).
//   fs               - Arrow filesystem handle, taken by non-const reference.
//   paths            - locations of the files to read.
//   schema           - Arrow schema passed to the reader.
//   column_offsets   - ColumnOffset entries (type declared elsewhere).
//   needed_columns   - set of integer column identifiers (presumably column
//                      indices — TODO confirm).
//   start_row_groups - starting row groups; presumably one entry per element
//                      of `paths` — confirm against the definition.
//   end_row_groups   - ending row groups, presumably parallel to
//                      `start_row_groups`; inclusive/exclusive semantics are
//                      not visible in this header.
//   buffer_size      - read buffer size; DEFAULT_READ_BUFFER_SIZE when
//                      omitted.
PackedRecordBatchReader(arrow::fs::FileSystem& fs,
const std::vector<std::string>& paths,
const std::shared_ptr<arrow::Schema> schema,
const std::vector<ColumnOffset>& column_offsets,
const std::set<int>& needed_columns,
const std::vector<size_t>& start_row_groups,
const std::vector<size_t>& end_row_groups,
const int64_t buffer_size = DEFAULT_READ_BUFFER_SIZE);
// Advance buffer to fill the expected buffer size
// Returns an arrow::Status describing whether the advance succeeded.
arrow::Status advanceBuffer();
// Collects array chunks for the given `chunksize` and returns them as raw
// pointers to const arrow::Array. Const member: does not modify the reader's
// non-mutable state.
// NOTE(review): the returned pointers are presumably non-owning views into
// arrays held by the reader's buffers — confirm their lifetime against the
// definition before storing them.
std::vector<const arrow::Array*> collectChunks(int64_t chunksize) const;
Expand Down

0 comments on commit 8b9f281

Please sign in to comment.