Skip to content
This repository has been archived by the owner on Jan 16, 2024. It is now read-only.

Commit

Permalink
Bundle 9.0.0-1 (2022-08-03)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeroen committed Aug 3, 2022
1 parent 974f78d commit 27f5cb1
Show file tree
Hide file tree
Showing 135 changed files with 9,580 additions and 2,475 deletions.
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,41 @@
# arrow 8.0.0-1
# arrow 9.0.0-1

- mingw-w64-i686-arrow-8.0.0-1-any.pkg.tar.xz
- mingw-w64-i686-arrow-9.0.0-1-any.pkg.tar.xz
- mingw-w64-i686-aws-sdk-cpp-1.7.365-1-any.pkg.tar.xz
- mingw-w64-i686-brotli-1.0.9-4-any.pkg.tar.xz
- mingw-w64-i686-openssl-1.1.1.m-9800-any.pkg.tar.xz
- mingw-w64-i686-lz4-1.8.2-1-any.pkg.tar.xz
- mingw-w64-i686-re2-20200801-1-any.pkg.tar.xz
- mingw-w64-i686-snappy-1.1.7-2-any.pkg.tar.xz
- mingw-w64-i686-bzip2-1.0.8-1-any.pkg.tar.xz
- mingw-w64-i686-curl-7.64.1-9202-any.pkg.tar.xz
- mingw-w64-i686-libssh2-1.10.0-9800-any.pkg.tar.xz
- mingw-w64-i686-thrift-0.13.0-1-any.pkg.tar.xz
- mingw-w64-i686-zstd-1.4.4-1-any.pkg.tar.xz
- mingw-w64-i686-libutf8proc-2.4.0-2-any.pkg.tar.xz
- mingw-w64-x86_64-arrow-8.0.0-1-any.pkg.tar.xz
- mingw-w64-x86_64-arrow-9.0.0-1-any.pkg.tar.xz
- mingw-w64-x86_64-aws-sdk-cpp-1.7.365-1-any.pkg.tar.xz
- mingw-w64-x86_64-brotli-1.0.9-4-any.pkg.tar.xz
- mingw-w64-x86_64-openssl-1.1.1.m-9800-any.pkg.tar.xz
- mingw-w64-x86_64-lz4-1.8.2-1-any.pkg.tar.xz
- mingw-w64-x86_64-re2-20200801-1-any.pkg.tar.xz
- mingw-w64-x86_64-snappy-1.1.7-2-any.pkg.tar.xz
- mingw-w64-x86_64-bzip2-1.0.8-1-any.pkg.tar.xz
- mingw-w64-x86_64-curl-7.64.1-9202-any.pkg.tar.xz
- mingw-w64-x86_64-libssh2-1.10.0-9800-any.pkg.tar.xz
- mingw-w64-x86_64-thrift-0.13.0-1-any.pkg.tar.xz
- mingw-w64-x86_64-zstd-1.4.4-1-any.pkg.tar.xz
- mingw-w64-x86_64-libutf8proc-2.4.0-2-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-arrow-8.0.0-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-arrow-9.0.0-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-aws-sdk-cpp-1.7.365-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-brotli-1.0.9-4-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-openssl-1.1.1.m-9800-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-lz4-1.8.2-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-re2-20200801-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-snappy-1.1.7-2-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-bzip2-1.0.8-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-curl-7.64.1-9202-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-libssh2-1.10.0-9800-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-thrift-0.13.0-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-zstd-1.4.4-1-any.pkg.tar.xz
- mingw-w64-ucrt-x86_64-libutf8proc-2.4.0-2-any.pkg.tar.xz
2 changes: 1 addition & 1 deletion include/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ class ARROW_EXPORT StructArray : public Array {
// Return a shared pointer in case the requestor desires to share ownership
// with this array. The returned array has its offset, length and null
// count adjusted.
std::shared_ptr<Array> field(int pos) const;
const std::shared_ptr<Array>& field(int pos) const;

const ArrayVector& fields() const;

Expand Down
2 changes: 1 addition & 1 deletion include/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class ARROW_EXPORT ArrayBuilder {
/// \brief Append a range of values from an array.
///
/// The given array must be the same type as the builder.
virtual Status AppendArraySlice(const ArrayData& array, int64_t offset,
virtual Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) {
return Status::NotImplemented("AppendArraySlice for builder for ", *type());
}
Expand Down
4 changes: 2 additions & 2 deletions include/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
auto bitmap = array.GetValues<uint8_t>(0, 0);
auto offsets = array.GetValues<offset_type>(1);
Expand Down Expand Up @@ -516,7 +516,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
return AppendValues(
array.GetValues<uint8_t>(1, 0) + ((array.offset + offset) * byte_width_), length,
Expand Down
9 changes: 5 additions & 4 deletions include/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,11 @@ class DictionaryBuilderBase : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) final {
// Visit the indices and insert the unpacked values.
const auto& dict_ty = internal::checked_cast<const DictionaryType&>(*array.type);
const typename TypeTraits<T>::ArrayType dict(array.dictionary);
// See if possible to avoid using ToArrayData here
const typename TypeTraits<T>::ArrayType dict(array.dictionary().ToArrayData());
ARROW_RETURN_NOT_OK(Reserve(length));
switch (dict_ty.index_type()->id()) {
case Type::UINT8:
Expand Down Expand Up @@ -490,10 +491,10 @@ class DictionaryBuilderBase : public ArrayBuilder {
protected:
template <typename c_type>
Status AppendArraySliceImpl(const typename TypeTraits<T>::ArrayType& dict,
const ArrayData& array, int64_t offset, int64_t length) {
const ArraySpan& array, int64_t offset, int64_t length) {
const c_type* values = array.GetValues<c_type>(1) + offset;
return VisitBitBlocks(
array.buffers[0], array.offset + offset, length,
array.buffers[0].data, array.offset + offset, length,
[&](const int64_t position) {
const int64_t index = static_cast<int64_t>(values[position]);
if (dict.IsValid(index)) {
Expand Down
28 changes: 15 additions & 13 deletions include/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ class BaseListBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
const offset_type* offsets = array.GetValues<offset_type>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(*array.child_data[0],
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(array.child_data[0],
offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
Expand Down Expand Up @@ -296,18 +296,20 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {

Status AppendEmptyValues(int64_t length) final;

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
const int32_t* offsets = array.GetValues<int32_t>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
// Add together the inner StructArray offset to the Map/List offset
int64_t key_value_offset = array.child_data[0].offset + offsets[row];
ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice(
*array.child_data[0]->child_data[0], offsets[row], slot_length));
array.child_data[0].child_data[0], key_value_offset, slot_length));
ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice(
*array.child_data[0]->child_data[1], offsets[row], slot_length));
array.child_data[0].child_data[1], key_value_offset, slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
Expand Down Expand Up @@ -425,12 +427,12 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {

Status AppendEmptyValues(int64_t length) final;

Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) final {
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(
*array.child_data[0], list_size_ * (array.offset + row), list_size_));
array.child_data[0], list_size_ * (array.offset + row), list_size_));
ARROW_RETURN_NOT_OK(Append());
} else {
ARROW_RETURN_NOT_OK(AppendNull());
Expand Down Expand Up @@ -532,13 +534,13 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
for (int i = 0; static_cast<size_t>(i) < children_.size(); i++) {
ARROW_RETURN_NOT_OK(children_[i]->AppendArraySlice(*array.child_data[i],
ARROW_RETURN_NOT_OK(children_[i]->AppendArraySlice(array.child_data[i],
array.offset + offset, length));
}
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(validity, array.offset + offset, length);
return Status::OK();
Expand Down
6 changes: 3 additions & 3 deletions include/arrow/array/builder_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {

Status Append(std::nullptr_t) { return AppendNull(); }

Status AppendArraySlice(const ArrayData&, int64_t, int64_t length) override {
Status AppendArraySlice(const ArraySpan&, int64_t, int64_t length) override {
return AppendNulls(length);
}

Expand Down Expand Up @@ -279,7 +279,7 @@ class NumericBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
return AppendValues(array.GetValues<value_type>(1) + offset, length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
Expand Down Expand Up @@ -513,7 +513,7 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {

Status AppendValues(int64_t length, bool value);

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
return AppendValues(array.GetValues<uint8_t>(1, 0), length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
Expand Down
4 changes: 2 additions & 2 deletions include/arrow/array/builder_union.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
return offsets_builder_.Append(offset);
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override;

Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
Expand Down Expand Up @@ -239,7 +239,7 @@ class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
/// is called, and all other child builders must have null or empty value appended.
Status Append(int8_t next_type) { return types_builder_.Append(next_type); }

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override;
};

Expand Down
132 changes: 132 additions & 0 deletions include/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,15 @@

#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/type.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

class Array;

// When slicing, we do not know the null count of the sliced range without
// doing some computation. To avoid doing this eagerly, we set the null count
// to -1 (any negative number will do). When Array::null_count is called the
Expand Down Expand Up @@ -242,8 +246,136 @@ struct ARROW_EXPORT ArrayData {
std::shared_ptr<ArrayData> dictionary;
};

/// \brief A non-owning Buffer reference
///
/// Plain aggregate describing a region of memory (pointer + size) together
/// with an optional pointer back to the std::shared_ptr<Buffer> it was taken
/// from. All members default to "empty" (null pointer, zero size, no owner).
struct ARROW_EXPORT BufferSpan {
// It is the user of this class's responsibility to ensure that
// buffers that were const originally are not written to
// accidentally.
// (The pointer is deliberately non-const; ArraySpan::SetBuffer stores
// read-only buffer data here via const_cast.)
uint8_t* data = NULLPTR;
// Size associated with `data`; ArraySpan::SetBuffer fills it from
// Buffer::size().
int64_t size = 0;
// Pointer back to buffer that owns this memory
// This is a pointer to a shared_ptr, not a shared_ptr copy, so no reference
// is held through it. May be null when there is no owning Buffer.
const std::shared_ptr<Buffer>* owner = NULLPTR;
};

/// \brief EXPERIMENTAL: A non-owning ArrayData reference that is cheaply
/// copyable and does not contain any shared_ptr objects. Do not use in public
/// APIs aside from compute kernels for now
struct ARROW_EXPORT ArraySpan {
const DataType* type = NULLPTR;
int64_t length = 0;
mutable int64_t null_count = kUnknownNullCount;
int64_t offset = 0;
BufferSpan buffers[3];

// 16 bytes of scratch space to enable this ArraySpan to be a view onto
// scalar values including binary scalars (where we need to create a buffer
// that looks like two 32-bit or 64-bit offsets)
uint64_t scratch_space[2];

ArraySpan() = default;

explicit ArraySpan(const DataType* type, int64_t length) : type(type), length(length) {}

ArraySpan(const ArrayData& data) { // NOLINT implicit conversion
SetMembers(data);
}
explicit ArraySpan(const Scalar& data) { FillFromScalar(data); }

/// If dictionary-encoded, put dictionary in the first entry
std::vector<ArraySpan> child_data;

/// \brief Populate ArraySpan to look like an array of length 1 pointing at
/// the data members of a Scalar value
void FillFromScalar(const Scalar& value);

void SetMembers(const ArrayData& data);

void SetBuffer(int index, const std::shared_ptr<Buffer>& buffer) {
this->buffers[index].data = const_cast<uint8_t*>(buffer->data());
this->buffers[index].size = buffer->size();
this->buffers[index].owner = &buffer;
}

const ArraySpan& dictionary() const { return child_data[0]; }

/// \brief Return the number of buffers (out of 3) that are used to
/// constitute this array
int num_buffers() const;

// Access a buffer's data as a typed C pointer
template <typename T>
inline T* GetValues(int i, int64_t absolute_offset) {
return reinterpret_cast<T*>(buffers[i].data) + absolute_offset;
}

template <typename T>
inline T* GetValues(int i) {
return GetValues<T>(i, this->offset);
}

// Access a buffer's data as a typed C pointer
template <typename T>
inline const T* GetValues(int i, int64_t absolute_offset) const {
return reinterpret_cast<const T*>(buffers[i].data) + absolute_offset;
}

template <typename T>
inline const T* GetValues(int i) const {
return GetValues<T>(i, this->offset);
}

bool IsNull(int64_t i) const {
return ((this->buffers[0].data != NULLPTR)
? !bit_util::GetBit(this->buffers[0].data, i + this->offset)
: this->null_count == this->length);
}

bool IsValid(int64_t i) const {
return ((this->buffers[0].data != NULLPTR)
? bit_util::GetBit(this->buffers[0].data, i + this->offset)
: this->null_count != this->length);
}

std::shared_ptr<ArrayData> ToArrayData() const;

std::shared_ptr<Array> ToArray() const;

std::shared_ptr<Buffer> GetBuffer(int index) const {
const BufferSpan& buf = this->buffers[index];
if (buf.owner) {
return *buf.owner;
} else if (buf.data != NULLPTR) {
// Buffer points to some memory without an owning buffer
return std::make_shared<Buffer>(buf.data, buf.size);
} else {
return NULLPTR;
}
}

void SetSlice(int64_t offset, int64_t length) {
this->offset = offset;
this->length = length;
if (this->type->id() != Type::NA) {
this->null_count = kUnknownNullCount;
} else {
this->null_count = this->length;
}
}

/// \brief Return null count, or compute and set it if it's not known
int64_t GetNullCount() const;

bool MayHaveNulls() const {
// If an ArrayData is slightly malformed it may have kUnknownNullCount set
// but no buffer
return null_count != 0 && buffers[0].data != NULLPTR;
}
};

namespace internal {

// NOTE(review): only the declaration is visible here. Presumably this fills
// `span` so that it describes a zero-length array of `type` -- confirm
// against the definition before relying on it.
void FillZeroLengthArray(const DataType* type, ArraySpan* span);

/// Construct a zero-copy view of this ArrayData with the given type.
///
/// This method checks if the types are layout-compatible.
Expand Down
Loading

0 comments on commit 27f5cb1

Please sign in to comment.