From ebb0aa070877d3171655c16136300659abca4284 Mon Sep 17 00:00:00 2001 From: Norbert Orzechowicz <1921950+norberttech@users.noreply.github.com> Date: Wed, 25 Oct 2023 19:29:23 +0200 Subject: [PATCH] Added possibility to encode pages as dictionary (#646) * Added possibility to encode pages as dictionary * Static analyze fixes --- .../resources/python/generators/primitives.py | 4 +- .../Parquet/Data/Converter/TimeConverter.php | 22 +- .../src/Flow/Parquet/Data/DataConverter.php | 11 +- .../Exception/DataConversionException.php | 7 + .../parquet/src/Flow/Parquet/ParquetFile.php | 7 +- .../Parquet/ParquetFile/ColumnPageHeader.php | 2 + .../Page/Header/DictionaryPageHeader.php | 9 + .../Parquet/ParquetFile/Page/PageHeader.php | 2 +- .../ParquetFile/RowGroup/ColumnChunk.php | 5 - .../Parquet/ParquetFile/RowGroupBuilder.php | 7 +- .../RowGroupBuilder/ColumnChunkBuilder.php | 75 ++-- .../RowGroupBuilder/PageBuilder.php | 11 + .../DataPageBuilder.php} | 73 ++-- .../PageBuilder/DictionaryPageBuilder.php | 144 +++++++ .../RowGroupBuilder/PageContainer.php | 5 +- .../RowGroupBuilder/PagesBuilder.php | 33 ++ src/lib/parquet/src/Flow/Parquet/Writer.php | 4 +- .../Parquet/Tests/Fixtures/primitives.parquet | Bin 24685 -> 24723 bytes .../Tests/Integration/IO/ListsWritingTest.php | 63 +++ .../Integration/IO/SimpleTypesReadingTest.php | 4 +- .../Integration/IO/SimpleTypesWritingTest.php | 372 ++++++++++++++++++ .../Tests/Integration/IO/WriterTest.php | 88 ----- 22 files changed, 767 insertions(+), 181 deletions(-) create mode 100644 src/lib/parquet/src/Flow/Parquet/Exception/DataConversionException.php create mode 100644 src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder.php rename src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/{DataPagesBuilder.php => PageBuilder/DataPageBuilder.php} (71%) create mode 100644 src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder/DictionaryPageBuilder.php create mode 100644 src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PagesBuilder.php create mode 100644 src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/ListsWritingTest.php create mode 100644 src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php delete mode 100644 src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/WriterTest.php diff --git a/src/lib/parquet/resources/python/generators/primitives.py b/src/lib/parquet/resources/python/generators/primitives.py index e92de3e87..b046a809a 100644 --- a/src/lib/parquet/resources/python/generators/primitives.py +++ b/src/lib/parquet/resources/python/generators/primitives.py @@ -26,7 +26,7 @@ class Color(Enum): json_col = pd.Series([json.dumps({'key': random.randint(1, 10)}) for _ in range(n_rows)], dtype='string') date_col = pd.Series([datetime.now().date() + timedelta(days=i) for i in range(n_rows)], dtype='object') timestamp_col = pd.Series([pd.Timestamp(datetime.now() + timedelta(seconds=i * 10)) for i in range(n_rows)], dtype='datetime64[ns]') -time_col = pd.Series([time(hour=i % 24, minute=(i * 2) % 60, second=(i * 3) % 60) for i in range(n_rows)], dtype='object') +time_col = pd.Series([time(hour=(i+1) % 24, minute=((i+1) * 2) % 60, second=((i+1) * 3) % 60) for i in range(n_rows)], dtype='object') uuid_col = pd.Series([str(uuid.uuid4()) for _ in range(n_rows)], dtype='string') enum_col = pd.Series([random.choice(list(Color)).name for _ in range(n_rows)], dtype='string') float_col = pd.Series([random.uniform(0, 100) for _ in range(n_rows)], dtype='float32') @@ -40,7 +40,7 @@ class Color(Enum): json_nullable_col = pd.Series([json.dumps({'key': random.randint(1, 10)}) if i % 2 == 0 else None for i in range(n_rows)], dtype='string') date_nullable_col = pd.Series([datetime.now().date() + timedelta(days=i) if i % 2 == 0 else None for i in range(n_rows)], dtype='object') timestamp_nullable_col = pd.Series([pd.Timestamp(datetime.now() + timedelta(seconds=i * 10)) if i % 2 == 0 else None for i in range(n_rows)], dtype='object') -time_nullable_col = pd.Series([time(hour=i % 24, minute=(i * 2) % 60, second=(i * 3) % 60) if i % 2 == 0 else None for i in range(n_rows)], dtype='object') +time_nullable_col = pd.Series([time(hour=(i+1) % 24, minute=((i+1) * 2) % 60, second=((i+1) * 3) % 60) if i % 2 == 0 else None for i in range(n_rows)], dtype='object') uuid_nullable_col = pd.Series([str(uuid.uuid4()) if i % 2 == 0 else None for i in range(n_rows)], dtype='string') enum_nullable_col = pd.Series([random.choice(list(Color)).name if i % 2 == 0 else None for i in range(n_rows)], dtype='string') float_nullable_col = pd.Series([random.uniform(0, 100) if i % 2 == 0 else None for i in range(n_rows)], dtype='float32') diff --git a/src/lib/parquet/src/Flow/Parquet/Data/Converter/TimeConverter.php b/src/lib/parquet/src/Flow/Parquet/Data/Converter/TimeConverter.php index 21a7261b6..ac7ca9cfb 100644 --- a/src/lib/parquet/src/Flow/Parquet/Data/Converter/TimeConverter.php +++ b/src/lib/parquet/src/Flow/Parquet/Data/Converter/TimeConverter.php @@ -18,7 +18,7 @@ public function fromParquetType(mixed $data) : \DateInterval public function isFor(FlatColumn $column, Options $options) : bool { - if ($column->type() === PhysicalType::INT32 && $column->logicalType()?->name() === LogicalType::TIME) { + if ($column->type() === PhysicalType::INT64 && $column->logicalType()?->name() === LogicalType::TIME) { return true; } @@ -35,8 +35,8 @@ public function toParquetType(mixed $data) : int */ private function toDateInterval(int $microseconds) : \DateInterval { - $seconds = (int) \floor($microseconds / 1000000); - $remainingMicroseconds = $microseconds % 1000000; + $seconds = (int) \floor($microseconds / 100000000); + $remainingMicroseconds = $microseconds % 100000000; $minutes = (int) \floor($seconds / 60); $remainingSeconds = $seconds % 60; @@ -64,7 +64,7 @@ private function toDateInterval(int $microseconds) : \DateInterval $interval->y = 0; $interval->m = 0; $interval->d = 0; - $interval->f = ($remainingMicroseconds / 1000000); + $interval->f = ($remainingMicroseconds / 100000000); return $interval; } @@ -81,13 +81,13 @@ private function toInt(\DateInterval $interval) : int $microseconds = 0; - $microseconds += $interval->y * 365 * 24 * 60 * 60 * 1000000; // years to microseconds - $microseconds += $interval->m * 30 * 24 * 60 * 60 * 1000000; // months to microseconds (approx) - $microseconds += $interval->d * 24 * 60 * 60 * 1000000; // days to microseconds - $microseconds += $interval->h * 60 * 60 * 1000000; // hours to microseconds - $microseconds += $interval->i * 60 * 1000000; // minutes to microseconds - $microseconds += $interval->s * 1000000; // seconds to microseconds - $microseconds += (int) (($interval->f) * 1000000); // microseconds + $microseconds += $interval->y * 365 * 24 * 60 * 60 * 100000000; // years to microseconds + $microseconds += $interval->m * 30 * 24 * 60 * 60 * 100000000; // months to microseconds (approx) + $microseconds += $interval->d * 24 * 60 * 60 * 100000000; // days to microseconds + $microseconds += $interval->h * 60 * 60 * 100000000; // hours to microseconds + $microseconds += $interval->i * 60 * 100000000; // minutes to microseconds + $microseconds += $interval->s * 100000000; // seconds to microseconds + $microseconds += (int) (($interval->f) * 100000000); // microseconds return $microseconds; } diff --git a/src/lib/parquet/src/Flow/Parquet/Data/DataConverter.php b/src/lib/parquet/src/Flow/Parquet/Data/DataConverter.php index cee45e3a1..fbbbce2fe 100644 --- a/src/lib/parquet/src/Flow/Parquet/Data/DataConverter.php +++ b/src/lib/parquet/src/Flow/Parquet/Data/DataConverter.php @@ -8,6 +8,7 @@ use Flow\Parquet\Data\Converter\Int64DateTimeConverter; use Flow\Parquet\Data\Converter\Int96DateTimeConverter; use Flow\Parquet\Data\Converter\TimeConverter; +use Flow\Parquet\Exception\DataConversionException; use Flow\Parquet\Options; use Flow\Parquet\ParquetFile\Schema\FlatColumn; @@ -56,7 +57,15 @@ public function fromParquetType(FlatColumn $column, mixed $data) : mixed if ($converter->isFor($column, $this->options)) { $this->cache[$column->flatPath()] = $converter; - return $converter->fromParquetType($data); + try { + return $converter->fromParquetType($data); + } catch (\Throwable $e) { + throw new DataConversionException( + "Failed to convert data from parquet type for column '{$column->flatPath()}'. {$e->getMessage()}", + 0, + $e + ); + } } } diff --git a/src/lib/parquet/src/Flow/Parquet/Exception/DataConversionException.php b/src/lib/parquet/src/Flow/Parquet/Exception/DataConversionException.php new file mode 100644 index 000000000..86ce578cc --- /dev/null +++ b/src/lib/parquet/src/Flow/Parquet/Exception/DataConversionException.php @@ -0,0 +1,7 @@ +schema()->columnsFlat() as $column) { foreach ($this->viewChunksPages($column) as $pageHeader) { - yield new ColumnPageHeader($column, $pageHeader); + yield $pageHeader; } } } @@ -350,7 +349,7 @@ private function readStruct(NestedColumn $structColumn, bool $isCollection = fal } /** - * @return \Generator + * @return \Generator */ private function viewChunksPages(FlatColumn $column) : \Generator { @@ -359,7 +358,7 @@ private function viewChunksPages(FlatColumn $column) : \Generator foreach ($this->getColumnChunks($column) as $columnChunks) { foreach ($columnChunks as $columnChunk) { foreach ($viewer->view($columnChunk, $column, $this->stream) as $pageHeader) { - yield $pageHeader; + yield new ColumnPageHeader($column, $columnChunk, $pageHeader); } } } diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/ColumnPageHeader.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/ColumnPageHeader.php index 00e6bbdc9..fca41d2c9 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/ColumnPageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/ColumnPageHeader.php @@ -3,12 +3,14 @@ namespace Flow\Parquet\ParquetFile; use Flow\Parquet\ParquetFile\Page\PageHeader; +use Flow\Parquet\ParquetFile\RowGroup\ColumnChunk; use Flow\Parquet\ParquetFile\Schema\FlatColumn; final class ColumnPageHeader { public function __construct( public readonly FlatColumn $column, + public readonly ColumnChunk $columnChunk, public readonly PageHeader $pageHeader, ) { } diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/Header/DictionaryPageHeader.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/Header/DictionaryPageHeader.php index c8d6e6d92..d8dec273c 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/Header/DictionaryPageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/Header/DictionaryPageHeader.php @@ -29,6 +29,15 @@ public function encoding() : Encodings return $this->encoding; } + public function toThrift() : \Flow\Parquet\Thrift\DictionaryPageHeader + { + return new \Flow\Parquet\Thrift\DictionaryPageHeader([ + 'encoding' => $this->encoding->value, + 'num_values' => $this->valuesCount, + 'is_sorted' => false, + ]); + } + public function valuesCount() : int { return $this->valuesCount; diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/PageHeader.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/PageHeader.php index bd5136f0f..6e7b28baf 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/PageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Page/PageHeader.php @@ -118,7 +118,7 @@ public function toThrift() : \Flow\Parquet\Thrift\PageHeader 'crc' => null, 'data_page_header' => $this->dataPageHeader?->toThrift(), 'data_page_header_v2' => null, - 'dictionary_page_header' => null, + 'dictionary_page_header' => $this->dictionaryPageHeader?->toThrift(), 'index_page_header' => null, ]); } diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroup/ColumnChunk.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroup/ColumnChunk.php index 949bafd3c..c96edf755 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroup/ColumnChunk.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroup/ColumnChunk.php @@ -124,11 +124,6 @@ public function pageOffset() : int return $offset; } - public function rootName() : string - { - return $this->path[0]; - } - public function totalCompressedSize() : int { return $this->totalCompressedSize; diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder.php index 532f19fa8..e797b4adc 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder.php @@ -49,10 +49,9 @@ public function flush(int $fileOffset) : RowGroupContainer $chunkContainers = []; foreach ($this->chunkBuilders as $chunkBuilder) { - foreach ($chunkBuilder->flush($fileOffset) as $chunkContainer) { - $fileOffset += \strlen($chunkContainer->binaryBuffer); - $chunkContainers[] = $chunkContainer; - } + $chunkContainer = $chunkBuilder->flush($fileOffset); + $fileOffset += \strlen($chunkContainer->binaryBuffer); + $chunkContainers[] = $chunkContainer; } $buffer = ''; diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/ColumnChunkBuilder.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/ColumnChunkBuilder.php index 6f438f9ce..9dbbc7884 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/ColumnChunkBuilder.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/ColumnChunkBuilder.php @@ -3,8 +3,10 @@ namespace Flow\Parquet\ParquetFile\RowGroupBuilder; use Flow\Parquet\Data\DataConverter; +use Flow\Parquet\Exception\RuntimeException; use Flow\Parquet\ParquetFile\Compressions; use Flow\Parquet\ParquetFile\Encodings; +use Flow\Parquet\ParquetFile\Page\Header\Type; use Flow\Parquet\ParquetFile\RowGroup\ColumnChunk; use Flow\Parquet\ParquetFile\Schema\FlatColumn; @@ -21,45 +23,60 @@ public function addRow(mixed $data) : void $this->data[] = $data; } + public function flush(int $fileOffset) : ColumnChunkContainer + { + return $this->createColumnChunkContainer( + (new PagesBuilder($this->dataConverter))->build($this->column, $this->data), + $fileOffset + ); + } + /** - * @return array + * @param array $pageContainers */ - public function flush(int $fileOffset) : array + private function createColumnChunkContainer(array $pageContainers, int $offset) : ColumnChunkContainer { - $offset = $fileOffset; - $columnChunkContainers = []; + $buffer = ''; + $encodings = []; + $valuesCount = 0; + $size = 0; + $dictionaryPageSize = null; + $dictionaryPageOffset = null; + $pageOffset = $offset; - $pageContainer = (new DataPagesBuilder($this->data))->build($this->column, $this->dataConverter); + foreach ($pageContainers as $pageContainer) { + if ($pageContainer->pageHeader->type() === Type::DICTIONARY_PAGE) { + if ($dictionaryPageSize !== null) { + throw new RuntimeException('There can be only one dictionary page in column chunk'); + } - $columnChunkContainers[] = $this->createColumnChunkContainer($pageContainer, $offset); - $offset += $pageContainer->size(); + $dictionaryPageOffset = $pageOffset; + $dictionaryPageSize = $pageContainer->size(); + } - $this->data = []; + $buffer .= $pageContainer->pageHeaderBuffer . $pageContainer->pageBuffer; + $encodings[] = $pageContainer->pageHeader->encoding()->value; + $valuesCount += \count($pageContainer->values); + $size += $pageContainer->size(); + $pageOffset += $pageContainer->size(); + } - return $columnChunkContainers; - } + $encodings = \array_values(\array_unique($encodings)); + $encodings = \array_map(static fn (int $encoding) => Encodings::from($encoding), $encodings); - /** - * @psalm-suppress PossiblyNullArgument - */ - private function createColumnChunkContainer(PageContainer $pageContainer, int $offset) : ColumnChunkContainer - { return new ColumnChunkContainer( - $pageContainer->pageHeaderBuffer . $pageContainer->pageDataBuffer, + $buffer, new ColumnChunk( - $this->column->type(), - Compressions::UNCOMPRESSED, - /** @phpstan-ignore-next-line */ - $pageContainer->pageHeader->dataValuesCount(), - $offset, - $this->column->path(), - [ - Encodings::PLAIN, - ], - \strlen($pageContainer->pageDataBuffer) + \strlen($pageContainer->pageHeaderBuffer), - \strlen($pageContainer->pageDataBuffer) + \strlen($pageContainer->pageHeaderBuffer), - dictionaryPageOffset: null, - dataPageOffset: $offset, + type: $this->column->type(), + codec: Compressions::UNCOMPRESSED, + valuesCount: $valuesCount, + fileOffset: $offset, + path: $this->column->path(), + encodings: $encodings, + totalCompressedSize: $size, + totalUncompressedSize: $size, + dictionaryPageOffset: $dictionaryPageOffset, + dataPageOffset: ($dictionaryPageOffset) ? $offset + $dictionaryPageSize : $offset, indexPageOffset: null, ) ); diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder.php new file mode 100644 index 000000000..1107febbb --- /dev/null +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder.php @@ -0,0 +1,11 @@ +shred($this->rows, $column->maxDefinitionsLevel()); + $shredded = (new Dremel())->shred($rows, $column->maxDefinitionsLevel()); $rleBitPackedHybrid = new RLEBitPackedHybrid(); @@ -46,17 +50,39 @@ public function build(FlatColumn $column, DataConverter $dataConverter) : PageCo $pageWriter->append($definitionsBuffer); } - $valuesBuffer = ''; - $valuesBuffer = $this->writeData($column, $valuesBuffer, $shredded, $dataConverter); - $pageWriter->append($valuesBuffer); + if ($this->dictionary === null) { + $valuesBuffer = ''; + $this->writeData($column, $valuesBuffer, $shredded, $dataConverter); + $pageWriter->append($valuesBuffer); + } else { + $indices = []; + + foreach ($shredded->values as $value) { + $index = \array_search($value, $this->dictionary, true); + + if (!\is_int($index)) { + throw new RuntimeException('Value "' . $value . '" not found in dictionary'); + } + + $indices[] = $index; + } + + $valuesBuffer = ''; + $indicesBitWidth = BitWidth::fromArray($indices); + $indicesWriter = new BinaryBufferWriter($valuesBuffer); + $indicesWriter->writeVarInts32([$indicesBitWidth]); + $rleBitPackedHybrid->encodeHybrid($indicesWriter, $indices); + + $pageWriter->append($valuesBuffer); + } $pageHeader = new PageHeader( Type::DATA_PAGE, \strlen($pageBuffer), \strlen($pageBuffer), dataPageHeader: new DataPageHeader( - Encodings::PLAIN, - $this->valuesCount($this->rows), + $this->dictionary ? Encodings::PLAIN_DICTIONARY : Encodings::PLAIN, + \count($shredded->values), ), dataPageHeaderV2: null, dictionaryPageHeader: null, @@ -66,29 +92,15 @@ public function build(FlatColumn $column, DataConverter $dataConverter) : PageCo return new PageContainer( $pageHeaderBuffer->getBuffer(), $pageBuffer, + $shredded->values, $pageHeader ); } - public function valuesCount(array $rows) : int - { - $valuesCount = 0; - - foreach ($rows as $row) { - if (\is_array($row)) { - $valuesCount += $this->valuesCount($row); - } elseif ($row !== null) { - $valuesCount++; - } - } - - return $valuesCount; - } - /** * @psalm-suppress PossiblyNullArgument */ - private function writeData(FlatColumn $column, string $valuesBuffer, DataShredded $shredded, DataConverter $dataConverter) : string + private function writeData(FlatColumn $column, string &$valuesBuffer, DataShredded $shredded, DataConverter $dataConverter) : void { $values = []; @@ -107,15 +119,16 @@ private function writeData(FlatColumn $column, string $valuesBuffer, DataShredde (new BinaryBufferWriter($valuesBuffer))->writeInts32($values); break; - case null; - (new BinaryBufferWriter($valuesBuffer))->writeInts32($values); + case null: + (new BinaryBufferWriter($valuesBuffer))->writeInts32($values); - break; + break; } break; case PhysicalType::INT64: switch ($column->logicalType()?->name()) { + case LogicalType::TIME: case LogicalType::TIMESTAMP: (new BinaryBufferWriter($valuesBuffer))->writeInts64($values); @@ -166,7 +179,5 @@ private function writeData(FlatColumn $column, string $valuesBuffer, DataShredde default: throw new \RuntimeException('Writing physical type "' . $column->type()->name . '" is not implemented yet'); } - - return $valuesBuffer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder/DictionaryPageBuilder.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder/DictionaryPageBuilder.php new file mode 100644 index 000000000..ad55edd10 --- /dev/null +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageBuilder/DictionaryPageBuilder.php @@ -0,0 +1,144 @@ +writeData($column, $dictionaryBuffer, $dictionary, $dataConverter); + $pageWriter->append($dictionaryBuffer); + + $pageHeader = new PageHeader( + Type::DICTIONARY_PAGE, + \strlen($pageBuffer), + \strlen($pageBuffer), + dataPageHeader: null, + dataPageHeaderV2: null, + dictionaryPageHeader: new DictionaryPageHeader( + Encodings::PLAIN, + \count($dictionary) + ), + ); + $pageHeader->toThrift()->write(new TCompactProtocol($pageHeaderBuffer = new TMemoryBuffer())); + + return new PageContainer( + $pageHeaderBuffer->getBuffer(), + $pageBuffer, + $dictionary, + $pageHeader + ); + } + + /** + * @psalm-suppress PossiblyNullArgument + */ + private function writeData(FlatColumn $column, string &$valuesBuffer, array $rawValues, DataConverter $dataConverter) : string + { + $values = []; + + foreach ($rawValues as $value) { + $values[] = $dataConverter->toParquetType($column, $value); + } + + switch ($column->type()) { + case PhysicalType::BOOLEAN: + (new BinaryBufferWriter($valuesBuffer))->writeBooleans($values); + + break; + case PhysicalType::INT32: + switch ($column->logicalType()?->name()) { + case LogicalType::DATE: + (new BinaryBufferWriter($valuesBuffer))->writeInts32($values); + + break; + case null; + (new BinaryBufferWriter($valuesBuffer))->writeInts32($values); + + break; + } + + break; + case PhysicalType::INT64: + switch ($column->logicalType()?->name()) { + case LogicalType::TIMESTAMP: + (new BinaryBufferWriter($valuesBuffer))->writeInts64($values); + + break; + case null: + (new BinaryBufferWriter($valuesBuffer))->writeInts64($values); + + break; + } + + break; + case PhysicalType::FLOAT: + (new BinaryBufferWriter($valuesBuffer))->writeFloats($values); + + break; + case PhysicalType::DOUBLE: + (new BinaryBufferWriter($valuesBuffer))->writeDoubles($values); + + break; + case PhysicalType::FIXED_LEN_BYTE_ARRAY: + switch($column->logicalType()?->name()) { + case LogicalType::DECIMAL: + /** @phpstan-ignore-next-line */ + (new BinaryBufferWriter($valuesBuffer))->writeDecimals($values, $column->typeLength(), $column->precision(), $column->scale()); + + break; + + default: + throw new \RuntimeException('Writing logical type "' . ($column->logicalType()?->name() ?: 'UNKNOWN') . '" is not implemented yet'); + } + + break; + case PhysicalType::BYTE_ARRAY: + switch ($column->logicalType()?->name()) { + case LogicalType::JSON: + case LogicalType::UUID: + case LogicalType::STRING: + (new BinaryBufferWriter($valuesBuffer))->writeStrings($values); + + break; + + default: + throw new \RuntimeException('Writing logical type "' . ($column->logicalType()?->name() ?: 'UNKNOWN') . '" is not implemented yet'); + } + + break; + + default: + throw new \RuntimeException('Writing physical type "' . $column->type()->name . '" is not implemented yet'); + } + + return $valuesBuffer; + } +} diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageContainer.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageContainer.php index 7e9e2cdb9..c2db97412 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageContainer.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PageContainer.php @@ -8,13 +8,14 @@ final class PageContainer { public function __construct( public readonly string $pageHeaderBuffer, - public readonly string $pageDataBuffer, + public readonly string $pageBuffer, + public readonly array $values, public readonly PageHeader $pageHeader ) { } public function size() : int { - return \strlen($this->pageHeaderBuffer) + \strlen($this->pageDataBuffer); + return \strlen($this->pageHeaderBuffer) + \strlen($this->pageBuffer); } } diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PagesBuilder.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PagesBuilder.php new file mode 100644 index 000000000..8131af6b8 --- /dev/null +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/PagesBuilder.php @@ -0,0 +1,33 @@ + + */ + public function build(FlatColumn $column, array $rows) : array + { + if ($column->logicalType()?->name() === LogicalType::STRING) { + $dictionaryPageContainer = (new DictionaryPageBuilder())->build($column, $this->dataConverter, $rows); + + return [ + $dictionaryPageContainer, + (new DataPageBuilder($dictionaryPageContainer->values))->build($column, $this->dataConverter, $rows), + ]; + } + + return [(new DataPageBuilder())->build($column, $this->dataConverter, $rows)]; + } +} diff --git a/src/lib/parquet/src/Flow/Parquet/Writer.php b/src/lib/parquet/src/Flow/Parquet/Writer.php index ba21cedcc..126f2d1d8 100644 --- a/src/lib/parquet/src/Flow/Parquet/Writer.php +++ b/src/lib/parquet/src/Flow/Parquet/Writer.php @@ -3,6 +3,7 @@ namespace Flow\Parquet; use Flow\Parquet\Data\DataConverter; +use Flow\Parquet\Exception\InvalidArgumentException; use Flow\Parquet\Exception\RuntimeException; use Flow\Parquet\ParquetFile\Metadata; use Flow\Parquet\ParquetFile\RowGroupBuilder; @@ -26,8 +27,9 @@ public function __construct(private Options $options = new Options()) */ public function write(string $path, Schema $schema, iterable $rows) : void { + // This will be later replaced with append if (\file_exists($path)) { - \unlink($path); + throw new InvalidArgumentException("File {$path} already exists"); } $stream = \fopen($path, 'wb'); diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Fixtures/primitives.parquet b/src/lib/parquet/tests/Flow/Parquet/Tests/Fixtures/primitives.parquet index 7ca0e5efaceb45039ba78e76b5dbc745b57e551d..1769d1f19947beaf568ef58eb3e9facd250cd510 100644 GIT binary patch literal 24723 zcmd5_30xKB+CK*Yj~n805EK&>PE?G}zPRL^5d;khaRJSeSw(Xpmn3f%R^HGS&8%$4 zvNE$WZ+dN2Gt(@~cDuL5wtFqBOJ!wc_5GiLqlk3B>MMUA_J5z*=Y8Mj|19r2bEcAL zDHo0p*CmG6Bo>8t2E&2QfxyyM2bTl`_@g2YXa&Rr2|yyy8VCVxfVMz8pgqt5NCG+n z$v`In*9&w3x&mP!1?UE(0%<^Zpa+l+^aM@oCC#8)bDFGSgHvy}g4h`hw(e0=Q=#XiCe>bMZLL&KT3YKE_m=lv zaizEIJTtxUj@+UT29{)v810^2e)qHq|DIowbG0-1w#qdROg->q6?I4;vby6@ zl_dEpnK*sU)TyR5)osvCZ9>Xr36ZxtL{_zha*p0K<=85;sGKeB!yAvfiaMyHLog8R z5DLTrt$=tS0Z0T|10kRd&=zP1vNkB&+8R!Ib2D$)UfiRE)bOTa>G@v`s14svY z0w)2zfD9lL=neD%vVd$L0-Oxw0H*+_0;d7F00Z;|`T;D!0X!f8A|L@Wpa3eM0sVmi zz(8OSa5^v;(19pm07HO0ARibC6aa<5Fkm=v22ccy0E&S#ff8UOPzsC!MgwDju|OG6 z4vYiF17`sffU|*ffQdi_V0H)%BsJ0>`2_lF$zgng4j`&7gBc2h*h+o7l;uUd<_(WVH9)Uyg{R3@+fq`vW$NS77 z)(~TeEyNUJ2{DA&LChdl5F>~U!~|jiF@UrW%@3?-(4OyBXz_U05{eO`iLRZT7PjK- z-|cFC$~dY#ay7Ep%++iPrEE-yEbA57*#lbq)m3Uii-B-XIM?@`frjW845kJt?)egp z=voBDQovhx`IKwAJiGVu&wjXlOEB_upY(7bEjuxg23I#@#?%IOO--qayxMzU>aiC| z!=d;W*U+JSIMV!nLAXddC5R)^j`-*FgrEJtR*~DXkKC*S-K<$}oS!l!@^DteU5{R> z>0RT)%e#iZZ(S72#TK{CB~zU>d*dvQz@lxRJNwI;KJoVPMah9Q$sOc;H(qnuRmnd!|faWbz;N)8%%0%O3Kr1 zBaiou)J4*NshTa=b$obn+wju(SiZL~p*i7+>_l{sJYH>+Hn5T0KCgXB6Px=YSLc7J z5NJnBg-APEDkR#`QX$ihmI{S-v{b0HqoqQl9Y+dPlbh;ZJ_(_Cwsbl-Ho#`l!$B@c(jY+r_7o$y@Bh?<5QNmitOtjS*sD(f3XHF_RUYha4q_# zv)?xtAJa9G-!;_qfIuChp+nTh4&iAl_0>T6m|jt-S1Q7zn~W=8aN)pcR)@5$|rE{v>E(O^&v#tCg3p$^N?u$IBA8mdIYJRhT_ExKuf5b(u931&@ z5b@&|Z`xu%v@q+i14F8vz_Y+E;5p!VU^nmr@FK7WcnNqJcm;SBcnx?RcmvoAyb1gR z_$Tld@HX%c@Gh_qcn^3V_yG72_z3tH@NZy0@G{~@r}4fJR^<~zldAJE8-OKiMT{OA`TIMh&#j^;tcVHxI#Q3ju1bH z8^jCZ1o45mKs+E0kp4;cq<7Le>6>&-dL|u{eo42aSJElzlXOXXJT8)?#?$6T4PwZD zbPXE(-N9t}$fGLhKq_hGDjFsocAgnoS_Iu3y=Tg?HE2;cm~`0H{zy(-mw2#OtJU*o zad+F-bW?X+yl^#S{l+WaTm!lHwi}rHAy+THbHRglw(ZMUj)9 zh1BQpGoORZIs5cyc0(rT5AVJQ@=)&B=`Ta>OtqeR6>?o*ddeG+EB0S7+4fV&rQJR{ z=X1!y)?cizhfMwIaN<{xfq#b1`WkZoQ(YhY7IM#;j8@-6)?J@lb{MkuBK5wXAgjE* zp99V{-O7tgMzw;>=|AC~1jyvxl|QtGJk)0PnQbBWeR1)f?IG8_eZ{v)kSn&|a7HJ{ zn)~m(y$fXJbq{_OhAf%?SV1ae#^fD0cZUp(eC0pskoD@jLwZ5(%{XvFCgke&-yY}# zx%6P19)YZWD`|NSWX03n_n!(`xHdA7flOV&FYN~zs5$*Z4s!pb;i?F^=ghHJ%8;8B zYo7{P+h@8s0J5sxyxKvK z)|>&k&fER!2*{e!H*-oLEBk+RVJT!u?=SX@hE&`Bm{kTD`X+S2ILP|9yY4;a!MP^@6;fE@Z{zlJh1(vSTJ}pA4Bgs4{ITWFUL?wCRxh+t+NZ zg4}cPitsGRx_viHo&#CC^Uf#cLRPJNu+u!q@)eJ{7eeM-w&RhDAd{!Q(qRGQp;7Oe zmqP9vaA3ni$en$@ZBq-mqGPLb7DLv2o3#E)$d){JD7G^-6f+-q#um1oKu>;&b#w-y z&Fsn3+*z~DX;nw2fFA7_d3a)EXDKERes$$rbRmt&M#H4C|Ab>_`OUrusUPw;-*ar~ z@?3+*_~3otQ&d0rz3(9kFYY{#JOg{islG=@?PmC%VE@0y`5s`;`ty9tue)TCZ}GLo zD}76^>i&pt;rb_ge9O++xZk(vLkk1#$ddPsO#K^K@XnsBZ@DWzEATD0=8=iMrB*JS z>szQgW{Gc^8JYL^78&~NN#7FpKK{CIfm>=n_bqSr=)^p-xazEvJ|#=5sF!>TD|~!} zZ&_@u>swTye7QTN0>>Pit&-`_fOMS-Id~=h}^vb8!`wTC+dY8{^b^QB2qeG{D?=!jn z>rNYq!F#u!;xqU7I2DTRN)5%7g=o>j*8fY~x@_uE0(L$^z-GaVrm;ei6;mQF*kIzX zu2zdoY$2Eh9_aR}MFW)y{Z?iouBnNJ>}?n^`R1a%JI}) z&;N~{qE$UnmqJg^PI_(j1@xrj`A-Yq%7mO3-*YfMEz3#&VkJH8cwN{Yp9^`}88=R% zr(?W5Z5KW9*f%fA&=ZfVmxY(o)3AYSKK_!PdVI3=i7`5)zU;;|Lm)rDcUlK}>UGQ3 zXcayA82)DX732_sLo3Ty#zEfwNNzmUd-O|zNi>X&`}oyo+e5Ak+?z}FJ|}hl5_$?a zb#BI5lHHdaOrrYlynE+)^rYteCvSNbW4=Jv>s1QX|K-o}Zlb3(H3?}y($l`aJwKaB zPx;;ww?5Sua&ggh*?#@g&znb2^{y84-=?QF1B=phzy9`TchFP36@S^9K=mJb?RAy( ztIMeVW3RmMQF=NSzpA7s)qnkCSvB-jw&ImT?_<;$ zNILL*KGpy6xRtBvDeRQ)bK3d!|D>cM9qZHMvbXhuOz-|(wqJj4cWngg)x%aEI0Z6d z&M9UfU#%YY5IrrsWNFW?RBv|m7w0RG?`+=wCOt9x^rXq(ki1DAG>+{(CZfjO(fRR0NARadu$YvV`Pt! zJVx#qsbgf0kvK-)M$)zr$C{|T(bH2LJANyu`uoRUmj{oMuLF^<#Td4FVKDN}MaRw-oImdQYK{W+j&Q%#v5?#g;6rNE$M`bNn^gLM% zR@a}$X|Cehs>wK<>EU`3$7rmhGNxlIoGE&^tV`FEBu-^**JOAHH4|maV>DN^7|CKe znPV+UaRoX~;U!0A6@jr_lVe1SV;M~mWJYlv)#MeKb=&0B&v#YT))Z9R<~)ZHS=V7y zTb39{Qbb$!Sjp5>x|+rdnk-5dqlp&oDw#H8a+b;nXuU0JrowR!9p~{p&k458xSWc+ zN*s^Y3Jzmxp2d0&4m4Rh&b4I0G=tj$Ua%zKCe*7u$5@K3Xr{$#4y$FRF2#*pS72>M zQWcgFRRw*4zG4(d#@V8$u$Gb)tgf|L%aUb{u^dHVM3+}^Ax>a4i4|p5kW7yif+b4@ z-W62aQyE;|Mfb`kql$vY2!d?cmW;;OTvn!DiwRUswOK|{CES!%aZ^cPQGd;HEP=C4 z#R`^ml{MBC+29!!oQ-=bngbp15+it~X-TZC*nB>#a=fg1p5rm5h#nAwH|tms8D3LW zMRrLIxKu80LVK2LGd$-;%Zn7Koii1NvmH(qIZt*RE|{}a5+qi0Ese2R7uEMXut;G= zM&mVEbtRrUIV-rc#&&UgQD$t>HqmFc%$S11GL~k84UTEqr)(-$CC5?FH;gQyKSbGv zRxL{n-j&Wuf*^UWlOO6TsUolO!2=b%VB@u(>o)=745EV1a+)|@* z8v0LUWDWI(eM$^YN12>aD>)YDiV}KMbOexxlNpusY-ry$T}$*hRY<7M zWj)gZ$8AP)#Av8U6&cI)c(j*SVZaVrEC+YiqS~x0fKn{a!GJvl_405n0X(%hPh|x= z|4^-JO036g8Y8P5NZ|@F5D9*Uvo+r16&xnvC`;yWYd9TMQgI`jv!QO$gKv;jN#<>n zvlFUodDBu&mvlT5}`S&rdkQ;;>@ zb`(Cla*rjNmf&h4!@&a4U9fVKlLdwcbwmNq!1H)ATwXE-(FAWCiM7lvg;0{JSg;RC za24>_nc_F$wv&%rm1p6GpMNDKl-jdd2nqxE=cR3^&*^bM!;FHS47wnSjY4vS%&AzPGv=x z$Fhti3Msp?jZCQas zLyX?~NxCDcs%s+0*m9u+U+N05BA(;X<0g9CG(jGdHD#9-oK88ciMXk0s1L`PPEu_f z9WjCzc?O(zJPGE+w}Jn0RGyXL7iGZ#fj!*Jv=m&7htaZ<)hUic|K$#E{9yeQnPi{bHE^x zM?}*Qj>YJ6s)bs>W_VF>9ggEC@9)YZMN_R{eTB#4GA7a(gh(WU$bxtW(a&<-6}7e{ zfF}qn5>iDlSGG}K2|0xZi&6y}E(>1T!6m?IB#MZM2&HJhW}~-V(@`81b#R>Q)MQQ) zG%tEyyyUE`F4ABKritk0c$TNa>TFxOD^Lr?+Lj_aj3)|+d@7it;x>%!!eP3uWpP46 z^+3_KMGK)0i5#*DkzcTGpnw2_%vEDG6ZhA+=<1LqnV???vE#kxibUj|2+AJIID*Go zo{QWsr7D!n%J6jNxgp0CSJoARbtATb_tikw1Z6RL|;PUd*9L$*a0zCH(m1#Z~27?y>l1ca@d zLL#drh1;a_G7>S{a~D@vaU~wiqJc7ClcG|d$BLwSfUCo6hYSXpu*cW}vI50%cpKWwkvPi{ zBvjbd%>6|k@)nClW&(o4fZ)g^C80XkmTe8mD$7W&Y*q#Ar2Qe4gN8kH7fclHhYQwa zfjTM@LY3i?Zfl<|T0HqoC%QNTb^xEDInZ$q5^3IoFJ~m@y+a{Q(-u^~g9)N*!LcGV zD*2&W5DRftLz1Xk-~o^9--4%PcpK@m1gmh^?fY^CMbIwCExGwxW{J(3` z07&Dk=pGia6YgGBIENRc?Ru^&st&A`1AXPKsR1Wgjg$z+AiE}yp^xbzWs?O-u>=)$ zP%(%Sc?4D!Lo$RJ%l0f}v$maI&Wb8;@~jKL>X>}etsx!JfWn?*3Z{sRUUNTYb0`nw zZG;rfT(GW0ME(wE46?wXBD3LDSruBCOOgXQS-&D7mr}Eu^f4v(E%mX3RPRL1HywTk zWB%C7Sn+e_Om-S3oW6{vEImE)uWiA|r?o+PLFJEZUW@Yqb-lv3^@_d2wWp&$9pF+n zs=_NF4pStjOSO;k~hUOTAVUWOscs7QUoS^Z%Vzw!%hz*vk!uhKz>sSaOrl`0FgLp3ma;$aZrnu0%j#3S@$Qb$45 zrW6o;f@}*x8FM5En5r3@+oHS!9YuFT#)VlE3{EVJRWx2fVxo95=4z-NChM9qxnN`q z=oiogEkX+Cfs`2QiGpPLgH_FTWO!mEs46me5u-|kcn35yWezvxRg4ztdLoh$7^#5z z!GY6U8zM8*D1($3hB*>gM#sVZOS}vU$Ozcr0SmsseIu}9U`{boZJ2w*_!j<~=3UVL z-~ly3gBif^5h*zZ6FWX%Fy%o5bqJY_Ku z!k7r7YIqmS`jJxo<1`H7c?4A0FWHADPzptpGoEeINXtYPNB5N_lXEm&0I3{~)4*5@ z6G$*79mtT1PX+I%rp4xgx|n@Z6s=}Dq#=`!v-tpI1mKAqDqV_!6{&9 zhcP<3j#0ohQK7+v;@=kmNAfU)2cI!g5>?Eom@Tu0-$RZpet? z|KM6w36?BrCMN3;#`p$axf%z*0mrSX$Z;_o$F!~uJ2D+`QBqV4{Tu3s=|Ri_V>*B` zIng4!R51Sr?}FJ)773RkANN2*c92l~iU69L^=oJgE&lF@9zT83BmwF`2~e|#9yPsF zzRZm5+8>Nu`7AP_Kepm6W;`gLVS8#k9gSo9)Y6goF-u&JWaIAAu|tO~jvG7du90csQxY?8bP`rwS35os zIsAE(=Eu9!=S*wJ|6UEIycLYpe-@0?edgzXO|JdFSGuu|N9r7F1S*{uR-0DZIkwWh zkGxo{C&YCeof6Clo?3EKX2q1@34zF~Uo@#}2dZl`RlOWc*%gfZqdpi}T<@#uf2(xk zqfq=F_QGQ8eb_MW7|))`}lKOX(*>J8Dkmyb8%FWnSfvSfX<+vKsv;ny!WR(-x8 z`bM8c#%=k}Mt@j8&j`Fb#ptpy+31;-rS~d56kXC`jPchC78o1m-C^8t+ccxiT~9{u zDszl;OSc&RUfao-UiDJ+{`{zMI&KK}t^48?wR=uMqr&Tg=r#y5z+ z3q}rp8N_?4zgX86&FHvyh$rj2qnA9l)i|)Y#@P760e$~z8;mY%7aKPXzSMa8v(3it zH&+_Zd{G_^K6GyM{r5K;Syw!5lwEs`p%z?jEGz77Z1{4Kap<)tjI%24HlCV(i}Cmf z&hX4zjo|)eM)~}par4L9jBhU7XgqrB#nHS^h8PEKKP~$Gw%d%H-UE!bYfFsU^PV;a zwEiG^*{ntS%gZMj%L2oV;uY(Sx{sF_KkVudWl{$lZP#3D^c5EyvmP2`tSKLFRH!!? ze;-q8WZ06?I_WRb1EG6KkIjF%OjS*drwij*cKp#bXz_Qu^II;7JF1o>UoFj?M8jLe zdurk$D}E&X9KCO|TC}L27GAyTHEX0&Jnj_zVm;~P;&+GXHKiXX^?yHKFMfDs;yv#b z>uW~OZL__$RPXZqIm0^Lm8r{bypjIOdn5F^*%SXhuuZ0Zvbgq^b@Ph!Z4b;WXUazF zA3U(A*YZe--m3S!TWZrz*T1}T{Gi&KM(P!3=5)+^zErn5{rtwoE6eoxd#~fhOexUk zUofvK5AU$BVY6$u0MLk+Oa>KldP9tR9IH84cFiK^u_nD9@s~} z?v6!+vYr{ES7fW#>d6KAeJS3ANw{_`{-`1|PORIp^CM!}WU}ec{|Eo*SZ< z{-fuXvY+$yk7_TOY{ZY$Z=LtZ^b2kD&E`MhMyP~CkgO`o&3O#jb4 zSrY?$M(F=27k)dDWQBO9tz2)~!Bxh{@H9o_usi_>iu5T=DbMp;<-xbC)mt ztH;xnqg+EJ|6^j~?^oi!!;`33e5Jywm?@4Ne*bx$byx?=5m@v_oW z^oju;AGqta!Fu{4>HM*`W$4SFxUt6uu2BEW=XUo|pO4lvuKnB64p(t{@U%I%U-9%v zeg1U^c5V1+q<*gbMdjNYdgyV>F1mBSGD^2je(mE#OVroDwx!p-ndN$D`omvW4wQ7` z$MjRadZA2jy=35Hr*9MVgx9A(P*)h$)6=Sx#a3nd6K_0oI54ii9(e8P7vJkysQ+ic zB`ZRAmFk;Td~?aaFAR z6>ork>dhN}SeIF@t{n3?4UnEA$^)kSk)qm<1N)EiJ>;I^%2>-o&lwNatN&foDLv-u&Id5F`#7MpSw3-h;VEgLc z)z1}vGfvY#W7GQoys1EcHRECB(TB_QOD^if+^`~F7x!#FwRG?pz4x@|7j64@o__Vf zN-ycAQF`Uly9$Qw8=z-iy6h}|k)pRhaP>v!O~C7&#TQl8gbyZoqx1tGkFH-lWP~oy{OjvIHx=rupDH=~(u0F^F~u9d`I}Nb<^JK@#Ojgy z>)gEi{{1yP;;{Mej=Hu?zozf1VAlzy`a>6l2Hdv2SidoOK<}!{qWT2~UhnbUvNFBz zmRX-}(8ubvOSj#;H9A_)>U!(+=X*=~SLb%SZjE?`K4ss-{q7#h>c(X~7rg#0M~rP2 zt3t7^2VWZ-jcC!r*8fZFT7GL20(NX5V6))W@D}mvo8uzyEGH)Z>T0#f#Qm+qTaNxv z{9vmUsC_DZYNnNkqRZDPB8O094MEYn6N=<-P{aq@){hB2hNYhL258`IptFr4`&tyK z^p;mNT6+@aVXl-JF$uNC^E}Ybf#BI6PKX~mZC@v4o&U&3>G4{2Kf>i z#k7AGMH0QxN@Z@(sFrk3=k*JnG+C?fRPpTwakvV#w%_M+EZL;nam!RvEdorUFg z@1W@ZS+J}6Fc!K!gd%96=n)(f2nAox=|%5Rx2{4FA9VEie`CS_AW$zJ3nvY0$q&;A zEpjN8M4%fy@G$Z06TM6m2}C^AeG9dAd`=~fhpM99uJqlhDej1Y@@(hJt@ z=x%96DEeH9qJ1kAU8%b5enyeJ07aXpj`wVSe{S%*HLeqU+A-@m=XYn~>((_y}V zo4K=t2`TmXjL-vdku7Uty8Xqvw4~d#!#$)xYT_)r(w%08MSOT8JFf8~QVq+^zFvfn zLp2^C<6v5D_RSHzD2|Q(XFeJhkNpToAlx3i8$Vi*9VjBx^*1&D@q5#K{;$59ZaDUL z(6K#Kb^n*B+3ky_(sgOWvA=_i?V*Yv{i)N`D?6tT#0fZscC_#llx9cxFP2Zeuv62s zj(y*^$xdpp|6njyPug(oL|M~4{v1-%4aZK@#P)=6kpzEu76{XFv!Plb+!h-X8)9I? zO7nmi2q%TxhT~FW-Xgm#KK#`+cLJ5v6%BijBVMz^@S(A^^t8j>|M0Jn{fXbuiu~yx zJc^vSuk!rfuj~AuKa>;sgFizO`Ln)-@yCBb;!pf2L*$SCU47(F`&D@4g#2(ca$ERO4)lw9CEp|^wjpz($X`_`ZwhHsdXLu(Vr3>8jVIr0`zxAG&(nG zOrmX*qC+P6+YBQb9YIS&qS0X_u^y9HnL%6f4P4dIbMl^4p+StFpaDUu0YD}{V zv`Pm{QDf$5jmJmLBljQEa{Fm(!C17L^Q`G(XITY98`=o@5?U$y@E ztiNY`iGW+@#p*lM@1zE$H0duo*5I77splTs=4f;(o!_t7`N#D)Rl4DP|E5LrsK3X> z`fEt3ztvZgUq62}T1>(zm^%AdJ!4}7|FAHkp=Cq!8ugbyyyg1)_h-#Au7SS|{6p8o z_=ii)ZMgGfYLCyq8Gio{$F8RRmru1UEQMW+o#zw`oox?o%E4&V27UeW=jKI^w6D^? z2pItxX6(MCHhr1J0BOvU-SFZ z`J%sPObJes=f&*d7x+kRGv+yk!>g=mcqz7t9ZaJ0wPxoZ$4B4(8_$PY&X|Ie)O-}= zKMbQmUoH0cxM|w?Rx_PPi~aN2X6HB8w@*#~e7~jSk484|p}_a2*qRR~jz1^?fBvGs z6s5i~s;ucF`Qm2xE~EDP%%Kn!jZTZLj~wQA@^Sr5TMMSnaq`Cb4q$TZct6BNqZh>1 z8_z$^|5C5`=a0DHSYH6XH}qdLI*MvSjT_m>C!74ePyQo~`i1LLjEr5MPC-{708WY; za~q-<|1e N&=I*p3I2Bk{{vauNN{-1Y}OG1cWt)KGy2cPrKKIfeGd6#o$ z?pPWf#s(9D)k(okNhQG=f6&+7=c|f8b(i0VKPuvYcpw2t1d@OjKmce7Bm=F06reSb z3bX;*0_^}?&({G+1A;(0&=Cj$89*nXGmr^%0lEU+fbKvR&;!T@a)4YQ3|s`{0T%<8 z0G9&!Km_Os^a2=w1vr2Q1V98NKn4^*1$qO0fWE+Gz~w+cKm($H4)g~KfI?sZFc2sL z1_8yu6+j6v7#IRv36ugufnmUKU<5D{7zK<5#sFi1alm+B0&o>@HE<0u5ikG~uz;YC z)bpx8pG4bWQZOkqlt4Q|q#1N;UZXW^uxqV&h#J4YMdw9}CTe-1)XJO9-DOJGs!Cgb zbWG1x%iI?}s<^zms;uCW`-f~?Ic4vZr?1|&YU+T;-k!dF&Gh{re{hhxePIuL8 znEBGw-VYqyIJ@}qaa)gUp8N8r^PfMqt%6h@UeYo%7?&aXlGYbJ92In)FOMCjF9bNw1_+(&t3L1klI@!SZu7NSo_5Xy+v~tYP!zY1FpP zBfI#wB__4ex~CKvnMLdKOa9ikG-vP#=c+LePn+=J!ijmy?a6D)Hf@>u@pI+WA-?d& zHVrCC^;B}r^f^6`EDfu9VS*ov z`CA8kaX>tf03-rQKnoxMv;>lYRzM2S8b}4&0BwPGKzpDAkOl;Sbf6;;0y2P3KxZHm z=mK;Fx&hsRET9LF4dej1Kp40P$OA40E&(nD@_`7@6X*pn01I#c4+ww=NPr9|fC}^m z`T%`_%Ye&)et-r<0UhWM6aa<50AL_c1PlU-fh&L#U@$NQxDqG@h62NY;lK!BBrpmX z4U7TC0^@-3zy#nb;A-F+U?O0&4)i58(jWN*`fJW%1Y$cwWJ2B_97i)-vFk!PkefSY z1``@N5J{*b>83X654Q|I)iL~T+ssfiRs4a9H%nTIZ)ZJ975pMNOME4+5>JVv#82WT z@sc=6d?YRs4~c`sKjI$ojyOkrBd!t8h-1Vr;ui6WI7NISE)kEwp@iPPmVRH~mMs!I z<`8R$F~k;P3bBM3LhK-B5G#lg#0Fvlv49vr+K1-*POQ_O=T>O(eAg0)5u%Z* ziAJQAK(Q3?f};+%@47Uupw%6(z1%ANVs>WGmyw&~%Ydt!F=J{SyQZd>hY$DY8#?zQ z88{UG;_5n-3x=EC&kq+#r}%M1###TInfSf;7av}md-i6n>1IuOV{ZDC@RK=pcWt;- zGcH{6<+VJ!KNN`Z0%u()fBim_8BD47*GYBvuQRD5>FF;f zhj;c2SBEoysG7~#bwcp=#rSQ#M<_}e%S)Hzq3$FB+1k=16{!YI@>G};lu63^O(Yw2_ zuqQHGmO~*^p-{n7AyL7h2&fBNbt%_aW?|zK#f9jV`N2hv2SH$3a_qwD!ceQ&GET)U zFRSfa9~2Tgx8 ziwCG~sUPWncvs(sE=rg(YsU0CuCGi;Ul|`h);qjeC9eNq4SuL^R{T)k@Lu0|-QsnI z*Eb?{i`QR5tb;UkP@^tIYtc0mgCiOZE}*X;tQW>SXQNj|VnUm?Tz@gFb>yU*ZyI~g z%=nITv+udJe8r;SWw=MUMr-JU*3<`0qT1>B^!MY!_w@_Ee;JYF2XESJXEZbQ@B+qF zF9LgkeZYR826zcL0K5$R5qJf76?hFe2)qs)0^R@)18)Lv0e=FH0B-~D0DlIK0)GMC z1^x;g1C9gl0e=JD2R;D)4txll06qdv0v`jP0G|S%0iOf4z!$)mz(0Ufz(0YnfUkkm zz&F6Rz`uYqz`ud-fbRjCJMaT>Ks=BDBmzl53m^cr1d@SPKnlk0(1qs0o{Qtpa+l**CAL|h^s5r>FB#2w-dafbLpTp^wiM~EN94dMlHg7`pOARZ70NdKgJ z(mUy#^i8@ZJ(G?}zoc8zE9sQ?7C zqHgBl(3RoUCD2X7J=4#vL9@ES%)^0{vx#wa(y4Coq21wIMr2fVT=XIL&M3&zH!klv z7E*bpc*%Ikz$2sHx(c#(xp~P%$Rl&7-)KT^H0K|(A*+gS2~UEo;8x6^3^}pW+E=fI zEQ;IIV>)E$v)|7xhxEO*`@l@d6VJZVbq?gghu)q!4|4mRAMCpxvhw;bIxT=KH~u~S zM#wRPTE4IdQe%T1Z-UIr$hvkZS~2?%_d+TY7qwai87NtH&1%S6e%0^RKFLs}J3-ZX(SNgsUx$DKZ ztNskRaq|c7{{^yYHO$X`B&JlINQ zKZD%X?{s~$BSlQyQA+=x4e)Aj1y#HBr@fparla?*~4)XMn zRj>Oxp)W0Xx-OORUkr=CzC z^8$O4`am}4xdXACp+L-h;2E3QdI3H8GP7YCq2=t!)0|nejcMg)=Ye*#2|ssD_|P!S zApGdcH|s*`la0EWW$!V^k#C}&2dQ{{tmio<{^mN*V=&Wh^E^dJUFUfSU*glAXE=EH zpyv^`SD)}aLG@JMe$N9ap&6d#2N=)dwZj8FOV6vm#eXP%2o<-NRc*L{h zZErm1S@5a{-tsKBY{qAv#g_I-@;welDlNKt7TWy{(X-6j^@BZ&JTl$!EOA%wg`Nd& zOuE;zysAT+J&T*T{zcEyil!g+ER5-W%CoFcQi@6zb>huz&yo(V>*HC__G!aC%c)kc z^(>~m1BP#dJHcOT<0-cIb7*6Iw$CPt(eutxl(>XkFi&eLX$#_~R!< zN3$U3Cv@pYPtEc&zgU+C`4+!7As_PQEAF2}PsuoI+5vj>_`n#TXFxU{*a$NI;}N5{aUj-T24esW&r`OQazN5nKJv||BFS$3@tAFNo^Xci{a-r}q z^z^20NrvXt-}-btJ6-^SS+M^pVr zEnBc70Wx7jX&0*h6VK!Cug5@o%|Ka)zW2Sss2Sn+U<-&e&u}7mFmA?;nRzYAcx(1=lk^Z)OYp4CrE6W zvmr?JpKxbIMGMH}hf4ko{x=)PV&rQyzl}I{{#IP(LXAk)ENpNi3FJo3hOljI!mrK` ze=-e@O`HfrrF@F*B6EXh~^9wP55c2~u`;Xau%-&;m z9<%S5UB~P>X2&u6joEF?USoC|v(IDob{VtBm>tIKFJ^Z!dyCmw%)VlF6|<+99mVV? zW;Zc=iP=faK4Nwevxk@+#Q49fp7$}n$9Nv&cZ}CDKF4?*<8O?&^?Yq6dc|C2BbFk1 zoxc?<{mG-Q+i&$Z@UAuS?)=ELIS^jE)E|ELMq*&SdjDgcnq}aw%Y)1NG>pT#Rg;6Z zZQHdOg^dWD%tZu65F&;nsu9j+Y>Bm0)?nQne_kaki?S-Q5m6AGh@d)ZL}d&i!nihX z$`;GWtl%%L9my%OEL)rqkyIYnR25uKuq0evkOj*TWKrShwt{K6vLL$=Su$~)EU*z( zcEpHmvz%!#hV8fl9VaQAWwQQkTM`XiPB0>di>sT4%}Wd;8H{B5D{A++uEom+4v}5X z!sS#uVsNG!ku8^zO~ICU(V@$67Ux=q!$x?96C;9P7!k$dQ7D$-3cQVmDRi97%C0G^ zPQU* zEV`V@2sY2je8e%JFTpa+h$3=aMB!K{(>7#>V@PSLZ7ZzBMR=LVT{+2&7>X%VlSNyV zac@q@(kcy6VQdy%E4U6cg|zu4xF2WXXac$vOUtO3`s;#pO^J z>=bOnjhMFM;4F!EMbqX)&heL~skR}pN>+!7HhNBAR0c=!ZiG_=*I*^lQH0#>j4C+} zs;EXx$K<1@3;2lQvM>zEa4kk=MALD)L$4ew3FuCTu{nQ5frNfln1}>ButnBn6uIzp z?RW!~Mkgx~gB8)QvSUR|7G#5NIZ5Ea7@j3xLr%%nD#0JscF{lsh6ptY4*H*Qz(|I%P2N&O zHuqSSB*}^);3!jJ(Cdr~yE0`VVmpRw3aTpdmXwoKmM`(9YC?sKX`BAqBIwdEWURqH z;TsHIQnJdbEXEYT6)xg%E`#o8g@|ffILtwVM0AK`@Hzg{#pSb2+m3K57ziiApkrYH zjN!V9WTPG~>ZM8wOqPe;IgAOrW67RGxEa-SM8;%oL$Py=6O|^zqasFx<;lwk5(Bp& z@ezWcYB^;09DCj3kac$faOQs-#b0RuO08dpjv4&A3!E{uIkKjBft11B1 zXLU)DRK@ha6aXD_x0gDwUPEEf^OlS5vKVO6Uc4=j7Zeu$3s*M$6*&qEmm%2bhv?dn zV50%1xGpb;l?|B_7~bJ?Ljgx}{okda-}&gG#jW}(@FQ@6oW=8^=y2-Nwk4eGqQ4CA z*o1iqj>1RO=*E17lNru&RCp2CPrl_!FaXd+GR0`1h=rC-Nq`>{B#A+D7%s6ilQ$eg zwj9p?@fg&ahoePx!3+*}f*(>{BR7;LONzmAi}{Hbi+h4{5!}IsKXDjAHdKa}3zEF ztAr<$(GW1fRG<|T#8DkkkrQQ6k|c+>-NL{G$23)igMa4WTEJt)jVQcgM0TGJS#aWv z!4!_^?!a7K$KfLDCbxxw8GLjjl!O{79HNtLvbmuIM1?&8fsd?fHv%qDgl)N^h42K; z@{Gz$a&D*ue$5eWI7(O{ZpNuF2Y)0FEfj58FjQMvIx*iwEPysZ0>d=Am!S0qx+jA8 z$8wy-Fb?ZvZJ!EWqMdNaqQrscDg)1EB0%s8y1Oy}<4CN|Qw_o4IR>tPGgg(rK48fv z1A~!x)lfNGW)rI`d7j}Vn}a9iSrzJI-H5?E=yR605Q+s}Ruiikmor6&1=kR1M1O4o zy4;dsKdvpXstsyexmA^p%}K7Ph!J!b!lMC8hcAblm*8k6&Q%4WFp%%shACJo;)5jl zD~b>X6lgjk!KuPvOkT1IH=>nr$_i`4)Krx|TfpO3gyn>*tvS6;t$Rg|cZAD^O z#2Gar$})&7qp9GC5>Yuuav8-%H_7DRMMFXwfP~xyL|z&Q+lCh@|YUN>KR4@X&U|Qc*V+M(bGcAz-|K3`U5E3YaLEii+53s1X;L zsf5HvaQQ)j1V=g^YD?W{Mo;Gl zq$({lS$Kb_)l3hy3L(o-RMt73mGx|nAkjg7$%0;v`cRE+!nwm^IxtE^ zf5GID99p0{uS8elO^xGkMmK7_!GiL*2fCCZh!GvmK=+a*!An`H>7ve+oFoY>4-e$R zFhC|Ah9iOnXe-iC8)=_gIEE2$ya9KFU|_ka)yV}oiV{hl1M!g0Azx0c&c{fE5kxpn z$KoHVPJJ*%fj@>C$@S7mg!fnH8#ar|yD&S|*&QlE-UYM9br~0-A2Gx-m$t2RL<4>b z5z$3bf||o6s)#K}oh-|dRhDC|!bSNk2R=*4P8B?^U)8zWkn?4i>#=D3;?YW-~KPk}ogk#|#KVIX`B+ z1qXhiW}!Cj$8UR(J&o7e1AL3Ov_b6sPT zRR`PDKH!ZyIV3FqYGDixD@Djbe{irHglLS{5z$>-73o}JZ9bB541F=NgLKhf znh#AVCgP~gh(gw5DcEcKD^A75A$N6YA|S#U=vPE;C>7axMC2HT6EL+QOCog|<8VlX zF~5QA9bO(W4ZVRh5MCJ>nn`0A?>N!qFlB-S4UQ2_z>oz^!1bU{jOH*6B1#@fWEopr z24glJBLs|~Frj2&#A+iq#E4dsFedkuWt$ir(-eS24pTq|g=qkA%i?63$wAuZkwkO^ z@=<2Q^6rbl5?v2L3pp34z#u{LuIHGbEKySQe7S}3f(gGzBt|}joDC6z?u*99t_O6G?35l_wT%`Rt4!1O128@s}pM>Pa5=5`Ps1++Y z=rCv;tz_Z3!3iWFyn&&sWf&MD)AitXSp-_7);1!yU|U)lzCuqX)v%0^dbkh9Azpw3|o++g92sD9X zOtcL44AYNr~&tujX?%2gNiJ;IB8Cf6ZeK8+{e9FbY^PXMcRnDA7 zn)RlB>r_jGY0xe5pC94WVOT%;`Nn~hjS`yHl+ZMJzOkrB`iiXZeJA|k?Jps-`FShe zY-Ym*g%bP>g?@Gznvj6T=_Mlz>B;fHVTFZ5;;}HSumD{;AXwb^m0Vw5Jhq1t=x7|1 zT}emcCo6HC?QvCOHkSFTa+ge5a`XL*Z;w`IlnyAXZg1fI+s_&`Kf#$kXIfo8ci5kP z)F0mYsXzS5Ctg0+=-U6e()D#bukNUHK~QO2Y5Uwt_c;6Ju$CCtCdV!dU3|%H-NzQ# zE0TQS>Mt7AwKdhXiK-6y(+~K=57+v`Z+-5m>OWPw{+%8C>h^)F_!uOA8&prF~ z^8Bau?3Av$y0%d7{CQHplKCs5BL-cmfA_(&`cq2^^{-psrvFdzUOmU!sNa3(gZjzA z*?Nz?Pw2_{EA;i>KB0I2-Cer)`ON6}uV0Qjt)A9{XO`+!Gv18;&li(*@zxvkYcGC4 zuX=fnZqB(+&rN+Pdf9EP{^{_1J=5HyCq4CAbik6m(U)JjC_4X!G5U!eqx3C(1ikwW z^P@K`-J)N##?yPczJFsi?@!Rgv%QHskx9Xd=w~8 zA3mv%{L^gxy3jjOdFRtQGjXndbDSIf?Qb5^b0#VMUfWuwZGZk9?exX1^_NN_`kr2U z^i2;uuHSe3d41Mf^ZNaDLxujxogYUhe|w#7uK$BR`JR<}uk%_syTBr!80!ebLO&zZ&ISe)XzH^o~b2=*I%1_1-TiI+t;|{=ujR^z%YMD1mMn zN}!YI72Sq4Xz?ez^An5X8q|{NsildNsC!L#&7!#Q`)5c$4fkzQi)QuH%!|U$e1CK9 zMQ*9KWsCaihK$kLUtil+vi0c_?c2BF-z%F?q*ZOa_MbkfzxKjMS(QseqqIj$F77b3 zRjIZwq<-}Hd`%mF@NfJFw?s7g$+9S4I9fZ=yY$QD1^L>9nXkX{a*tx|;}az{nMGG> zX|Iaz;XT>f9W%#m@AFWx7QF4m15QZR##krq>!1BwzrNOAM~v?`Lc94?#*Qt29H0#> zzNF@vLnE}++uA?5=kJ0xA*aW?Z?zqvZ5Vl=)fr}}HvhWCC#Hy4+Ubh!8mt=pL| zdnA<=Y0JhvGvf6kLHk7b^_tsX%F?cA_s!ANS4uSNgPiAYdSZxny7khK)oQq=u3f!r z^v+W4Z#}dnOYY>flQok@{Qc`z+L=B2lX6c+n;d%n=JCHDs(n;++rG@A5nA5TA#1Mw zx{LPV@8mg$-YwR$59Vf!xu;O;v;EAorR~OO`vwoHSiFCPmK%QaZ`WNvK+Bt1IqkWd zMXhsH&m-%v)wRU;9!S2ac(`_tIW_*;oB`U_(FgbMyIIm&Z+ZRgiu@khqt=u66b>4y znU~DJ=z)n*ZFKLB(+;>L+M6xekK}(1)8u9IpIb9!xHh>c{?u#ND%wq_2KH7H3bhpW zWafLhS8A_p%KrXXn^D^MJ?rjVq72n0cH8^sn!PXe8*#(Ot+sUNt$j1_*N>09NYR#L z?-%>*7_L1w_VD))K3uBJncew!zwaK^uIbt84{wfQwS#$M<5C_QsMY+t$M|^%3$^gb zJ@czuo$9CkZI9dQmSXMCf8Vk7`0fGP*AGqzjp)%w^J5P2h2IX=+P;>2{pU+cwaadp zx_jkQ12o?)b6)K*u9tS~r4Mz!xVNUh+hWVnQF+?5Grxc2yA6ff@0Ofglr){w9-UM6 z>{U;W(mGtae}1P4L$u%TJoeuGkM+~mKL2jtFAqeue@wc2eCazQwRMwkj1QeE)$Uk* z-_!jT7io8W+GBOfA4WCn!2>a+gaR@8!IL+$^#XeIcUQo5B*%VzXwR5)_L~Oyxj0@E zuBwU)Z@4KA@3{Qv$~Wsm&VNz(_Kzz&W$x~)y*_vE@RsGnwQ&b0t;jwxSWEFQ&&?mN zYu_(jF!|$0hG{+S?%B1^<}OlV7WV5oBG_b>{qWV z*0$t5+H3PxO?$B08-=f>muSB(>HWdbjbpT;TL%v--WS$(?pyg@KYOrtX7u7s<8Lq0 z)_qoUS#))g_Q;Nc_`7~HR9i6gjz9U625RMh*)6?$Jgl8QG3xW>zLDCykvj%;E|)dm zi~Lpd*ACMZKXaG-$I;rMPY0=c{X@0IA8ovQ%Lzq$Uz+^!9Um2I?>zbVJENSa)@^U; z&g2OLwP%tZ&QD#hYjZAd`z+sWl$JWLxB5ouFm23^M;_n$*ih}e=e~OKJx$WuhE6UG zti3|pde6?xNtce+W|d~YtnVD5Eu8Y(BVF<|O?;ruqAoWM)#OJW?>ytUuFXu~EH`g_w?dkxjL-eg{v?_8;^yKgLB$`Z7x ztqwl;=715}l(8G5mD7i6#n&A8ce^Ks6JwivOCu2L!cZWl1VoEww*FsY*ReH?2-v2M zfK7r|-D|?9R>y@mtt2M?=xQ~~#1k!on;L!&eky(%0AJ{C6j0-{9wR?!kK24istr zjIyrtv5+F6=k}q8?eygGZfv1`_t0s)!XdF{zTs*e_9T`XWUgq(b1omm-H1Dl4x)h zz%#mz9sHBiGO&`dz|ZwPfrU?5C%i92!oL)5_ndWXyih6hu8rV( zYQ4j4VLR63X-54g6m}%1FG&vH{%~CQxwYW=IW;`5#?5m3RDAH;v!CVo)?a%i8kR}A z&6tRy1HHK1`cV|^wxUS<2t}$#I1x)d=(XV-(sl=WkvRAzid^dNR=+~gjdld`e%vZsx5KhuO6*e?GM+b zIn`#I=`8v}NwcgnXPOZd@JWr_xcbjS)h##qE)hQaRDXbkgK4?R*F^9NI5v78@Tgln z_wyRQU9`ZId{aJhCectc68*e!G$IY=lRCVv?rMW33Q|Y?2q4haG zZj9}riZ}dzY3j1}nSF5rPN5ym{H~U{+T~8 z68@#Wa`E$j8{!xI(n9#>{;_=c7yGGq_=5ZzG<;!y@)!Q4e;O3N{lfhSB>ex$8z$lZ z-yRJ8Opj`Q?x!f>Ut%IW{EJTGhA+rWPx!(P=fl6;XexXG()#qjZ>CSXh6Cx#?ubJ! zmysDti4Gff<>=mZd48z6O)vVBqXVMR=um+Eu82nGMfFLvZBn%VByXFpN27yjsed#& zh$Ply5-T%kOQDV{TuPT2JhTu;4#F|f(X`&5)(Zq&Hd;ZKDV|h- z#O9U3Mu#OY4E=F-`3Ei%LAzf-Dt@o$(0=j8*4i$AZ!49up*fm1#)l0!zVUf47 zKTgC772Y3J4EG;5dPuoBdcdqP9Dchqu%c}2w2G}EhP!|3g)n_biyw>DLO*RLPME}iH>-u9*w14UR z|NZ`Sse$8WncM*8+gTOShmf!b zPMv+Op0Tlxe^?k?*RlZx_4+F;ZodBB{TX94TgTrz{-JAP{KKW@)!lhAwa4S%46px- zv8!?a6;dsWhQTgI&9?^*m~9Pc%)w~X0)4&n=M_ZHwy(^)2pItxX6(sJPsmhSvrg*})__Uu|;!d3^Njzy5rv<%%gdNhw4@{zKR6^wn&CkDaE@HJj)> zI>bAlX>xv3eS6gO&i7hM{%B|&9|n5<6k7}7#PJ6u;Llt1mZH=*dbv4$C^w|Zy+>1f zJ?2n|ibki!)`t%AI{Cc*rmX{~&an%|dJbT6?07H4MWffp*6Ytd&;L@dc;^qk{#;)G zzSs3%G&-DWLX8_*&nJugy+{7DjrxJ>Q;dvVpH4wnAOKE^>htQN7;US+zPIQLbuTw& yUN^^?9kI&GBgV{`Gv;-l>&%=rdB*haEZ>vq$zak2vu8i!f4(-z6-x0x8u))(_ \array_map(static fn ($i) => $faker->numberBetween(0, Consts::PHP_INT32_MAX), \range(1, \random_int(2, 10))), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertSame( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_list_of_strings() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(NestedColumn::list('list_of_strings', ListElement::string())); + + $faker = Factory::create(); + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'list_of_strings' => \array_map(static fn ($i) => $faker->text(10), \range(1, \random_int(2, 10))), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertSame( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } +} diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesReadingTest.php b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesReadingTest.php index 3b2df0569..5cc892499 100644 --- a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesReadingTest.php +++ b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesReadingTest.php @@ -407,7 +407,7 @@ public function test_reading_time_column() : void $count = 0; foreach ($file->values(['time']) as $row) { - $this->assertIsInt($row['time']); + $this->assertInstanceOf(\DateInterval::class, $row['time']); $count++; } $this->assertSame(100, $count); @@ -426,7 +426,7 @@ public function test_reading_time_nullable_column() : void foreach ($file->values(['time_nullable']) as $rowIndex => $row) { if ($rowIndex % 2 === 0) { - $this->assertIsInt($row['time_nullable']); + $this->assertInstanceOf(\DateInterval::class, $row['time_nullable']); } else { $this->assertNull($row['time_nullable']); } diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php new file mode 100644 index 000000000..f2e741402 --- /dev/null +++ b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php @@ -0,0 +1,372 @@ + (bool) $i % 2 == 0, + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertSame( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_bool_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_date_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::date('date')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'date' => \DateTimeImmutable::createFromMutable($faker->dateTimeThisYear)->setTime(0, 0, 0, 0), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_date_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_decimal_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::decimal('decimal')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'decimal' => \round($faker->randomFloat(5), 2), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_decimal_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_double_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::double('double')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'double' => $faker->randomFloat(), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_double_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_enum_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_float_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::float('float')); + + $inputData = \array_merge(...\array_map(static function (int $i) : array { + return [ + [ + 'float' => 10.25, + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_float_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_int32_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::int32('int32')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'int32' => $faker->numberBetween(0, Consts::PHP_INT32_MAX), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_int32_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_int64() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::int64('int64')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'int64' => $faker->numberBetween(0, Consts::PHP_INT64_MAX), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_int64_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_json_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::json('json')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'json' => \json_encode(['street' => $faker->streetName, 'city' => $faker->city, 'country' => $faker->country, 'zip' => $faker->postcode], JSON_THROW_ON_ERROR), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_json_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_string_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::string('string')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'string' => $faker->text(50), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_string_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_time_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::time('time')); + + $inputData = \array_merge(...\array_map(static function (int $i) : array { + return [ + [ + 'time' => (new \DateTimeImmutable('2023-01-01 00:00:00 UTC'))->diff(new \DateTimeImmutable('2023-01-01 15:45:00 UTC')), + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_time_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_timestamp_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::dateTime('dateTime')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'dateTime' => $faker->dateTimeThisYear, + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_timestamp_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } + + public function test_writing_uuid_column() : void + { + $path = \sys_get_temp_dir() . '/test-writer' . \uniqid('parquet-test-', true) . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::uuid('uuid')); + + $faker = Factory::create(); + + $inputData = \array_merge(...\array_map(static function (int $i) use ($faker) : array { + return [ + [ + 'uuid' => $faker->uuid, + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + $this->assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } + + public function test_writing_uuid_nullable_column() : void + { + $this->markTestSkipped('Not implemented yet'); + } +} diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/WriterTest.php b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/WriterTest.php deleted file mode 100644 index 9be263134..000000000 --- a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/WriterTest.php +++ /dev/null @@ -1,88 +0,0 @@ - $faker->boolean, - 'int32' => $faker->numberBetween(0, Consts::PHP_INT32_MAX), - 'int64' => $faker->numberBetween(0, PHP_INT_MAX), - 'float' => 10.25, - 'double' => $faker->randomFloat(), - 'decimal' => \round($faker->randomFloat(5), 2), - 'string' => $faker->text(50), - 'date' => \DateTimeImmutable::createFromMutable($faker->dateTime)->setTime(0, 0, 0, 0), - 'datetime' => \DateTimeImmutable::createFromMutable($faker->dateTime), - 'list_of_datetimes' => [ - \DateTimeImmutable::createFromMutable($faker->dateTime), - \DateTimeImmutable::createFromMutable($faker->dateTime), - \DateTimeImmutable::createFromMutable($faker->dateTime), - ], - 'map_of_ints' => [ - 'a' => $faker->numberBetween(0, Consts::PHP_INT32_MAX), - 'b' => $faker->numberBetween(0, Consts::PHP_INT32_MAX), - 'c' => $faker->numberBetween(0, Consts::PHP_INT32_MAX), - ], - 'list_of_strings' => \array_map(static fn (int $i) => $faker->text(50), \range(0, \random_int(1, 10))), - 'struct_flat' => [ - 'id' => $i, - 'name' => 'name_' . \str_pad((string) $i, 5, '0', STR_PAD_LEFT), - ], - ], - ]; - }, \range(1, 100)); - - $inputData = \array_merge(...$inputData); - - $writer->write($path, $schema, $inputData); - - $reader = new Reader(); - $file = $reader->read($path); - - $this->assertEquals( - $inputData, - \iterator_to_array($file->values()), - ); - } -}