Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance for Rows: dropRight, partitionBy & sortBy #676

Merged
merged 2 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions phpbench.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/",
"src/core/etl/tests/Flow/ETL/Tests/Benchmark/"
],
"runner.progress": "dots",
"runner.retry_threshold": 5,
"runner.php_config": { "memory_limit": "1G" },
"runner.iterations": 3,
"storage.xml_storage_path": "var/phpbench"
}
18 changes: 5 additions & 13 deletions src/core/etl/src/Flow/ETL/Rows.php
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,7 @@ public function drop(int $size) : self

/**
 * Returns a new instance containing every row except the last $size ones.
 *
 * A non-positive $size drops nothing, which matches the behaviour of the
 * original array_pop() loop (it simply never iterated). When $size is equal
 * to or greater than the number of rows, the result holds no rows.
 *
 * @param int $size number of rows to drop from the end
 */
public function dropRight(int $size) : self
{
    // Guard required by array_slice() semantics: the length argument would be
    // -$size, and for $size === 0 that is 0, which means "take nothing" and
    // would drop ALL rows. A negative $size would turn into a positive length
    // and keep only the first |$size| rows instead of dropping none.
    if ($size <= 0) {
        return new self(...$this->rows);
    }

    // A negative length makes array_slice() stop that many elements before
    // the end of the array, in a single call instead of $size array_pop() calls.
    return new self(...\array_slice($this->rows, 0, -$size));
}

/**
Expand Down Expand Up @@ -556,7 +550,7 @@ public function partitionBy(string|Reference $entry, string|Reference ...$entrie
* @var array<string, mixed> $partitionsData
*/
foreach ($cartesianProduct($partitions) as $partitionsData) {
$rows = $this->filter(function (Row $row) use ($partitionsData) : bool {
$rows = \array_filter($this->rows, function (Row $row) use ($partitionsData) : bool {
/**
* @var mixed $value
*/
Expand All @@ -569,8 +563,8 @@ public function partitionBy(string|Reference $entry, string|Reference ...$entrie
return true;
});

if ($rows->count()) {
$partitionedRows[] = new PartitionedRows($rows, ...Partition::fromArray($partitionsData));
if ($rows) {
$partitionedRows[] = new PartitionedRows(new self(...$rows), ...Partition::fromArray($partitionsData));
}
}

Expand Down Expand Up @@ -672,11 +666,9 @@ public function sortAscending(string|EntryReference $ref) : self
*/
public function sortBy(EntryReference ...$refs) : self
{
$sortBy = References::init(...$refs)->reverse();

$rows = $this;

foreach ($sortBy->all() as $ref) {
foreach (\array_reverse($refs) as $ref) {
$rows = $ref->sort() === SortOrder::ASC ? $rows->sortAscending($ref) : $rows->sortDescending($ref);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,72 +5,55 @@
use function Flow\ETL\DSL\array_to_rows;
use Faker\Factory;
use Flow\ETL\Row\Factory\NativeEntryFactory;
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\Groups;
use PhpBench\Attributes\Iterations;
use PhpBench\Attributes\Revs;
use PhpBench\Attributes\ParamProviders;

#[Iterations(5)]
#[Groups(['building_blocks'])]
final class NativeEntryFactoryBench
{
private array $rowsArray = [];
#[ParamProviders('provideRows')]
public function bench_entry_factory(array $params) : void
{
array_to_rows($params['rows'], new NativeEntryFactory());
}

public function setUp() : void
public function provideRows() : \Generator
{
$faker = Factory::create();

$this->rowsArray = \array_map(
static fn (int $i) : array => [
'order_id' => $faker->uuid,
'created_at' => $faker->dateTimeThisYear->format(\DateTimeInterface::RFC3339),
'updated_at' => $faker->dateTimeThisMonth->format(\DateTimeInterface::RFC3339),
'cancelled_at' => ($i % 10) === 0 ? $faker->dateTimeThisMonth->format(\DateTimeInterface::RFC3339) : null,
'active' => !(($i % 20) === 0),
'total_price' => $faker->randomFloat(2, 0, 500),
'discount' => $faker->randomFloat(2, 0, 50),
'customer' => [
'name' => $faker->firstName,
'last_name' => $faker->lastName,
'email' => $faker->email,
],
'address' => [
'street' => $faker->streetAddress,
'city' => $faker->city,
'zip' => $faker->postcode,
'country' => $faker->country,
'location' => [
'lat' => $faker->latitude,
'lng' => $faker->longitude,
],
$callback = static fn (int $i) : array => [
'order_id' => $faker->uuid,
'created_at' => $faker->dateTimeThisYear->format(\DateTimeInterface::RFC3339),
'updated_at' => $faker->dateTimeThisMonth->format(\DateTimeInterface::RFC3339),
'cancelled_at' => ($i % 10) === 0 ? $faker->dateTimeThisMonth->format(\DateTimeInterface::RFC3339) : null,
'active' => !(($i % 20) === 0),
'total_price' => $faker->randomFloat(2, 0, 500),
'discount' => $faker->randomFloat(2, 0, 50),
'customer' => [
'name' => $faker->firstName,
'last_name' => $faker->lastName,
'email' => $faker->email,
],
'address' => [
'street' => $faker->streetAddress,
'city' => $faker->city,
'zip' => $faker->postcode,
'country' => $faker->country,
'location' => [
'lat' => $faker->latitude,
'lng' => $faker->longitude,
],
'notes' => \array_map(
static fn ($i) => $faker->sentence,
\range(1, $faker->numberBetween(1, 5))
),
],
\range(1, 10_000)
);
}
'notes' => \array_map(
static fn ($i) => $faker->sentence,
\range(1, $faker->numberBetween(1, 5))
),
];

#[BeforeMethods(['setUp'])]
#[Revs(5)]
public function bench_10k_rows() : void
{
array_to_rows($this->rowsArray, new NativeEntryFactory());
}
yield '10k' => ['rows' => \array_map($callback, \range(1, 10_000))];

#[BeforeMethods(['setUp'])]
#[Revs(5)]
public function bench_1k_rows() : void
{
array_to_rows(\array_slice($this->rowsArray, 0, 1_000), new NativeEntryFactory());
}
yield '5k' => ['rows' => \array_map($callback, \range(1, 5000))];

#[BeforeMethods(['setUp'])]
#[Revs(5)]
public function bench_5k_rows() : void
{
array_to_rows(\array_slice($this->rowsArray, 0, 5_000), new NativeEntryFactory());
yield '1k' => ['rows' => \array_map($callback, \range(1, 1000))];
}
}
34 changes: 8 additions & 26 deletions src/core/etl/tests/Flow/ETL/Tests/Benchmark/RowsBench.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@
use Flow\ETL\Row;
use Flow\ETL\Row\Entry\StringEntry;
use Flow\ETL\Rows;
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\Groups;
use PhpBench\Attributes\Iterations;
use PhpBench\Attributes\Revs;

#[Iterations(5)]
#[BeforeMethods('setUp')]
#[Revs(2)]
#[Groups(['building_blocks'])]
final class RowsBench
{
private Rows $reducedRows;

private Rows $rows;

public function __construct()
public function setUp() : void
{
$this->rows = Rows::fromArray(
\array_merge(...\array_map(static fn () : array => [
Expand All @@ -39,71 +40,62 @@ public function __construct()
);
}

#[Revs(5)]
public function bench_chunk_10_on_10k() : void
{
foreach ($this->rows->chunks(10) as $chunk) {

}
}

#[Revs(5)]
public function bench_diff_left_1k_on_10k() : void
{
$this->rows->diffLeft($this->reducedRows);
}

#[Revs(5)]
public function bench_diff_right_1k_on_10k() : void
{
$this->rows->diffRight($this->reducedRows);
}

#[Revs(5)]
public function bench_drop_1k_on_10k() : void
{
$this->rows->drop(1000);
}

#[Revs(5)]
public function bench_drop_right_1k_on_10k() : void
{
$this->rows->dropRight(1000);
}

#[Revs(5)]
public function bench_entries_on_10k() : void
{
foreach ($this->rows->entries() as $entries) {

}
}

#[Revs(5)]
public function bench_filter_on_10k() : void
{
$this->rows->filter(fn (Row $row) : bool => $row->valueOf('random') === true);
}

#[Revs(5)]
public function bench_find_on_10k() : void
{
$this->rows->find(fn (Row $row) : bool => $row->valueOf('random') === true);
}

#[Revs(5)]
#[Revs(10)]
public function bench_find_one_on_10k() : void
{
$this->rows->findOne(fn (Row $row) : bool => $row->valueOf('random') === true);
}

#[Revs(5)]
#[Revs(10)]
public function bench_first_on_10k() : void
{
$this->rows->first();
}

#[Revs(5)]
public function bench_flat_map_on_1k() : void
{
$this->reducedRows->flatMap(fn (Row $row) : array => [
Expand All @@ -112,73 +104,63 @@ public function bench_flat_map_on_1k() : void
]);
}

#[Revs(5)]
public function bench_map_on_10k() : void
{
$this->rows->map(fn (Row $row) : Row => $row->rename('random', 'whatever'));
}

#[Revs(5)]
public function bench_merge_1k_on_10k() : void
{
$this->rows->merge($this->reducedRows);
}

#[Revs(5)]
public function bench_partition_by_on_10k() : void
{
$this->rows->partitionBy(ref('from'));
}

#[Revs(5)]
public function bench_remove_on_10k() : void
{
$this->rows->remove(1001);
}

#[Revs(5)]
public function bench_sort_asc_on_1k() : void
{
$this->reducedRows->sortAscending(ref('random'));
}

#[Revs(5)]
public function bench_sort_by_on_1k() : void
{
$this->reducedRows->sortBy(ref('random'));
}

#[Revs(5)]
public function bench_sort_desc_on_1k() : void
{
$this->reducedRows->sortDescending(ref('random'));
}

#[Revs(5)]
public function bench_sort_entries_on_1k() : void
{
$this->reducedRows->sortEntries();
}

#[Revs(5)]
public function bench_sort_on_1k() : void
{
$this->reducedRows->sort(fn (Row $row, Row $nextRow) : int => $row->valueOf('random') <=> $nextRow->valueOf('random'));
}

#[Revs(5)]
#[Revs(10)]
public function bench_take_1k_on_10k() : void
{
$this->rows->take(1000);
}

#[Revs(5)]
#[Revs(10)]
public function bench_take_right_1k_on_10k() : void
{
$this->rows->takeRight(1000);
}

#[Revs(5)]
public function bench_unique_on_1k() : void
{
$this->rows->unique();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@
use Flow\ETL\FlowContext;
use Flow\ETL\Rows;
use Flow\ETL\Transformer\RenameEntryTransformer;
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\Groups;
use PhpBench\Attributes\Iterations;
use PhpBench\Attributes\Revs;

#[Iterations(5)]
#[BeforeMethods('setUp')]
#[Groups(['transformer'])]
final class RenameEntryTransformerBench
{
private FlowContext $context;

private Rows $rows;

public function __construct()
public function setUp() : void
{
$this->rows = Rows::fromArray(
\array_merge(...\array_map(static function () : array {
Expand All @@ -34,7 +33,6 @@ public function __construct()
$this->context = new FlowContext(Config::default());
}

#[Revs(5)]
public function bench_transform_10k_rows() : void
{
(new RenameEntryTransformer('from', 'to'))->transform($this->rows, $this->context);
Expand Down