Skip to content

Commit

Permalink
Merge pull request #1 from yuriyzubov/support_tiffstack
Browse files Browse the repository at this point in the history
Use dask scheduler to write tiff tiles in parallel to zarr array. Tiffstack and 3d tiff volumes are now both supported as inputs.
  • Loading branch information
yuriyzubov authored Jan 16, 2025
2 parents 5c300a4 + 641abdf commit fd03d4e
Show file tree
Hide file tree
Showing 8 changed files with 1,584 additions and 252 deletions.
1,232 changes: 1,232 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ imagecodecs = "^2024.1.1"
asciitree = "^0.3.3"
pyyaml = "^6.0.1"
black = "^24.10.0"
dask-jobqueue = "0.8.2"

[tool.poetry.group.dev.dependencies]
zarr = "^2.16.1"
Expand Down
172 changes: 0 additions & 172 deletions src/conversion_lib.py

This file was deleted.

9 changes: 0 additions & 9 deletions src/n5_attrs_template.json

This file was deleted.

71 changes: 0 additions & 71 deletions src/tif_to_zarr.py

This file was deleted.

90 changes: 90 additions & 0 deletions src/tiff_stack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from tifffile import imread
import numpy as np
import zarr
import os
from dask.distributed import Client, wait
import time
import dask.array as da
from natsort import natsorted
from glob import glob
from tiff_volume import TiffVolume


class TiffStack(TiffVolume):
    """Tiff volume stored as a directory of tiff tiles (one tile per index
    along the first axis of the output array).

    The tiles are discovered with the ``*.tif*`` glob pattern inside
    ``src_path`` and ordered with a natural sort.
    """

    def __init__(
        self,
        src_path: str,
        axes: list[str],
        scale: list[float],
        translation: list[float],
        units: list[str],
    ):
        """Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr.

        Args:
            src_path (str): path to the directory that contains the tiff tiles.
            axes (list[str]): axes names, stored in ``zarr_metadata["axes"]``.
            scale (list[float]): stored in ``zarr_metadata["scale"]``.
            translation (list[float]): stored in ``zarr_metadata["translation"]``.
            units (list[str]): stored in ``zarr_metadata["units"]``.

        Raises:
            FileNotFoundError: if no ``*.tif*`` file is found in ``src_path``.
        """
        self.src_path = src_path
        self.stack_list = natsorted(glob(os.path.join(src_path, "*.tif*")))
        if not self.stack_list:
            raise FileNotFoundError(
                f"No tiff files matching '*.tif*' found in {src_path}"
            )

        # glob() already returns paths that include src_path, so the entry is
        # used as is; joining it with src_path again breaks relative paths.
        probe_image_store = imread(self.stack_list[0], aszarr=True)
        try:
            probe_image_arr = da.from_zarr(probe_image_store)
            # only dtype and shape of the first tile are needed
            self.dtype = probe_image_arr.dtype
            self.shape = [len(self.stack_list)] + list(probe_image_arr.shape)
        finally:
            # release the file handle held by the probe store
            probe_image_store.close()

        # metadata
        self.zarr_metadata = {
            "axes": axes,
            "translation": translation,
            "scale": scale,
            "units": units,
        }

    def write_tile_slab_to_zarr(
        self, chunk_num: int, zarray: zarr.Array, src_volume: list
    ):
        """Read a run of consecutive tiles and write them to ``zarray`` as one slab.

        Args:
            chunk_num (int): index along the first axis where the slab starts.
            zarray (zarr.Array): destination array; its first-axis chunk size
                defines the slab thickness.
            src_volume (list): tile file paths, indexed by position along the
                first axis.

        A tile that cannot be read is reported and written as zeros, so that
        one bad tile does not abort the slab or leak data from another tile.
        """
        # the last slab may be thinner than a full chunk
        slab_thickness = min(zarray.chunks[0], zarray.shape[0] - chunk_num)

        slab_shape = [slab_thickness] + list(zarray.shape[-2:])
        np_slab = np.empty(slab_shape, zarray.dtype)

        # combine tiles into a slab with thickness equal to the chunk size in z direction
        for slab_index in range(chunk_num, chunk_num + slab_thickness):
            try:
                image_tile = imread(src_volume[slab_index])
            except Exception as err:
                # best effort: keep going, but never reuse the previous tile
                # or leave uninitialised memory in the slab
                print(
                    f"Tiff tile with index {slab_index} is not present in tiff stack."
                    f" Filling with zeros ({err!r})."
                )
                np_slab[slab_index - chunk_num, :, :] = 0
                continue
            np_slab[slab_index - chunk_num, :, :] = image_tile

        # write a tiff stack slab into a zarr array
        zarray[chunk_num : chunk_num + slab_thickness, :, :] = np_slab

    # parallel writing of tiff stack into zarr array
    def write_to_zarr(self, zarray: zarr.Array, client: Client):
        """Write the whole tiff stack into ``zarray``, one task per slab.

        Args:
            zarray (zarr.Array): destination array.
            client (Client): dask client used to schedule the slab tasks.

        Returns:
            int: 0 once all tasks have finished successfully.

        Raises:
            RuntimeError: if at least one slab task ended with an error.
        """
        chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0])
        print(chunks_list)

        start = time.time()
        fut = client.map(
            lambda v: self.write_tile_slab_to_zarr(v, zarray, self.stack_list),
            chunks_list,
        )
        print(
            f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
        )

        # wait for all the futures to complete
        result = wait(fut)
        print(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")

        # wait() does not raise for failed tasks, so check them explicitly
        failed = [f for f in result.done if f.status == "error"]
        if failed:
            raise RuntimeError(
                f"{len(failed)} of {len(chunks_list)} slab tasks failed"
            ) from failed[0].exception()

        return 0
Loading

0 comments on commit fd03d4e

Please sign in to comment.