diff --git a/.typos.toml b/.typos.toml index 694ed501..1c3a4859 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,5 +1,14 @@ [files] -extend-exclude = ["*.csv", "sign-language-detection-yolov5/*", "orbit-user-analysis/steps/report.py", "customer-satisfaction/pipelines/deployment_pipeline.py", "customer-satisfaction/streamlit_app.py", "nba-pipeline/Building and Using An MLOPs Stack With ZenML.ipynb", "customer-satisfaction/tests/data_test.py"] +extend-exclude = [ + "*.csv", + "sign-language-detection-yolov5/*", + "orbit-user-analysis/steps/report.py", + "customer-satisfaction/pipelines/deployment_pipeline.py", + "customer-satisfaction/streamlit_app.py", + "nba-pipeline/Building and Using An MLOPs Stack With ZenML.ipynb", + "customer-satisfaction/tests/data_test.py", + "end-to-end-computer-vision/**/*.ipynb" +] [default.extend-identifiers] # HashiCorp = "HashiCorp" @@ -14,6 +23,9 @@ lenght = "lenght" preprocesser = "preprocesser" Preprocesser = "Preprocesser" Implicitly = "Implicitly" +fo = "fo" +mapp = "mapp" +polution = "polution" [default] locale = "en-us" diff --git a/end-to-end-computer-vision/.gitignore b/end-to-end-computer-vision/.gitignore index e76e4bbf..15a9d546 100644 --- a/end-to-end-computer-vision/.gitignore +++ b/end-to-end-computer-vision/.gitignore @@ -3,3 +3,4 @@ data/ runs/ **/tmp* +runs_dir diff --git a/end-to-end-computer-vision/README.md b/end-to-end-computer-vision/README.md index 658da4bd..f936a8e8 100644 --- a/end-to-end-computer-vision/README.md +++ b/end-to-end-computer-vision/README.md @@ -4,6 +4,8 @@ This is a project that demonstrates an end-to-end computer vision pipeline using ZenML. The pipeline is designed to be modular and flexible, allowing for easy experimentation and extension. +![diagram.png](_assets/diagram.png) + The project showcases the full lifecycle of a computer vision project, from data collection and preprocessing to model training and evaluation. The pipeline also incorporates a human-in-the-loop (HITL) component, where human annotators can @@ -12,36 +14,230 @@ label images to improve the model's performance, as well as feedback using The project uses the [Ship Detection dataset](https://huggingface.co/datasets/datadrivenscience/ship-detection) from -[DataDrivenScience](https://datadrivenscience.com/) on the Hugging Face Hub, which contains images of ships in -satellite imagery. The goal is to train a model to detect ships in the images. -Note that this isn't something that our YOLOv8 model is particularly good at out -of the box, so it serves as a good example of how to build a pipeline that can -be extended to other use cases. +[DataDrivenScience](https://datadrivenscience.com/) on the Hugging Face Hub, +which contains images of ships in satellite imagery. The goal is to train a +model to detect ships in the images. Note that this isn't something that our +YOLOv8 model is particularly good at out of the box, so it serves as a good +example of how to build a pipeline that can be extended to other use cases. + +This project needs some infrastructure and tool setup to work. Here is a list of +things that you'll need to do. 
-## Run this pipeline +## ZenML -### Setup +We recommend using our [ZenML Cloud offering](https://cloud.zenml.io/) to get a +deployed instance of ZenML: -You'll need to run the following: +### Set up your environment ```bash -zenml integration install label_studio torch gcp mlflow -y pip install -r requirements.txt -pip uninstall wandb +zenml integration install label_studio torch gcp mlflow -y +pip uninstall wandb # This comes in automatically +``` + +And to use the Albumentations and annotation plugins in the last step, you'll +need to install them: + +```bash +fiftyone plugins download https://github.com/jacobmarks/fiftyone-albumentations-plugin + +fiftyone plugins download https://github.com/voxel51/fiftyone-plugins --plugin-names @voxel51/annotation ``` -You can also set the following environment variables: +You should also set up the following environment variables: ```bash export DATA_UPLOAD_MAX_NUMBER_FILES=1000000 export WANDB_DISABLED=True ``` -And to use the Albumentations and annotation plugins, you'll need to install -them: +### Connect to your deployed ZenML instance ```bash -fiftyone plugins download https://github.com/jacobmarks/fiftyone-albumentations-plugin +zenml connect --url <your-zenml-server-url> +``` -fiftyone plugins download https://github.com/voxel51/fiftyone-plugins --plugin-names @voxel51/annotation +## Cloud Provider + +We will use GCP in the commands listed below, but this will also work for other cloud +providers. + +### Follow our guide to set up your credentials for GCP + +[Set up a GCP service +connector](https://docs.zenml.io/stacks-and-components/auth-management/gcp-service-connector) + +### Set up a bucket to persist your training data + +### Set up a bucket to use as artifact store within ZenML + +[Learn how to set up a GCP artifact store stack component within ZenML +here](https://docs.zenml.io/stacks-and-components/component-guide/artifact-stores) + +### Set up Vertex for pipeline orchestration + +[Learn how to set up a Vertex orchestrator stack component within ZenML +here](https://docs.zenml.io/stacks-and-components/component-guide/orchestrators/vertex) + +### For training on accelerators like GPUs/TPUs, set up Vertex + +[Learn how to set up a Vertex step operator stack component within ZenML +here](https://docs.zenml.io/stacks-and-components/component-guide/step-operators/vertex) + +### Set up a Container Registry + +[Learn how to set up a Google Cloud container registry component within ZenML +here](https://docs.zenml.io/stacks-and-components/component-guide/container-registries/gcp) + +## Label Studio + +### [Start Label Studio locally](https://labelstud.io/guide/start) +### [Follow these ZenML instructions to set up Label Studio as a stack component](https://docs.zenml.io/stacks-and-components/component-guide/annotators/label-studio) +### Create a project within Label Studio and name it `ship_detection_gcp` +### [Set up Label Studio to use external storage](https://labelstud.io/guide/storage) +Use the first bucket that you created for data persistence. + +## ZenML Stacks + +### Local Stack + +The local stack should use the `default` orchestrator, a remote GCP artifact +store that we'll call `gcp_artifact_store` here, and a local Label Studio +annotator that we'll refer to as `label_studio_local`.
+ +```bash +# Make sure to replace the names with the names that you choose for your setup +zenml stack register <local_stack> -o default -a <gcp_artifact_store> -an <label_studio_local> +``` + +### Remote Stack + +The remote stack should use the `vertex_orchestrator`, a `gcp_artifact_store`, +a `gcp_container_registry`, and a `vertex_step_operator`. + + +```bash +# Make sure to replace the names with the names that you choose for your setup +zenml stack register <remote_stack> -o <vertex_orchestrator> -a <gcp_artifact_store> -c <gcp_container_registry> -s <vertex_step_operator> ``` + +The project consists of the following pipelines: + +## data_ingestion_pipeline + +This pipeline downloads the [Ship Detection +dataset](https://huggingface.co/datasets/datadrivenscience/ship-detection). This +dataset contains some truly huge images with a few hundred million pixels. In +order to make these usable, we break down all source images into manageable +tiles with a maximum height/width of 1000 pixels. After this preprocessing is +done, the images are uploaded into a cloud bucket and the ground truth +annotations are uploaded to a local Label Studio instance. + +### Configure this pipeline +The configuration file for this pipeline lives at `./configs/ingest_data.yaml`. +Make sure in particular to change `data_source` to point at the GCP bucket that +is meant to serve as the storage location for the data. Also make sure to adjust +the `ls_project_id` to correspond to the ID of your project within Label Studio. + +### Run this pipeline + +Label Studio should be up and running for the whole duration of this pipeline +run. + +```bash +zenml stack set <local_stack> +python run.py --ingest +``` + +## data_export_pipeline + +This pipeline exports the annotations from Label Studio and loads them into the +ZenML artifact store to make them accessible to downstream pipelines. + +### Configure this pipeline +The configuration file for this pipeline lives at `./configs/data_export.yaml`. +Make sure in particular to change `dataset_name` to reflect the name of the +dataset within Label Studio. + +### Run this pipeline + +Label Studio should be up and running for the whole duration of this pipeline +run. + +```bash +zenml stack set <local_stack> +python run.py --export +``` + +## training_pipeline + +This pipeline trains a YOLOv8 object detection model. + +### Configure this pipeline +You can choose to run this pipeline locally or on the cloud. These two options +use two different configuration files. For local training: +`./configs/training_pipeline.yaml`. For training on the cloud: +`./configs/training_pipeline_remote_gpu.yaml`. Make sure `data_source` points to +your cloud storage bucket. + +### Run this pipeline + +This pipeline requires the associated model (see the model section of the +configuration YAML file) to have a version in the `staging` stage. In order to +promote the model produced by the latest run of the `data_export_pipeline`, run +the following code: + +```bash +zenml model version update <model_name> latest -s staging +``` + +For local training, run the following code: + +```bash +zenml stack set <local_stack> +python run.py --training --local +``` + +For remote training, run the following code: + +```bash +zenml stack set <remote_stack> +python run.py --training +``` + +## inference_pipeline + +This pipeline performs batch inference using the object detection model. + +### Configure this pipeline +You can configure this pipeline via the following YAML file: +`./configs/inference_pipeline.yaml`.
Make sure `data_source` points to your +cloud storage bucket that contains images that you want to perform batch +inference on. + +### Run this pipeline + +This pipeline requires the associated model (see the model section of the +configuration YAML file) to have a version in the `production` stage. In order +to promote the model produced by the latest run of the `training_pipeline`, run +the following code: + +```bash +zenml model version update <model_name> staging -s production +``` + +Then run the pipeline: + +```bash +zenml stack set <stack_name> +python run.py --inference +``` + + +## Analyze and Curate your data through FiftyOne + +Now to close the loop, we will import the predictions into FiftyOne. All you'll +need to do is run: + +```bash +python run.py --fiftyone +``` + +Within FiftyOne, you can now analyze all the predictions and export them back to +Label Studio for refined labeling and retraining. diff --git a/end-to-end-computer-vision/_assets/diagram.png b/end-to-end-computer-vision/_assets/diagram.png new file mode 100644 index 00000000..d02f5213 Binary files /dev/null and b/end-to-end-computer-vision/_assets/diagram.png differ diff --git a/end-to-end-computer-vision/bus.jpg b/end-to-end-computer-vision/bus.jpg new file mode 100644 index 00000000..b43e3111 Binary files /dev/null and b/end-to-end-computer-vision/bus.jpg differ diff --git a/end-to-end-computer-vision/configs/data_export_alexej.yaml b/end-to-end-computer-vision/configs/data_export.yaml similarity index 71% rename from end-to-end-computer-vision/configs/data_export_alexej.yaml rename to end-to-end-computer-vision/configs/data_export.yaml index 22f2464e..62d3157c 100644 --- a/end-to-end-computer-vision/configs/data_export_alexej.yaml +++ b/end-to-end-computer-vision/configs/data_export.yaml @@ -2,7 +2,7 @@ enable_cache: False # pipeline configuration parameters: - dataset_name: "ship_detection_gcp" # "ship_detection" + dataset_name: "ship_detection_gcp" # This is the name of the dataset in Label Studio # Configuration of the Model Control Plane model: diff --git a/end-to-end-computer-vision/configs/data_export_alex.yaml b/end-to-end-computer-vision/configs/data_export_alex.yaml deleted file mode 100644 index c73b4d80..00000000 --- a/end-to-end-computer-vision/configs/data_export_alex.yaml +++ /dev/null @@ -1,12 +0,0 @@ -enable_cache: False - -# pipeline configuration -parameters: - dataset_name: "FiftyOne_ships_wuxqa5" - -# configuration of the Model Control Plane -model: - name: ShipDetector - license: Apache 2.0 - description: Object Detection Model. - tags: ["object detection"] diff --git a/end-to-end-computer-vision/configs/fiftyone.yaml b/end-to-end-computer-vision/configs/fiftyone.yaml deleted file mode 100644 index e94f5e98..00000000 --- a/end-to-end-computer-vision/configs/fiftyone.yaml +++ /dev/null @@ -1,22 +0,0 @@ -enable_cache: False - -settings: - docker: - apt_packages: - - ffmpeg - - libsm6 - - libxext6 - required_integrations: - - gcp - - github - requirements: - - ultralytics - - fiftyone - -# configuration of the Model Control Plane -# model: -# name: ShipDetector -# license: Apache 2.0 -# description: Object Detection Model.
-# tags: ["object detection"] -# version: production diff --git a/end-to-end-computer-vision/configs/cloud_inference.yaml b/end-to-end-computer-vision/configs/inference_pipeline.yaml similarity index 81% rename from end-to-end-computer-vision/configs/cloud_inference.yaml rename to end-to-end-computer-vision/configs/inference_pipeline.yaml index 650dc901..f6dd22d8 100644 --- a/end-to-end-computer-vision/configs/cloud_inference.yaml +++ b/end-to-end-computer-vision/configs/inference_pipeline.yaml @@ -19,7 +19,7 @@ steps: enable_cache: False enable_step_logs: False parameters: - inference_data_source: "gs://zenml-internal-artifact-store/inference_cv_webinar" + inference_data_source: # Insert your bucket path here where the inference images live e.g. "gs://foo/bar" # configuration of the Model Control Plane model: diff --git a/end-to-end-computer-vision/configs/ingest_data.yaml b/end-to-end-computer-vision/configs/ingest_data.yaml index 4f294ab5..df8c48df 100644 --- a/end-to-end-computer-vision/configs/ingest_data.yaml +++ b/end-to-end-computer-vision/configs/ingest_data.yaml @@ -1,10 +1,11 @@ steps: - download_dataset_from_hf: + download_and_tile_dataset_from_hf: enable_cache: True + enable_step_logs: False parameters: dataset: "datadrivenscience/ship-detection" - data_source: "gs://zenml-internal-artifact-store/label_studio_cv_webinar" + data_source: # Insert your bucket path here where the training images will live e.g. "gs://foo/bar" upload_labels_to_label_studio: enable_cache: False parameters: diff --git a/end-to-end-computer-vision/configs/training.yaml b/end-to-end-computer-vision/configs/training_pipeline.yaml similarity index 76% rename from end-to-end-computer-vision/configs/training.yaml rename to end-to-end-computer-vision/configs/training_pipeline.yaml index 6b8d470e..f49ba554 100644 --- a/end-to-end-computer-vision/configs/training.yaml +++ b/end-to-end-computer-vision/configs/training_pipeline.yaml @@ -7,10 +7,10 @@ steps: train_model: enable_cache: False parameters: - data_source: "gs://zenml-internal-artifact-store/label_studio_cv_webinar" - batch_size: 16 - imgsz: 640 - epochs: 30 + data_source: # Insert your bucket path here where the training images lives e.g. "gs://foo/bar" + batch_size: 8 + imgsz: 720 + epochs: 1 settings: docker: diff --git a/end-to-end-computer-vision/configs/training_gpu.yaml b/end-to-end-computer-vision/configs/training_pipeline_remote_gpu.yaml similarity index 78% rename from end-to-end-computer-vision/configs/training_gpu.yaml rename to end-to-end-computer-vision/configs/training_pipeline_remote_gpu.yaml index 5b5a3e4b..1ee643ab 100644 --- a/end-to-end-computer-vision/configs/training_gpu.yaml +++ b/end-to-end-computer-vision/configs/training_pipeline_remote_gpu.yaml @@ -21,14 +21,14 @@ steps: step_operator: gcp_a100 enable_step_logs: False parameters: - data_source: "gs://zenml-internal-artifact-store/label_studio_cv_webinar" + data_source: # Insert your bucket path here where the training images lives e.g. 
"gs://foo/bar" batch_size: 8 - imgsz: 480 - epochs: 5000 + imgsz: 720 + epochs: 50000 is_quad_gpu_env: True settings: step_operator.vertex: - accelerator_type: NVIDIA_TESLA_T4 # NVIDIA_TESLA_A100 # see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType + accelerator_type: NVIDIA_TESLA_T4 # see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType accelerator_count: 4 disk_size_gb: 25 docker: diff --git a/end-to-end-computer-vision/data/.gitignore b/end-to-end-computer-vision/data/.gitignore new file mode 100644 index 00000000..86d0cb27 --- /dev/null +++ b/end-to-end-computer-vision/data/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/end-to-end-computer-vision/data/README.md b/end-to-end-computer-vision/data/README.md new file mode 100644 index 00000000..b6be7050 --- /dev/null +++ b/end-to-end-computer-vision/data/README.md @@ -0,0 +1 @@ +This directory serves as a place to store and access temporary datafiles. \ No newline at end of file diff --git a/end-to-end-computer-vision/materializers/label_studio_export_materializer.py b/end-to-end-computer-vision/materializers/label_studio_export_materializer.py new file mode 100644 index 00000000..356edc2a --- /dev/null +++ b/end-to-end-computer-vision/materializers/label_studio_export_materializer.py @@ -0,0 +1,99 @@ +# Copyright (c) ZenML GmbH 2022. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Implementation of the PyTorch DataLoader materializer.""" + +import os +import tempfile +from typing import TYPE_CHECKING, Any, ClassVar, Type + +from zenml.io import fileio +from zenml.materializers.base_materializer import BaseMaterializer + +DEFAULT_FILENAME = "annotations.zip" + +if TYPE_CHECKING: + from label_studio_sdk import Project + + +class LabelStudioAnnotationExport: + + def __init__(self, dataset: "Project" = None, filepath: str = None): + """ + Initialize LabelStudioAnnotationExport object with optional parameters. + + Parameters: + dataset: Label-studio dataset object. + filepath: A string representing the file path. Defaults to None. 
+ """ + self.dataset = dataset + self.filepath = filepath + + def download_annotations(self): + """Downloads the annotations from label-studio to the local fs.""" + tmpfile_ = tempfile.NamedTemporaryFile(dir="data", delete=False) + tmpdirname = os.path.basename(tmpfile_.name) + self.filepath = os.path.join(tmpdirname, DEFAULT_FILENAME) + self.dataset.export_tasks( + export_type="YOLO", + export_location=self.filepath, + download_resources=False, + ) + + +class LabelStudioAnnotationMaterializer(BaseMaterializer): + """Base class for Label Studio annotation models.""" + + FILENAME: ClassVar[str] = DEFAULT_FILENAME + SKIP_REGISTRATION: ClassVar[bool] = True + ASSOCIATED_TYPES = (LabelStudioAnnotationExport,) + + def load(self, data_type: Type[Any]) -> LabelStudioAnnotationExport: + """Loads the serialized JSON file containing the annotations. + + Args: + data_type: A ultralytics YOLO type. + + Returns: + A ultralytics YOLO object. + """ + # Recreate the filepath of the file + filepath = os.path.join(self.uri, DEFAULT_FILENAME) + + # Create a temporary file + tmpfile_ = tempfile.NamedTemporaryFile( + dir="data", delete=False, suffix=".zip" + ) + + # Copy from artifact store to temporary file + fileio.copy(filepath, tmpfile_.name, overwrite=True) + + # Re-instantiate the LabelStudioAnnotationExport model + dataset = LabelStudioAnnotationExport(filepath=tmpfile_.name) + + return dataset + + def save(self, dataset: LabelStudioAnnotationExport) -> None: + """Creates a JSON serialization for a label studio YOLO dataset model. + + Args: + dataset: A label studio YOLO dataset model. + """ + # Downloads the annotations into the local fs + dataset.download_annotations() + + # create the destination path for the exported annotations + filepath = os.path.join(self.uri, DEFAULT_FILENAME) + + # copies the files from local fs into the artifact store + fileio.copy(dataset.filepath, filepath) diff --git a/end-to-end-computer-vision/materializers/ultralytics_materializer.py b/end-to-end-computer-vision/materializers/ultralytics_materializer.py new file mode 100644 index 00000000..fb4c47e4 --- /dev/null +++ b/end-to-end-computer-vision/materializers/ultralytics_materializer.py @@ -0,0 +1,69 @@ +# Copyright (c) ZenML GmbH 2022. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Implementation of the PyTorch DataLoader materializer.""" + +import os +import tempfile +from typing import Any, ClassVar, Type + +from ultralytics import YOLO +from zenml.integrations.pytorch.materializers.pytorch_module_materializer import ( + PyTorchModuleMaterializer, +) +from zenml.io import fileio + +DEFAULT_FILENAME = "obj.pt" + + +class UltralyticsMaterializer(PyTorchModuleMaterializer): + """Base class for ultralytics YOLO models.""" + + FILENAME: ClassVar[str] = DEFAULT_FILENAME + SKIP_REGISTRATION: ClassVar[bool] = True + ASSOCIATED_TYPES = (YOLO,) + + def load(self, data_type: Type[Any]) -> YOLO: + """Reads a ultralytics YOLO model from a serialized JSON file. + + Args: + data_type: A ultralytics YOLO type. 
+ + Returns: + A ultralytics YOLO object. + """ + filepath = os.path.join(self.uri, DEFAULT_FILENAME) + + # Create a temporary folder + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + + # Copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + model = YOLO(temp_file) + + return model + + def save(self, model: YOLO) -> None: + """Creates a JSON serialization for a ultralytics YOLO model. + + Args: + model: A ultralytics YOLO model. + """ + filepath = os.path.join(self.uri, DEFAULT_FILENAME) + + # Make a temporary phantom artifact + with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as f: + model.save(f.name) + # Copy it into artifact store + fileio.copy(f.name, filepath) diff --git a/end-to-end-computer-vision/pipelines/__init__.py b/end-to-end-computer-vision/pipelines/__init__.py index 4dc295a9..6ee60a58 100644 --- a/end-to-end-computer-vision/pipelines/__init__.py +++ b/end-to-end-computer-vision/pipelines/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. # -from .data_export import export_for_training - -# from .training import training +from .data_export import data_export_pipeline +from .data_ingestion import data_ingestion_pipeline +from .inference import inference_pipeline +from .training import training_pipeline diff --git a/end-to-end-computer-vision/pipelines/data_export.py b/end-to-end-computer-vision/pipelines/data_export.py index 344eadb9..3adf50c1 100644 --- a/end-to-end-computer-vision/pipelines/data_export.py +++ b/end-to-end-computer-vision/pipelines/data_export.py @@ -23,7 +23,7 @@ @pipeline -def export_for_training(dataset_name: str = "polution"): +def data_export_pipeline(dataset_name: str = "polution"): """Loads data from Label studio. Args: diff --git a/end-to-end-computer-vision/pipelines/data_ingestion.py b/end-to-end-computer-vision/pipelines/data_ingestion.py index e270179d..b1df9c6b 100644 --- a/end-to-end-computer-vision/pipelines/data_ingestion.py +++ b/end-to-end-computer-vision/pipelines/data_ingestion.py @@ -17,13 +17,13 @@ from zenml import pipeline from zenml.logger import get_logger -from steps.download_from_hf import download_dataset_from_hf +from steps.process_hf_dataset import process_hf_dataset from steps.upload_to_label_studio import upload_labels_to_label_studio logger = get_logger(__name__) @pipeline -def data_ingestion(): - labels_dict = download_dataset_from_hf() +def data_ingestion_pipeline(): + labels_dict = process_hf_dataset() upload_labels_to_label_studio(labels_dict) diff --git a/end-to-end-computer-vision/pipelines/inference.py b/end-to-end-computer-vision/pipelines/inference.py index 77db9ef1..8191f399 100644 --- a/end-to-end-computer-vision/pipelines/inference.py +++ b/end-to-end-computer-vision/pipelines/inference.py @@ -23,10 +23,6 @@ @pipeline -def inference(): +def inference_pipeline(): """Uses FiftyOne for inference on a dataset.""" create_fiftyone_dataset() - - -if __name__ == "__main__": - inference.with_options(config_path="configs/cloud_inference.yaml")() diff --git a/end-to-end-computer-vision/pipelines/training.py b/end-to-end-computer-vision/pipelines/training.py index 3e4edf41..eb31416d 100644 --- a/end-to-end-computer-vision/pipelines/training.py +++ b/end-to-end-computer-vision/pipelines/training.py @@ -25,7 +25,7 @@ @pipeline -def training(model_checkpoint: str = "yolov8l.pt"): +def training_pipeline(model_checkpoint: str = "yolov8l.pt"): """Trains a model on a dataset. 
Args: @@ -41,7 +41,3 @@ def training(model_checkpoint: str = "yolov8l.pt"): model=model, dataset=dataset, ) - - # promote_model(metrics) - - # predict_image(trained_model) diff --git a/end-to-end-computer-vision/requirements.txt b/end-to-end-computer-vision/requirements.txt index 6de734b1..2bcee708 100644 --- a/end-to-end-computer-vision/requirements.txt +++ b/end-to-end-computer-vision/requirements.txt @@ -1,14 +1,12 @@ -zenml[server]>=0.55.2 +zenml[server]>=0.55.3 notebook scikit-learn<1.3 pyarrow -wandb seaborn xgboost ultralytics torch -huggingface_hub>=0.20.0 # needed for fiftyone imports of HF datasets -# fiftyone @ git+https://github.com/voxel51/fiftyone.git@develop # fiftyone v0.24.0 +huggingface_hub>=0.20.0 fiftyone datasets albumentations diff --git a/end-to-end-computer-vision/run.py b/end-to-end-computer-vision/run.py index c5af3e28..b4035f30 100644 --- a/end-to-end-computer-vision/run.py +++ b/end-to-end-computer-vision/run.py @@ -17,15 +17,14 @@ from uuid import UUID import click -from zenml import Model from zenml.client import Client from zenml.enums import ModelStages from zenml.logger import get_logger -from pipelines.data_export import export_for_training -from pipelines.data_ingestion import data_ingestion -from pipelines.inference import inference -from pipelines.training import training +from pipelines.data_export import data_export_pipeline +from pipelines.data_ingestion import data_ingestion_pipeline +from pipelines.inference import inference_pipeline +from pipelines.training import training_pipeline from utils.constants import PREDICTIONS_DATASET_ARTIFACT_NAME, ZENML_MODEL_NAME logger = get_logger(__name__) @@ -37,7 +36,7 @@ @click.option( "--ingest", "-ig", - "ingest_data_pipeline", + "ingest_data", is_flag=True, default=False, help="Whether to run the data ingestion pipeline, that takes the dataset" @@ -55,7 +54,7 @@ @click.option( "--training", "-t", - "training_pipeline", + "train", is_flag=True, default=False, help="Whether to run the pipeline that trains the model.", @@ -63,7 +62,7 @@ @click.option( "--inference", "-i", - "inference_pipeline", + "batch_inference", is_flag=True, default=False, help="Whether to run the pipeline that performs inference.", @@ -74,15 +73,7 @@ "fiftyone", is_flag=True, default=False, - help="Whether to launch the FiftyOne app pipeline.", -) -@click.option( - "--stack", - "-s", - "stack", - required=False, - type=click.Choice(["alexej", "hamza", "alex"]), - help="The stack to use for the pipeline.", + help="Whether to launch the FiftyOne app.", ) @click.option( "--local", @@ -90,64 +81,104 @@ "train_local", is_flag=True, default=False, - help="Whether to train local.", + help="Whether to train local or an a remote orchestrator/ step operator.", ) def main( - ingest_data_pipeline: bool = False, + ingest_data: bool = False, export_pipeline: bool = False, - training_pipeline: bool = False, - inference_pipeline: bool = False, + train: bool = False, + batch_inference: bool = False, fiftyone: bool = False, - stack: UUID = "alexej", train_local: bool = False, ): - # TODO: remove all this :) - if stack == "hamza": - stack_id = UUID("cca5eaf7-0309-413d-89ff-1cd371b7d27c") - elif stack == "alex": - stack_id = UUID("fcf840ac-addd-4de3-a3e4-a1015f7bb96c") - else: - stack_id = UUID("7cda3cec-6744-48dc-8bdc-f102242a26d2") - client = Client() - if ingest_data_pipeline: - client.activate_stack(stack_id) - data_ingestion.with_options(config_path="configs/ingest_data.yaml")() + if ingest_data: + if not 
client.active_stack.orchestrator.config.is_local: + raise RuntimeError( + "The implementation of this pipeline " + "requires that you are running on a local " + "machine with data being persisted in the local " + "filesystem across multiple steps. Please " + "switch to a stack that contains a local " + "orchestrator and a local label-studio " + "annotator. See the README for more information " + "on this setup." + ) + + data_ingestion_pipeline.with_options( + config_path="configs/ingest_data.yaml" + )() if export_pipeline: - client.activate_stack(stack_id) + if not client.active_stack.orchestrator.config.is_local: + raise RuntimeError( + "The implementation of this pipeline " + "requires that you are running on a local " + "machine with a running instance of label-studio " + "configured in the stack as annotator." + " Please switch to a stack that contains a local " + "orchestrator and a local label-studio " + "annotator. See the README for more information " + "on this setup." + ) # Export data from label studio - export_for_training.with_options( - config_path="configs/data_export_alexej.yaml" + data_export_pipeline.with_options( + config_path="configs/data_export.yaml" )() - # Promote Model to staging - latest_model = Model(name=ZENML_MODEL_NAME, version=ModelStages.LATEST) - latest_model.set_stage(stage=ModelStages.STAGING, force=True) - - if training_pipeline and train_local: - client.activate_stack(stack_id) + if train: + try: + client.get_model_version( + model_name_or_id=ZENML_MODEL_NAME, + model_version_name_or_number_or_id=ModelStages.STAGING, + ) + except KeyError: + raise RuntimeError( + "This pipeline requires that there is a version of its " + "associated model in the `STAGING` stage. Make sure you run " + "the `data_export_pipeline` at least once to create the Model " + "along with a version of this model. After this you can " + "promote the version of your choice, either through the " + "frontend or with the following command: " + f"`zenml model version update {ZENML_MODEL_NAME} latest " + f"-s staging`" + ) + + if train_local: + config_path = "configs/training_pipeline.yaml" + else: + config_path = "configs/training_pipeline_remote_gpu.yaml" # Train model on data - training.with_options(config_path="configs/training.yaml")() - - if training_pipeline and not train_local: - client.activate_stack(REMOTE_STACK_ID) - - # Train model on data - training.with_options(config_path="configs/training_gpu.yaml")() - - if inference_pipeline: - client.activate_stack(stack_id) # REMOTE_STACK_ID) - - inference.with_options(config_path="configs/cloud_inference.yaml")() + training_pipeline.with_options(config_path=config_path)() + + if batch_inference: + try: + client.get_model_version( + model_name_or_id=ZENML_MODEL_NAME, + model_version_name_or_number_or_id=ModelStages.PRODUCTION, + ) + except KeyError: + raise RuntimeError( + "This pipeline requires that there is a version of its " + "associated model in the `Production` stage. Make sure you run " + "the `data_export_pipeline` at least once to create the Model " + "along with a version of this model. 
After this you can " + "promote the version of your choice, either through the " + "frontend or with the following command: " + f"`zenml model version update {ZENML_MODEL_NAME} staging " + f"-s production`" + ) + + inference_pipeline.with_options( + config_path="configs/inference_pipeline.yaml" + )() if fiftyone: import fiftyone as fo - client.activate_stack(stack_id) artifact = Client().get_artifact_version( name_id_or_prefix=PREDICTIONS_DATASET_ARTIFACT_NAME ) diff --git a/end-to-end-computer-vision/steps/export_label_studio.py b/end-to-end-computer-vision/steps/export_label_studio.py index 12e5cffb..0433ccf4 100644 --- a/end-to-end-computer-vision/steps/export_label_studio.py +++ b/end-to-end-computer-vision/steps/export_label_studio.py @@ -16,13 +16,13 @@ # from typing import Annotated, List, Tuple -from zenml import get_step_context, step +from zenml import log_artifact_metadata, step from zenml.client import Client from zenml.logger import get_logger -from materializers.label_studio_yolo_dataset_materializer import ( - LabelStudioYOLODataset, - LabelStudioYOLODatasetMaterializer, +from materializers.label_studio_export_materializer import ( + LabelStudioAnnotationExport, + LabelStudioAnnotationMaterializer, ) from utils.constants import LABELED_DATASET_NAME @@ -31,13 +31,13 @@ @step( output_materializers={ - LABELED_DATASET_NAME: LabelStudioYOLODatasetMaterializer + LABELED_DATASET_NAME: LabelStudioAnnotationMaterializer } ) def load_data_from_label_studio( dataset_name: str, ) -> Tuple[ - Annotated[LabelStudioYOLODataset, LABELED_DATASET_NAME], + Annotated[LabelStudioAnnotationExport, LABELED_DATASET_NAME], Annotated[List[int], "new_ids"], ]: """Loads data from Label Studio. @@ -61,37 +61,19 @@ def load_data_from_label_studio( if annotator and annotator._connection_available(): try: dataset = annotator.get_dataset(dataset_name=dataset_name) - ls_dataset = LabelStudioYOLODataset() + ls_dataset = LabelStudioAnnotationExport() ls_dataset.dataset = dataset - c = Client() - step_context = get_step_context() - cur_pipeline_name = step_context.pipeline.name - cur_step_name = step_context.step_name - - try: - last_run = c.get_pipeline( - cur_pipeline_name - ).last_successful_run - last_task_ids = ( - last_run.steps[cur_step_name].outputs["new_ids"].load() - ) - except (RuntimeError, KeyError): - last_task_ids = [] - current_labeled_task_ids = dataset.get_labeled_tasks_ids() - logger.info(f"{len(current_labeled_task_ids)} total labels found.") - new_task_ids = list( - set(current_labeled_task_ids) - set(last_task_ids) + ls_dataset.task_ids = current_labeled_task_ids + log_artifact_metadata( + metadata={ + "num_images": len(current_labeled_task_ids), + }, + artifact_name=LABELED_DATASET_NAME, ) - logger.info( - f"{len(new_task_ids)} new labels are being beamed " - f"straight to you." - ) - - ls_dataset.task_ids = new_task_ids - return ls_dataset, new_task_ids + return ls_dataset, current_labeled_task_ids except: raise ValueError( f"Dataset {dataset_name} not found in Label Studio." 
diff --git a/end-to-end-computer-vision/steps/fiftyone_inference.py b/end-to-end-computer-vision/steps/fiftyone_inference.py index 8c33f7eb..5a4584a7 100644 --- a/end-to-end-computer-vision/steps/fiftyone_inference.py +++ b/end-to-end-computer-vision/steps/fiftyone_inference.py @@ -33,7 +33,7 @@ os.environ["YOLO_VERBOSE"] = "False" -INFERENCE_BATCH = 20 +INFERENCE_BATCH = 5 @step diff --git a/end-to-end-computer-vision/steps/process_hf_dataset.py b/end-to-end-computer-vision/steps/process_hf_dataset.py new file mode 100644 index 00000000..e1847c3d --- /dev/null +++ b/end-to-end-computer-vision/steps/process_hf_dataset.py @@ -0,0 +1,97 @@ +# Apache Software License 2.0 +# +# Copyright (c) ZenML GmbH 2024. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +from typing import Any, Dict + +from datasets import load_dataset +from PIL import Image +from zenml import step +from zenml.logger import get_logger + +from utils.dataset_utils import split_image_into_tiles + +Image.MAX_IMAGE_PIXELS = None + + +logger = get_logger(__name__) + + +@step +def process_hf_dataset( + dataset: str, data_source: str, max_tile_size: int = 1000 +) -> Dict[str, Any]: + """Downloads a Hugging Face dataset and does some processing. + + Converts the labels into the label_studio format. + Also uploads the images to the datasource path. + + The return of this step maps the path of images in the local fs to the + annotations in a format that label-studio can import. + {'data/image_116.png': [ + {'original_width': 907, + 'original_height': 937, + 'image_rotation': 0, + 'value': { + 'x': 66.26240352811466, + 'y': 77.5880469583778, + 'width': 5.071664829106946, + 'height': 3.4151547491995733, + 'rotation': 0, + 'rectanglelabels': ['ship'] + }, + 'from_name': 'label', + 'to_name': 'image', + 'type': 'rectanglelabels', + 'origin': 'manual'}, ... + ] + + Args: + dataset: Name of the hf dataset + data_source: Location of the image files + max_tile_size + + Returns: + Dictionary mapping filename to bboxes. 
You can find an example of this + dict in the docstring above + """ + # Load dataset from huggingface + dataset = load_dataset(dataset) + data = dataset["train"] + + # Create local directory to initially copy data to + local_output_dir = "data" + if not os.path.exists(local_output_dir): + os.mkdir(local_output_dir) + + # Dictionary that maps label information onto the corresponding image + all_images = {} + + # iterate through the dataset + for i, d in enumerate(data): + img_name = f"image_{i}" + + # in case images are very large, we split them up into 4 smaller tiles + split_image_into_tiles( + d=d, + img_name=img_name, + output_dir=local_output_dir, + all_images=all_images, + data_source=data_source, + max_tile_size=max_tile_size, + ) + + return all_images diff --git a/end-to-end-computer-vision/steps/train_model.py b/end-to-end-computer-vision/steps/train_model.py index 4f739d04..67be2774 100644 --- a/end-to-end-computer-vision/steps/train_model.py +++ b/end-to-end-computer-vision/steps/train_model.py @@ -20,10 +20,10 @@ from zenml import ArtifactConfig, log_artifact_metadata, step from zenml.logger import get_logger -from materializers.label_studio_yolo_dataset_materializer import ( - LabelStudioYOLODataset, +from materializers.label_studio_export_materializer import ( + LabelStudioAnnotationExport, ) -from materializers.yolo_materializer import UltralyticsMaterializer +from materializers.ultralytics_materializer import UltralyticsMaterializer from utils.dataset_utils import load_and_split_data logger = get_logger(__name__) @@ -35,7 +35,7 @@ ) def train_model( model: YOLO, - dataset: LabelStudioYOLODataset, + dataset: LabelStudioAnnotationExport, data_source: str, epochs: int = 100, batch_size: int = 16, diff --git a/end-to-end-computer-vision/steps/upload_to_label_studio.py b/end-to-end-computer-vision/steps/upload_to_label_studio.py index ecb52665..ffbecad5 100644 --- a/end-to-end-computer-vision/steps/upload_to_label_studio.py +++ b/end-to-end-computer-vision/steps/upload_to_label_studio.py @@ -18,6 +18,9 @@ from zenml import step from zenml.client import Client +from zenml.logger import get_logger + +logger = get_logger(__name__) @step @@ -54,7 +57,9 @@ def upload_labels_to_label_studio( for task in tasks: filename = task["storage_filename"].split("/")[-1] - - project.create_annotation( - task["id"], result=labels_dict[filename], ground_truth=True - ) + try: + project.create_annotation( + task["id"], result=labels_dict[filename], ground_truth=True + ) + except KeyError: + logger.info(f"No labels found for {filename}. 
Skipping ...") diff --git a/end-to-end-computer-vision/utils/dataset_utils.py b/end-to-end-computer-vision/utils/dataset_utils.py index 1da26e09..20e38a78 100644 --- a/end-to-end-computer-vision/utils/dataset_utils.py +++ b/end-to-end-computer-vision/utils/dataset_utils.py @@ -16,15 +16,20 @@ # import os import tempfile +from typing import Any, Dict, List, Tuple +import numpy as np from PIL import Image from zenml.io import fileio +from zenml.logger import get_logger -from materializers.label_studio_yolo_dataset_materializer import ( - LabelStudioYOLODataset, +from materializers.label_studio_export_materializer import ( + LabelStudioAnnotationExport, ) from utils.split_data import generate_yaml, split_dataset, unzip_dataset +logger = get_logger(__name__) + def load_images_from_folder(folder): images = [] @@ -41,7 +46,7 @@ def load_images_from_folder(folder): def load_and_split_data( - dataset: LabelStudioYOLODataset, data_source: str + dataset: LabelStudioAnnotationExport, data_source: str ) -> str: """Load data from dataset into file system and split into train/val. @@ -70,11 +75,189 @@ def load_and_split_data( images_folder = os.path.join(extract_location, "images") os.makedirs(images_folder, exist_ok=True) - for filename in filenames: + total_images = len(filenames) + logger.info(f"Downloading images from {data_source}") + for index, filename in enumerate(filenames): src_path = f"{data_source}/{filename}.png" dst_path = os.path.join(images_folder, f"{filename}.png") fileio.copy(src_path, dst_path) + if (index + 1) % 100 == 0 or index == total_images - 1: + logger.info( + f"{index + 1} of {total_images} images have been downloaded..." + ) split_dataset(extract_location, ratio=(0.7, 0.15, 0.15), seed=42) yaml_path = generate_yaml(extract_location) return yaml_path + + +def convert_bbox_for_ls(x1, x2, y1, y2, width, height) -> Dict[str, Any]: + """This function converts the bounding box coordinates to the label studio format. + + Parameters: + x1, x2, y1, y2 (int): The initial bounding box coordinates in pixels. + width, height (int): The original dimensions of the image. + + Returns: + A dictionary approximating the label studio. + """ + x = x1 / width + y = y1 / height + w = (x2 - x1) / width + h = (y2 - y1) / height + print(f"Converting bbox to results.") + return { + "original_width": width, + "original_height": height, + "image_rotation": 0, + "value": { + "x": x * 100, + "y": y * 100, + "width": w * 100, + "height": h * 100, + "rotation": 0, + "rectanglelabels": ["ship"], + }, + "from_name": "label", + "to_name": "image", + "type": "rectanglelabels", + "origin": "manual", + } + + +def crop_and_adjust_bboxes( + image: np.array, bboxes: List[List[int]], crop_coordinates: Tuple[int] +): + """Crops an image and adjust the bboxes that are within the croped portion. 
+ + Args: + image: Image as np.array + bboxes: List of bboxes [[x1, y1, x2, y2], [x1, y1, x2, y2]] + crop_coordinates: Coordinates of the crop, format (x1, y1, x2, y2) + + Returns: + Tuple containing the cropped portion of the images and all bboxes + within the crop area + """ + x_crop_min, y_crop_min, x_crop_max, y_crop_max = crop_coordinates + cropped_image = image[y_crop_min:y_crop_max, x_crop_min:x_crop_max] + + adjusted_bboxes = [] + for bbox in bboxes: + xmin, ymin, xmax, ymax = bbox + + # Adjust bounding box coordinates + xmin_new = max(0, xmin - x_crop_min) + ymin_new = max(0, ymin - y_crop_min) + xmax_new = min(x_crop_max - x_crop_min, xmax - x_crop_min) + ymax_new = min(y_crop_max - y_crop_min, ymax - y_crop_min) + + # Check if the adjusted bounding box is still within the cropping area + if xmin_new < xmax_new and ymin_new < ymax_new: + adjusted_bboxes.append([xmin_new, ymin_new, xmax_new, ymax_new]) + + return cropped_image, adjusted_bboxes + + +def split_image_into_tiles( + d: Any, + img_name: str, + output_dir: str, + all_images: Dict[str, Any], + data_source: str, + max_tile_size: int = 500, +): + """Some images are too large to be useful, this splits them into multiple tiles. + + Args: + d: One hf dataset entry - needs to contain d['image'] and + d['objects']['bbox'] + img_name: Name of the image (excluding any suffix) + output_dir: Where to locally save the image + all_images: Dictionary containing the image_name<->bboxes mapping + max_tile_size: Maximum tile size + """ + tile_id = 0 + img = d["image"] + bboxes = d["objects"]["bbox"] + width, height = d["image"].size + + logger.info(f"Processing {img_name} ...") + for x in range(0, width, max_tile_size): + print(f"increased x={x}") + for y in range(0, height, max_tile_size): + + print(f"increased y={y}") + if x + max_tile_size <= width: + x1 = x + x2 = min(x + max_tile_size, width) + else: + # The last tile of the row also stays 500 pixels wide by + # cropping from max width to the left + x1 = max(0, width - max_tile_size) + x2 = width + if y + max_tile_size <= height: + y1 = y + y2 = min(y + max_tile_size, height) + else: + # The last tile of the row also stays 500 pixels high by + # cropping from max height up + y1 = max(0, height - max_tile_size) + y2 = height + cropped_img, adjusted_bboxes = crop_and_adjust_bboxes( + image=np.array(img), + bboxes=bboxes, + crop_coordinates=(x1, y1, x2, y2), + ) + + # store this tile + new_img_name = img_name + "_" + str(tile_id) + img_path = f"{output_dir}/{new_img_name}.png" + + print(f"Storing tile {tile_id} of {img_name} at {img_path} ...") + export_to_gcp( + data_source=data_source, + img=Image.fromarray(cropped_img), + img_name=new_img_name, + img_path=img_path, + ) + + logger.info(f"Calculating new bboxes for tile {img_path}") + tile_width = x2 - x1 + tile_height = y2 - y1 + + results = [] + for bbox in adjusted_bboxes: + results.append( + convert_bbox_for_ls( + x1=bbox[0], + x2=bbox[2], + y1=bbox[1], + y2=bbox[3], + width=tile_width, + height=tile_height, + ) + ) + + all_images[f"{new_img_name}.png"] = results + tile_id += 1 + + +def export_to_gcp(data_source: str, img: Image, img_name: str, img_path: str): + """ + Saves a given image locally, then copies it to a Google Cloud Storage bucket. + + Parameters: + data_source: The name of source directory or bucket where image needs to be + stored. + img: The image to be stored. This should be an instance of the PIL Image + class. + img_name: The name to be used when saving the image in the GCP bucket. 
Should not contain '.png' + img_path: The local path where the image will be temporarily saved before + being copied to the bucket. + """ + logger.info(f"Storing image to {img_path}.") + img.save(img_path) + bucket_path = os.path.join(data_source, f"{img_name}.png") + logger.info(f"Copying into gcp bucket {bucket_path}") + fileio.copy(img_path, bucket_path, overwrite=True)