feat(ml): ML on Rockchip NPUs #15241

Open

yoni13 wants to merge 78 commits into main from rknn-toolkit-lite2

Changes from 55 commits

Commits (78)
8ef3e49
untested
yoni13 Nov 29, 2024
6ffc227
test
yoni13 Nov 29, 2024
7fddf28
lowercase
yoni13 Nov 30, 2024
bc849e2
ViT-B-32__openai/textual/ Runs with emulator now.
yoni13 Dec 1, 2024
b6c4b37
Merge branch 'immich-app:main' into rknn-toolkit2
yoni13 Dec 3, 2024
4140e93
Merge branch 'immich-app:main' into rknn-toolkit-lite2
yoni13 Dec 4, 2024
257cc6c
Init commit for using rknn, RecognitionFormDataLoadTest doesnt work
yoni13 Dec 4, 2024
da152bd
Merge branch 'immich-app:main' into rknn-toolkit-lite2
yoni13 Dec 13, 2024
082c426
Merge branch 'immich-app:main' into rknn-toolkit-lite2
yoni13 Dec 25, 2024
a94fad5
all infrencing works with 1 max job concurrency
yoni13 Jan 9, 2025
8608b9c
Merge branch 'immich-app:main' into rknn-toolkit-lite2
yoni13 Jan 9, 2025
9bc3e5b
Update rknn.py
yoni13 Jan 10, 2025
4d704e9
fix inf,-inf with 2 concurrency
yoni13 Jan 10, 2025
a2722e1
Revert my changes to dockerfiles
yoni13 Jan 11, 2025
c20d110
support for rknn.rknnpool.is_available
yoni13 Jan 11, 2025
66004e3
Merge branch 'immich-app:main' into rknn-toolkit-lite2
yoni13 Jan 11, 2025
d10147f
Handling Import and file not found Error for non-arm devices.
yoni13 Jan 11, 2025
d5ef821
Set group RKNN to optional
yoni13 Jan 11, 2025
506ca0d
Dockerfile for rknn
yoni13 Jan 11, 2025
7aaf3aa
Remove unused imports.
yoni13 Jan 11, 2025
f4671f4
Indentation issue
yoni13 Jan 11, 2025
7f2af6f
Fix typo: rknnlite.api
yoni13 Jan 11, 2025
d5e453a
ruff format
yoni13 Jan 11, 2025
23d0ea0
ruff
yoni13 Jan 11, 2025
4162119
Check if NPU drivers is loaded or not.
yoni13 Jan 11, 2025
815ed1a
Install onnxruntime
yoni13 Jan 11, 2025
807111e
Should Fix No module named 'rknn'
yoni13 Jan 11, 2025
665718b
add rknn to src
yoni13 Jan 11, 2025
efaf70e
Set running threads from env
yoni13 Jan 11, 2025
19ee48f
fix path
yoni13 Jan 11, 2025
c72cf61
support core_mask for specfic socs
yoni13 Jan 11, 2025
c665fd2
Fix Please do not set this parameter on other platforms.
yoni13 Jan 11, 2025
e6ff21b
set default thread num to 2, not everyone has 8 gigs of ram
yoni13 Jan 12, 2025
c109e28
DOCS
yoni13 Jan 12, 2025
bb67a9d
fix formatting
yoni13 Jan 12, 2025
68fccad
Fix docs.
yoni13 Jan 12, 2025
1775397
Sort them by alphablet
yoni13 Jan 12, 2025
7ae4b71
format be happy
yoni13 Jan 12, 2025
8965a9f
Merge branch 'main' into rknn-toolkit-lite2
yoni13 Jan 12, 2025
4c7ac14
only load knnx model when required
yoni13 Jan 12, 2025
daf8860
Add export script
yoni13 Jan 13, 2025
ebdfe1b
Load model by SOC name
yoni13 Jan 13, 2025
2f7e44a
typing be happy.
yoni13 Jan 13, 2025
f328104
Merge branch 'main' into rknn-toolkit-lite2
yoni13 Jan 13, 2025
b6cc205
ignore rknn model if not using it
yoni13 Jan 13, 2025
6c4e6cb
reformat
yoni13 Jan 13, 2025
4b0f93c
add test,founds bugs, fix it tomorrow
yoni13 Jan 13, 2025
8b80d03
fixed some bugs
yoni13 Jan 14, 2025
5244ed6
black app export
yoni13 Jan 14, 2025
cb01a11
Merge branch 'main' into rknn-toolkit-lite2
yoni13 Jan 14, 2025
c21ce40
switch to Runtime error instead of exit()
yoni13 Jan 14, 2025
0f03f77
remove non implemented tests
yoni13 Jan 14, 2025
b5a4ed5
this duplicated?
yoni13 Jan 14, 2025
01eb095
trying to fix pytest
yoni13 Jan 14, 2025
9882b83
Should FIx the quote that made mypy unhappy
yoni13 Jan 14, 2025
f32d991
changes some cases
yoni13 Jan 17, 2025
26d5fb0
add checksum for libnnrt.so
yoni13 Jan 17, 2025
bc48b67
switch to sha256
yoni13 Jan 17, 2025
f067212
tpe
yoni13 Jan 17, 2025
0567592
remove unrequired devices
yoni13 Jan 17, 2025
3634ae1
fix granularity
yoni13 Jan 18, 2025
f5de3de
fix typo and add a propper var name
yoni13 Jan 18, 2025
be76857
make these functions snake case.
yoni13 Jan 18, 2025
87a46dc
remove unnecessary print
yoni13 Jan 18, 2025
d7381ab
refactor ignore_patterns
yoni13 Jan 18, 2025
9926045
add a simple script to notify user if some op is not supported
yoni13 Jan 18, 2025
4e42fbc
Merge branch 'main' into rknn-toolkit-lite2
yoni13 Jan 18, 2025
b3ae5d3
fix typo in tests
yoni13 Jan 18, 2025
d2b7e10
shellcheck happy
yoni13 Jan 18, 2025
58f1cc9
prettier happy
yoni13 Jan 18, 2025
32f3707
fix types and ignored pattern
yoni13 Jan 18, 2025
1653cd9
update supported SOCs
yoni13 Jan 18, 2025
2b967ca
raise NotImplementedError for now
yoni13 Jan 19, 2025
ac4ce3e
add input outputs
yoni13 Jan 19, 2025
20ba9f9
update mapping
yoni13 Jan 19, 2025
59e4b65
Merge branch 'main' into rknn-toolkit-lite2
yoni13 Jan 19, 2025
dd52c2d
Update permission
yoni13 Jan 19, 2025
794da29
Merge branch 'main' into rknn-toolkit-lite2
yoni13 Jan 22, 2025
7 changes: 5 additions & 2 deletions .github/workflows/docker.yml
@@ -48,7 +48,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: ["", "-cuda", "-openvino", "-armnn"]
suffix: ["", "-cuda", "-openvino", "-armnn","-rknn"]
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
@@ -116,6 +116,9 @@ jobs:
- platforms: linux/arm64
device: armnn
suffix: -armnn
- platforms: linux/arm64
device: rknn
suffix: -rknn

steps:
- name: Checkout
@@ -307,4 +310,4 @@ jobs:
run: exit 1
- name: All jobs passed or skipped
if: ${{ !(contains(needs.*.result, 'failure')) }}
run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"
run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -390,7 +390,7 @@ jobs:
poetry run black --check app export
- name: Run mypy type checking
run: |
poetry run mypy --install-types --non-interactive --strict app/
mkdir .mypy_cache && poetry run mypy --install-types --non-interactive --strict app/
- name: Run tests and coverage
run: |
poetry run pytest app --cov=app --cov-report term-missing
4 changes: 2 additions & 2 deletions docker/docker-compose.dev.yml
@@ -85,12 +85,12 @@ services:
image: immich-machine-learning-dev:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
ports:
- 3003:3003
volumes:
20 changes: 5 additions & 15 deletions docker/docker-compose.prod.yml
@@ -29,12 +29,12 @@ services:
image: immich-machine-learning:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
ports:
- 3003:3003
volumes:
@@ -68,22 +68,12 @@ services:
- 5432:5432
healthcheck:
test: >-
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
--command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
echo "checksum failure count is $$Chksum";
[ "$$Chksum" = '0' ] || exit 1
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
interval: 5m
start_interval: 30s
start_period: 5m
command: >-
postgres
-c shared_preload_libraries=vectors.so
-c 'search_path="$$user", public, vectors'
-c logging_collector=on
-c max_wal_size=2GB
-c shared_buffers=512MB
-c wal_compression=on
postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
restart: always

# set IMMICH_TELEMETRY_INCLUDE=all in .env to enable metrics
@@ -100,7 +90,7 @@ services:
# add data source for http://immich-prometheus:9090 to get started
immich-grafana:
container_name: immich_grafana
command: ['./run.sh', '-disable-reporting']
command: [ './run.sh', '-disable-reporting' ]
ports:
- 3000:3000
image: grafana/grafana:11.4.0-ubuntu@sha256:afccec22ba0e4815cca1d2bf3836e414322390dc78d77f1851976ffa8d61051c
18 changes: 4 additions & 14 deletions docker/docker-compose.yml
@@ -32,12 +32,12 @@ services:

immich-machine-learning:
container_name: immich_machine_learning
# For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
# For hardware acceleration, add one of -[armnn, cuda, openvino, rknn] to the image tag.
# Example tag: ${IMMICH_VERSION:-release}-cuda
image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
# extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/ml-hardware-acceleration
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference - use the `-wsl` version for WSL2 where applicable
volumes:
- model-cache:/cache
env_file:
@@ -66,22 +66,12 @@ services:
- ${DB_DATA_LOCATION}:/var/lib/postgresql/data
healthcheck:
test: >-
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
--command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
echo "checksum failure count is $$Chksum";
[ "$$Chksum" = '0' ] || exit 1
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
interval: 5m
start_interval: 30s
start_period: 5m
command: >-
postgres
-c shared_preload_libraries=vectors.so
-c 'search_path="$$user", public, vectors'
-c logging_collector=on
-c max_wal_size=2GB
-c shared_buffers=512MB
-c wal_compression=on
postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
restart: always

volumes:
12 changes: 12 additions & 0 deletions docker/hwaccel.ml.yml
@@ -13,6 +13,18 @@ services:
volumes:
- /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset (not always required depending on the driver)
- /usr/lib/libmali.so:/usr/lib/libmali.so:ro # Mali driver for your chipset (always required)

rknn:
security_opt:
- systempaths=unconfined
- apparmor=unconfined
devices:
- /dev/rga:/dev/rga
- /dev/dri:/dev/dri
- /dev/dma_heap:/dev/dma_heap
- /dev/mpp_service:/dev/mpp_service
volumes:
- /sys/kernel/debug/:/sys/kernel/debug/:ro

cpu: {}

10 changes: 10 additions & 0 deletions docs/docs/features/ml-hardware-acceleration.md
@@ -12,6 +12,7 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration.
- ARM NN (Mali)
- CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher)
- OpenVINO (Intel discrete GPUs such as Iris Xe and Arc)
- RKNN (Rockchip)

## Limitations

@@ -46,6 +47,15 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration.
- The server must have a discrete GPU, i.e. Iris Xe or Arc. Expect issues when attempting to use integrated graphics.
- Ensure the server's kernel version is new enough to use the device for hardware acceleration.

#### RKNN

- You must have a supported Rockchip SoC; only the RK3566 and RK3588 are supported at this time.
- Make sure you have the appropriate Linux kernel driver installed
- This is usually pre-installed on the device vendor's Linux images
- RKNPU driver V0.9.8 or later must be available on the host server
- You can confirm this by running `cat /sys/kernel/debug/rknpu/version` (a quick verification sketch follows this list)
- Optional: Configure your `.env` file; see [environment variables](/docs/install/environment-variables) for RKNN-specific settings
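As a quick way to check the last two prerequisites, here is a minimal, hedged sketch in Python. The sysfs path and the 0.9.8 minimum come from the list above; the exact format of the version string is an assumption and may differ between vendor images.

```python
# Minimal prerequisite check for the RKNN backend. The version-string
# format ("... v0.9.8") is an assumption; adjust parsing for your image.
from pathlib import Path

RKNPU_VERSION_PATH = Path("/sys/kernel/debug/rknpu/version")
MIN_VERSION = (0, 9, 8)


def rknpu_driver_ok() -> bool:
    try:
        text = RKNPU_VERSION_PATH.read_text()
    except OSError:  # driver missing, or debugfs not mounted/readable
        return False
    version = text.rsplit("v", 1)[-1].strip()  # e.g. "RKNPU driver: v0.9.8" -> "0.9.8"
    try:
        parts = tuple(int(p) for p in version.split("."))
    except ValueError:
        return False
    return parts >= MIN_VERSION


if __name__ == "__main__":
    print("RKNPU driver OK" if rknpu_driver_ok() else "RKNPU driver missing or too old")
```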

## Setup

1. If you do not already have it, download the latest [`hwaccel.ml.yml`][hw-file] file and ensure it's in the same folder as the `docker-compose.yml`.
4 changes: 4 additions & 0 deletions docs/docs/install/environment-variables.md
@@ -166,6 +166,10 @@ Redis (Sentinel) URL example JSON before encoding:
| `MACHINE_LEARNING_ANN_TUNING_LEVEL` | ARM-NN GPU tuning level (1: rapid, 2: normal, 3: exhaustive) | `2` | machine learning |
| `MACHINE_LEARNING_DEVICE_IDS`<sup>\*4</sup> | Device IDs to use in multi-GPU environments | `0` | machine learning |
| `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION` | Set the maximum number of faces that will be processed at once by the facial recognition model | None (`1` if using OpenVINO) | machine learning |
| `MACHINE_LEARNING_RKNN` | Enable RKNN hardware acceleration if supported | `True` | machine learning |
| `MACHINE_LEARNING_RKNN_TEXTUAL_THREADS` | Number of RKNN runtime threads to spin up for textual model inference | `1` | machine learning |
| `MACHINE_LEARNING_RKNN_VISUAL_THREADS` | Number of RKNN runtime threads to spin up for visual model inference | `1` | machine learning |
| `MACHINE_LEARNING_RKNN_FACIAL_DETECTION_THREADS` | Number of RKNN runtime threads to spin up for facial detection model inference | `1` | machine learning |
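For reference, a minimal sketch of how these variables map onto the machine-learning service settings. The field names mirror `machine-learning/app/config.py` in this PR; the `pydantic-settings` import and the `MACHINE_LEARNING_` env prefix are assumptions about how the service binds environment variables.

```python
# Hedged sketch: field names match app/config.py in this PR; the env prefix
# and pydantic-settings usage are assumptions.
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_")

    rknn: bool = True
    rknn_textual_threads: int = 1
    rknn_visual_threads: int = 1
    rknn_facial_detection_threads: int = 1


# With MACHINE_LEARNING_RKNN_VISUAL_THREADS=2 in the environment:
# Settings().rknn_visual_threads == 2
```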

\*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

9 changes: 8 additions & 1 deletion machine-learning/Dockerfile
@@ -15,6 +15,8 @@ RUN mkdir /opt/armnn && \
cd /opt/ann && \
sh build.sh

FROM builder-cpu AS builder-rknn

FROM builder-${DEVICE} AS builder

ARG DEVICE
@@ -80,6 +82,10 @@ COPY --from=builder-armnn \
/opt/ann/build.sh \
/opt/armnn/

FROM prod-cpu AS prod-rknn

ADD https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/v2.3.0/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so /usr/lib/
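The commit history adds a sha256 checksum for this runtime library ("add checksum", "switch to sha256"). Below is a hedged sketch of such an integrity check in Python; the digest is a placeholder, not the real value.

```python
# Hedged sketch of a sha256 integrity check for librknnrt.so;
# EXPECTED_SHA256 is a placeholder, not the published digest.
import hashlib
from pathlib import Path

EXPECTED_SHA256 = "0000000000000000000000000000000000000000000000000000000000000000"


def verify_librknnrt(path: str = "/usr/lib/librknnrt.so") -> bool:
    digest = hashlib.sha256(Path(path).read_bytes()).hexdigest()
    return digest == EXPECTED_SHA256
```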

FROM prod-${DEVICE} AS prod
ARG DEVICE

@@ -104,9 +110,10 @@ RUN echo "hard core 0" >> /etc/security/limits.conf && \

COPY --from=builder /opt/venv /opt/venv
COPY ann/ann.py /usr/src/ann/ann.py
COPY rknn/rknnpool.py /usr/src/rknn/rknnpool.py
COPY start.sh log_conf.json gunicorn_conf.py ./
COPY app .
ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]

HEALTHCHECK CMD python3 healthcheck.py
4 changes: 4 additions & 0 deletions machine-learning/app/config.py
@@ -44,6 +44,10 @@ class Settings(BaseSettings):
ann: bool = True
ann_fp16_turbo: bool = False
ann_tuning_level: int = 2
rknn: bool = True
rknn_textual_threads: int = 1
rknn_visual_threads: int = 1
rknn_facial_detection_threads: int = 1
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None

6 changes: 6 additions & 0 deletions machine-learning/app/conftest.py
@@ -136,6 +136,12 @@ def ann_session() -> Iterator[mock.Mock]:
yield mocked


@pytest.fixture(scope="function")
def rknn_session() -> Iterator[mock.Mock]:
with mock.patch("app.sessions.rknn.rknnPoolExecutor") as mocked:
yield mocked
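
A hypothetical example of a test consuming this fixture; the test name and assertion are illustrative only and simply show that the pool class is patched for the duration of the test.

```python
# Hypothetical usage of the rknn_session fixture above.
from unittest import mock


def test_rknn_pool_is_mocked(rknn_session: mock.Mock) -> None:
    from app.sessions import rknn

    # Inside the fixture's patch context, the executor class is the mock.
    assert rknn.rknnPoolExecutor is rknn_session
```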


@pytest.fixture(scope="function")
def rmtree() -> Iterator[mock.Mock]:
with mock.patch("app.models.base.rmtree", autospec=True) as mocked:
13 changes: 12 additions & 1 deletion machine-learning/app/models/base.py
@@ -8,7 +8,9 @@
from huggingface_hub import snapshot_download

import ann.ann
import rknn.rknnpool
from app.sessions.ort import OrtSession
from app.sessions.rknn import RknnSession

from ..config import clean_name, log, settings
from ..schemas import ModelFormat, ModelIdentity, ModelSession, ModelTask, ModelType
@@ -67,6 +69,8 @@ def configure(self, **kwargs: Any) -> None:

def _download(self) -> None:
ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
if self.model_format != ModelFormat.RKNN:
ignore_patterns.append("*.rknn")
snapshot_download(
f"immich-app/{clean_name(self.model_name)}",
cache_dir=self.cache_dir,
@@ -108,6 +112,8 @@ def _make_session(self, model_path: Path) -> ModelSession:
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = OrtSession(model_path)
case ".rknn":
session = RknnSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
@@ -155,4 +161,9 @@ def model_format(self, model_format: ModelFormat) -> None:

@property
def _model_format_default(self) -> ModelFormat:
return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
if rknn.rknnpool.is_available and settings.rknn:
return ModelFormat.RKNN
elif ann.ann.is_available and settings.ann:
return ModelFormat.ARMNN
else:
return ModelFormat.ONNX
1 change: 1 addition & 0 deletions machine-learning/app/schemas.py
@@ -35,6 +35,7 @@ class ModelType(StrEnum):
class ModelFormat(StrEnum):
ARMNN = "armnn"
ONNX = "onnx"
RKNN = "rknn"


class ModelSource(StrEnum):
Expand Down
72 changes: 72 additions & 0 deletions machine-learning/app/sessions/rknn.py
@@ -0,0 +1,72 @@
from __future__ import annotations

from pathlib import Path
from typing import Any

import numpy as np
import onnxruntime as ort
from numpy.typing import NDArray

from app.schemas import SessionNode
from rknn.rknnpool import rknnPoolExecutor, soc_name

from ..config import log, settings


def run_inference(rknn_lite: Any, inputs: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
# Run a single forward pass on one pooled RKNNLite instance (NCHW inputs).
outputs: list[NDArray[np.float32]] = rknn_lite.inference(inputs=inputs, data_format="nchw")

return outputs


class RknnSession:
def __init__(self, model_path: Path | str):
self.model_path = Path(str(model_path).replace("model", soc_name))
self.ort_model_path = Path(str(self.model_path).replace(f"{soc_name}.rknn", "model.onnx"))

if "textual" in str(self.model_path):
self.tpe = settings.rknn_textual_threads
elif "visual" in str(self.model_path):
self.tpe = settings.rknn_visual_threads
else:
self.tpe = settings.rknn_facial_detection_threads

log.info(f"Loading RKNN model from {self.model_path} with {self.tpe} threads.")
self.rknnpool = rknnPoolExecutor(rknnModel=self.model_path.as_posix(), TPEs=self.tpe, func=run_inference)
log.info(f"Loaded RKNN model from {self.model_path} with {self.tpe} threads.")

def __del__(self) -> None:
self.rknnpool.release()

def _load_ort_session(self) -> None:
self.ort_session = ort.InferenceSession(
self.ort_model_path.as_posix(),
)
self.inputs: list[SessionNode] = self.ort_session.get_inputs()
self.outputs: list[SessionNode] = self.ort_session.get_outputs()
del self.ort_session

def get_inputs(self) -> list[SessionNode]:
try:
return self.inputs
except AttributeError:
self._load_ort_session()
return self.inputs

def get_outputs(self) -> list[SessionNode]:
try:
return self.outputs
except AttributeError:
self._load_ort_session()
return self.outputs

def run(
self,
output_names: list[str] | None,
input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
run_options: Any = None,
) -> list[NDArray[np.float32]]:
input_data: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
self.rknnpool.put(input_data)
outputs: list[NDArray[np.float32]] = self.rknnpool.get()
return outputs
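A hypothetical usage sketch: `RknnSession.run` mirrors the onnxruntime `InferenceSession.run` signature used elsewhere in the service. The model path, input name, and input shape below are assumptions; the comment about the SoC-specific filename rewrite reflects the constructor's `replace("model", soc_name)` logic.

```python
# Hypothetical usage; model path, input name, and shape are assumptions.
# On an RK3588, __init__ rewrites ".../visual/model.rknn" to ".../visual/rk3588.rknn".
import numpy as np

from app.sessions.rknn import RknnSession

session = RknnSession("/cache/clip/ViT-B-32__openai/visual/model.rknn")
image = np.random.rand(1, 3, 224, 224).astype(np.float32)  # NCHW float32
outputs = session.run(None, {"image": image})
print(outputs[0].shape)
```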