Merge pull request #2 from kzaleskaa/dev

example run
kzaleskaa · Apr 10, 2024 · 3abf2ed · 3abf2ed
2 parents 77a24ba + fdfc677
commit 3abf2ed
Show file tree

Hide file tree

Showing 5 changed files with 155 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -11,9 +11,11 @@ ______________________________________________________________________
 
 </div>
 
+<img src="https://github.com/kzaleskaa/depth-estimation-with-compression/assets/62251989/747c72d8-e096-4113-9951-5886213187bc" />
+
 ## Description
 
-This project entails the development and optimization of a depth estimation model based on a UNET architecture enhanced with **Bi-directional Feature Pyramid Network** (BIFPN) and **EfficientNet** components. This project was implemented within the scope of the "Neural Network Compression with Applications" subject.
+This project entails the development and optimization of a depth estimation model based on a UNET architecture enhanced with **Bi-directional Feature Pyramid Network** (BIFPN) and **EfficientNet** components. The model is trained on the NYU Depth V2 dataset and evaluated using the Structural Similarity Index (SSIM).
 
 ## Installation
 

diff --git a/notebooks/data_analysis.ipynb b/notebooks/data_analysis.ipynb
@@ -53,15 +53,6 @@
     "df_train.shape, df_test.shape"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "50688 + 654"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -118,8 +109,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "img_path = df_train.iloc[0][\"img\"]\n",
-    "depth_path = df_train.iloc[0][\"depth\"]\n",
+    "img_path = df_train.iloc[1150][\"img\"]\n",
+    "depth_path = df_train.iloc[1150][\"depth\"]\n",
     "\n",
     "visualize_example(img_path, depth_path)"
    ]

diff --git a/notebooks/example_model_results.ipynb b/notebooks/example_model_results.ipynb
@@ -0,0 +1,147 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from torchvision.transforms import transforms\n",
+    "\n",
+    "from src.data.components.custom_transforms import BilinearInterpolation, NormalizeData\n",
+    "from src.data.components.nyu_dataset import NYUDataset\n",
+    "from src.models.unet_module import UNETLitModule"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_ckpt = \"./logs/train/runs/2024-04-06_18-37-38/checkpoints/epoch_015.ckpt\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = UNETLitModule.load_from_checkpoint(model_ckpt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.eval()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transforms_img = transforms.Compose([transforms.PILToTensor(), transforms.Resize((224, 224))])\n",
+    "\n",
+    "transforms_mask = transforms.Compose(\n",
+    "    [\n",
+    "        transforms.PILToTensor(),\n",
+    "        NormalizeData(10_000 * (1 / 255)),\n",
+    "        BilinearInterpolation((56, 56)),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_dataset = NYUDataset(\"nyu2_test.csv\", \"data/\", transforms_img, transforms_mask)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "outputs = []\n",
+    "\n",
+    "for i in range(10):\n",
+    "    img, mask = test_dataset[i]\n",
+    "    img = img.unsqueeze(0)\n",
+    "    mask = mask.unsqueeze(0)\n",
+    "    img = img.to(model.device)\n",
+    "    out = model(img)\n",
+    "    outputs.append(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def visualize_result(img, mask, out):\n",
+    "    _, axs = plt.subplots(1, 3)\n",
+    "    axs[0].imshow(img.squeeze().permute(1, 2, 0))\n",
+    "    axs[0].set_title(\"Input Image\")\n",
+    "    axs[1].imshow(mask.squeeze())\n",
+    "    axs[1].set_title(\"Ground Truth\")\n",
+    "    axs[2].imshow(out.squeeze().detach().cpu())\n",
+    "    axs[2].set_title(\"Predicted Mask\")\n",
+    "\n",
+    "    for ax in axs:\n",
+    "        ax.axis(\"off\")\n",
+    "\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in range(5):\n",
+    "    visualize_result(test_dataset[i][0], test_dataset[i][1], outputs[i])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/data/depth_datamodule.py b/src/data/depth_datamodule.py
@@ -156,7 +156,7 @@ def train_dataloader(self) -> DataLoader[Any]:
             num_workers=self.hparams.num_workers,
             pin_memory=self.hparams.pin_memory,
             shuffle=True,
-            persistent_workers=True,
+            # persistent_workers=True,
         )
 
     def val_dataloader(self) -> DataLoader[Any]:
@@ -170,7 +170,7 @@ def val_dataloader(self) -> DataLoader[Any]:
             num_workers=self.hparams.num_workers,
             pin_memory=self.hparams.pin_memory,
             shuffle=False,
-            persistent_workers=True,
+            # persistent_workers=True,
         )
 
     def test_dataloader(self) -> DataLoader[Any]:
@@ -184,7 +184,7 @@ def test_dataloader(self) -> DataLoader[Any]:
             num_workers=self.hparams.num_workers,
             pin_memory=self.hparams.pin_memory,
             shuffle=False,
-            persistent_workers=True,
+            # persistent_workers=True,
         )
 
     def teardown(self, stage: Optional[str] = None) -> None:

diff --git a/src/models/components/bifpn_decoder.py b/src/models/components/bifpn_decoder.py
@@ -142,7 +142,6 @@ def __init__(self, fpn_sizes: List[int]) -> None:
         self.p7_out_w2 = torch.tensor(1, dtype=torch.float, requires_grad=True)
 
     def forward(self, inputs: List[torch.Tensor]) -> List[torch.Tensor]:
-        print(type(inputs))
         epsilon = 0.0001
         P4, P5, P6 = inputs