#293 Add Structured Neuron-level Pruning (SNP) method (#294)

Nota-NetsPresso · Jul 23, 2024 · fa231ae · fa231ae
1 parent 29c965d
commit fa231ae
Show file tree

Hide file tree

Showing 8 changed files with 97 additions and 45 deletions.
diff --git a/docs/_static/compression/methods/pruning_snp.png b/docs/_static/compression/methods/pruning_snp.png
diff --git a/...description/api/compressor/compress/advanced_compression/compression_method.rst b/...description/api/compressor/compress/advanced_compression/compression_method.rst
@@ -15,6 +15,18 @@ Pruning by Criteria
 
 Difference of each pruning method is about measuring importance of filters in each layer. Filters in each layer will be automatically pruned based on certain criteria.
 
+
+Structured Neuron-level Pruning (SNP)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- SNP prunes graphically connected query and key layers having the least informative attention scores while preserving the overall attention scores. Value layers, which can be pruned independently, are pruned to eliminate inter-head redundancy.
+- Click the link for more information. (`Structured Neuron-level Pruning`_)
+
+.. image:: ../../../../../_static/compression/methods/pruning_snp.png
+    :width: 500
+    :align: center
+
+
 L2 Norm Pruning
 ^^^^^^^^^^^^^^^
 
@@ -109,3 +121,4 @@ CP Decomposition
 .. _Tucker Decomposition : https://docs.netspresso.ai/docs/mc-filter-decomposition#supported-method
 .. _Singular Value Decomposition : https://docs.netspresso.ai/docs/mc-filter-decomposition#supported-method
 .. _CP Decomposition : https://docs.netspresso.ai/docs/mc-filter-decomposition#supported-method
+.. _Structured Neuron-level Pruning : https://arxiv.org/abs/2404.11630
diff --git a/...description/api/compressor/compress/advanced_compression/manual_compression.rst b/...description/api/compressor/compress/advanced_compression/manual_compression.rst
@@ -116,23 +116,25 @@ Compression Method
 
 Available Compression Method
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-+------------+------------------------------+
-| Name       | Description                  |
-+============+==============================+
-| PR_L2      | L2 Norm Pruning              |
-+------------+------------------------------+
-| PR_GM      | GM Pruning                   |
-+------------+------------------------------+
-| PR_NN      | Nuclear Norm Pruning         |
-+------------+------------------------------+
-| PR_ID      | Pruning By Index             |
-+------------+------------------------------+
-| FD_TK      | Tucker Decomposition         |
-+------------+------------------------------+
-| FD_SVD     | Singular Value Decomposition |
-+------------+------------------------------+
-| FD_CP      | CP Decomposition             |
-+------------+------------------------------+
++------------+----------------------------------+
+| Name       | Description                      |
++============+==================================+
+| PR_L2      | L2 Norm Pruning                  |
++------------+----------------------------------+
+| PR_GM      | GM Pruning                       |
++------------+----------------------------------+
+| PR_NN      | Nuclear Norm Pruning             |
++------------+----------------------------------+
+| PR_SNP     | Structured Neuron-level Pruning  |
++------------+----------------------------------+
+| PR_ID      | Pruning By Index                 |
++------------+----------------------------------+
+| FD_TK      | Tucker Decomposition             |
++------------+----------------------------------+
+| FD_SVD     | Singular Value Decomposition     |
++------------+----------------------------------+
+| FD_CP      | CP Decomposition                 |
++------------+----------------------------------+
 
 Example
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -144,7 +146,8 @@ Example
     COMPRESSION_METHOD = CompressionMethod.PR_L2
 
 .. warning::
-    - Nuclear Norm is only supported in the Tensorflow-Keras Framework.
+    - Nuclear Norm Pruning is only supported in the TensorFlow-Keras framework.
+    - Structured Neuron-level Pruning is only supported in the PyTorch and ONNX frameworks.
 
 .. note::
 
@@ -162,6 +165,8 @@ Options
 .. autoclass:: netspresso.enums.__init__.GroupPolicy
     :noindex:
 
+.. autoclass:: netspresso.enums.__init__.StepOp
+    :noindex:
 
 Example
 +++++++
@@ -184,7 +189,7 @@ Example
 
 .. note::
 
-    - This parameter applies only to the Pruning Method (PR_L2, PR_GM, PR_NN).
+    - This parameter applies only to the Pruning Method (PR_L2, PR_GM, PR_NN, PR_SNP).
 
 Details of Returns
 ~~~~~~~~~~~~~~~~~~
@@ -260,13 +265,15 @@ Values of available layer
 +--------------------+------------------+--------+---------------------------------------+
 | Compression Method | Number of Values | Type   | Range                                 |
 +====================+==================+========+=======================================+
-| PR_L2              | 1                | Float  | 0.0 < ratio ≤ 1.0                     |
+| PR_L2              | 1                | Float  | 0.0 < ratio < 1.0                     |
++--------------------+------------------+--------+---------------------------------------+
+| PR_GM              | 1                | Float  | 0.0 < ratio < 1.0                     |
 +--------------------+------------------+--------+---------------------------------------+
-| PR_GM              | 1                | Float  | 0.0 < ratio ≤ 1.0                     |
+| PR_NN              | 1                | Float  | 0.0 < ratio < 1.0                     |
 +--------------------+------------------+--------+---------------------------------------+
-| PR_NN              | 1                | Float  | 0.0 < ratio ≤ 1.0                     |
+| PR_SNP             | 1                | Float  | 0.0 < ratio < 1.0                     |
 +--------------------+------------------+--------+---------------------------------------+
-| PR_ID              | (Num of Out      | Int    | 0 < channels ≤ Num of Out Channels    |
+| PR_ID              | (Num of Out      | Int    | 0 ≤ channels < Num of Out Channels    |
 |                    | Channels - 1)    |        |                                       |
 +--------------------+------------------+--------+---------------------------------------+
 | FD_TK              | 2                | Int    | 0 < rank ≤ (Num of In Channels or     |

diff --git a/docs/description/api/compressor/compress/advanced_compression/pruning_options.rst b/docs/description/api/compressor/compress/advanced_compression/pruning_options.rst
@@ -114,3 +114,30 @@ Reshape channel axis represents which axis of the reshape operator will be prune
 
     - If the `reshape_channel_axis` is `-2` or `0` when the given pruning ratio is 50%, the output model will contain `6 (12*0.5)` channels of the given reshape operator.
 
+
+Step operator
+*************
+
+The step operator is the rounding method applied to ensure that the number of filters remaining after pruning aligns with the `step_size`.
+
+The available options are Round, Round Up, Round Down, and None.
+
+Round
++++++++++++++++++++
+
+    - Rounds to the nearest step size, adjusting the remaining count of filters.
+
+Round Up
++++++++++++++++++++
+
+    - Always rounds up to the next step size, directly affecting the remaining filters.
+
+Round Down
++++++++++++++++++++
+
+    - Always rounds down to the closest lower step size, impacting the remaining filters.
+
+None
++++++++++++++++++++
+
+    - No rounding operation is applied; the exact amount is used for pruning.
diff --git a/...ion/api/compressor/compress/advanced_compression/recommendation_compression.rst b/...ion/api/compressor/compress/advanced_compression/recommendation_compression.rst
@@ -17,19 +17,21 @@ Compression Method
 
 Available Compression Method
 ++++++++++++++++++++++++++++
-+------------+------------------------------+
-| Name       | Description                  |
-+============+==============================+
-| PR_L2      | L2 Norm Pruning              |
-+------------+------------------------------+
-| PR_GM      | GM Pruning                   |
-+------------+------------------------------+
-| PR_NN      | Nuclear Norm Pruning         |
-+------------+------------------------------+
-| FD_TK      | Tucker Decomposition         |
-+------------+------------------------------+
-| FD_SVD     | Singular Value Decomposition |
-+------------+------------------------------+
++------------+----------------------------------+
+| Name       | Description                      |
++============+==================================+
+| PR_L2      | L2 Norm Pruning                  |
++------------+----------------------------------+
+| PR_GM      | GM Pruning                       |
++------------+----------------------------------+
+| PR_NN      | Nuclear Norm Pruning             |
++------------+----------------------------------+
+| PR_SNP     | Structured Neuron-level Pruning  |
++------------+----------------------------------+
+| FD_TK      | Tucker Decomposition             |
++------------+----------------------------------+
+| FD_SVD     | Singular Value Decomposition     |
++------------+----------------------------------+
 
 Example
 +++++++
@@ -41,7 +43,8 @@ Example
     COMPRESSION_METHOD = CompressionMethod.PR_L2
 
 .. warning::
-    - Nuclear Norm is only supported in the Tensorflow-Keras Framework.
+    - Nuclear Norm Pruning is only supported in the TensorFlow-Keras framework.
+    - Structured Neuron-level Pruning is only supported in the PyTorch and ONNX frameworks.
 
 .. note::
     - Click the link for more information. (:ref:`compression_method_heading`)
@@ -73,7 +76,7 @@ Example
     RECOMMENDATION_METHOD = RecommendationMethod.SLAMP
 
 .. note::
-    - If you selected PR_L2, PR_GM, PR_NN for compression_method
+    - If you selected PR_L2, PR_GM, PR_NN, PR_SNP for compression_method
         - The recommended_method available is **SLAMP**.
     - If you selected FD_TK, FD_SVD for compression_method
         - The recommended_method available is **VBMF**.
@@ -93,7 +96,7 @@ Recommendation Ratio
         .. raw:: html
 
             <div align="center" style="padding: 20px;">
-                <img src="https://latex.codecogs.com/svg.image?0&space;<ratio&space;\leq&space;&space;1&space;" title="https://latex.codecogs.com/svg.image?0 <ratio \leq 1 " />
+                <img src="https://latex.codecogs.com/svg.image?0&space;<ratio&space;&space;<&space;1&space;" title="https://latex.codecogs.com/svg.image?0 <ratio < 1 " />
             </div>
 
         - Click the link for more information. (`SLAMP`_)
@@ -129,7 +132,8 @@ Example
 
 .. code-block:: python
 
-    from netspresso.enums import Policy, LayerNorm, GroupPolicy, Options
+    from netspresso.enums import Policy, LayerNorm, GroupPolicy
+    from netspresso.clients.compressor.v2.schemas import Options
 
     OPTIONS = Options(
         policy=Policy.AVERAGE,
@@ -144,7 +148,7 @@ Example
 
 .. note::
 
-    - This parameter applies only to the Pruning Method (PR_L2, PR_GM, PR_NN).
+    - This parameter applies only to the Pruning Method (PR_L2, PR_GM, PR_NN, PR_SNP).
 
 Example
 -------

diff --git a/docs/description/api/compressor/compress/automatic_compression.rst b/docs/description/api/compressor/compress/automatic_compression.rst
@@ -18,7 +18,7 @@ Compression Ratio
     .. raw:: html
 
         <div align="center">
-            <img src="https://latex.codecogs.com/svg.image?0<&space;ratio&space;\leq&space;1" title="https://latex.codecogs.com/svg.image?0< ratio \leq 1" />
+            <img src="https://latex.codecogs.com/svg.image?0<&space;ratio&space;<&space;1" title="https://latex.codecogs.com/svg.image?0< ratio < 1" />
         </div>
 
 

diff --git a/netspresso/compressor/v2/compressor.py b/netspresso/compressor/v2/compressor.py
@@ -323,7 +323,7 @@ def compress_model(
                 if available_layers.values:
                     available_layers.use = True
 
-            if dataset_path and compression.compression_method == CompressionMethod.PR_NN:
+            if dataset_path and compression.compression_method in [CompressionMethod.PR_NN, CompressionMethod.PR_SNP]:
                 self.upload_dataset(
                     compression_id=create_compression_response.data.compression_id, dataset_path=dataset_path
                 )
@@ -473,7 +473,7 @@ def recommendation_compression(
                 verify_ssl=self.token_handler.verify_ssl
             )
 
-            if dataset_path and compression_method == CompressionMethod.PR_NN:
+            if dataset_path and compression_method in [CompressionMethod.PR_NN, CompressionMethod.PR_SNP]:
                 self.upload_dataset(
                     compression_id=create_compression_response.data.compression_id, dataset_path=dataset_path
                 )

diff --git a/netspresso/enums/compression.py b/netspresso/enums/compression.py
@@ -6,14 +6,15 @@ class CompressionMethod(str, Enum):
     PR_L2 = "PR_L2"
     PR_GM = "PR_GM"
     PR_NN = "PR_NN"
+    PR_SNP = "PR_SNP"
     PR_ID = "PR_ID"
     FD_TK = "FD_TK"
     FD_CP = "FD_CP"
     FD_SVD = "FD_SVD"
 
     @classmethod
     def create_literal(cls):
-        return Literal["PR_L2", "PR_GM", "PR_NN", "PR_ID", "FD_TK", "FD_CP", "FD_SVD"]
+        return Literal["PR_L2", "PR_GM", "PR_NN", "PR_SNP", "PR_ID", "FD_TK", "FD_CP", "FD_SVD"]
 
 
 class RecommendationMethod(str, Enum):