Add config and updates for Llama 3.1 (#118)

* update requirements * updates for llama 3.1 * add local config * use quantized model for local * remove gradient_checkpointing * Update llm-lora-finetuning/requirements.txt * revert to main base model * add hf login for evaluate step as well * update README * switch from info to tip * use github syntax * add links
zenml-io · Jul 24, 2024 · bdee3c1 · bdee3c1
1 parent e5adbde
commit bdee3c1
Show file tree

Hide file tree

Showing 6 changed files with 189 additions and 4 deletions.
diff --git a/llm-lora-finetuning/README.md b/llm-lora-finetuning/README.md
@@ -55,6 +55,13 @@ When running the pipeline like this, the trained model will be stored in the Zen
   <br/>
 </div>
 
+> [!TIP]  
+> To finetune the Llama 3.1 base model, please use the alternative configuration
+> files provided in the `configs` folder.
+>
+> For a remote finetune you can use [`llama3-1_finetune_remote.yaml`](configs/llama3-1_finetune_remote.yaml) and for a
+> local finetune you can use [`llama3-1_finetune_local.yaml`](configs/llama3-1_finetune_local.yaml).
+
 ### ⚡ Accelerate your finetuning
 
 Do you want to benefit from multi-GPU-training with Distributed Data Parallelism (DDP)? Then you can use other configuration files prepared for this purpose.

diff --git a/llm-lora-finetuning/configs/llama3-1_finetune_local.yaml b/llm-lora-finetuning/configs/llama3-1_finetune_local.yaml
@@ -0,0 +1,66 @@
+# Apache Software License 2.0
+# 
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
+model:
+  name: llm-peft-llama-3-1
+  description: "Fine-tune `llama-3.1`."
+  tags:
+    - llm
+    - peft
+    - llama-3.1
+  version: 300_steps
+
+settings:
+  docker:
+    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
+    requirements: requirements.txt
+    python_package_installer: uv
+    python_package_installer_args:
+      system: null
+    apt_packages: 
+      - git
+    environment:
+      PJRT_DEVICE: CUDA
+      USE_TORCH_XLA: "false"
+      MKL_SERVICE_FORCE_INTEL: "1"
+
+parameters:
+  # loads the llama-3.1 base model with 4-bit quantisation (`load_in_4bit`) for local experimentation
+  base_model_id: meta-llama/Meta-Llama-3.1-8B
+  use_fast: False
+  load_in_4bit: True
+  system_prompt: |
+      Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
+      This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
+      The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
+      
+
+steps:
+  prepare_data:
+    parameters:
+      dataset_name: gem/viggo
+
+  finetune:
+    parameters:
+      max_steps: 300
+      eval_steps: 30
+      bf16: True
+
+  promote:
+    parameters:
+      metric: rouge2
+      target_stage: staging
diff --git a/llm-lora-finetuning/configs/llama3-1_finetune_remote.yaml b/llm-lora-finetuning/configs/llama3-1_finetune_remote.yaml
@@ -0,0 +1,84 @@
+# Apache Software License 2.0
+# 
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+
+model:
+  name: llm-peft-llama-3-1
+  description: "Fine-tune `llama-3.1`."
+  tags:
+    - llm
+    - peft
+    - llama-3.1
+  version: 300_steps
+
+settings:
+  docker:
+    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
+    requirements: requirements.txt
+    python_package_installer: uv
+    python_package_installer_args:
+      system: null
+    apt_packages: 
+      - git
+    environment:
+      PJRT_DEVICE: CUDA
+      USE_TORCH_XLA: "false"
+      MKL_SERVICE_FORCE_INTEL: "1"
+
+parameters:
+  base_model_id: meta-llama/Meta-Llama-3.1-8B
+  use_fast: False
+  load_in_4bit: True
+  system_prompt: |
+      Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
+      This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
+      The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
+      
+
+steps:
+  prepare_data:
+    parameters:
+      dataset_name: gem/viggo
+
+  finetune:
+    step_operator: gcp_a100
+    retry:
+      max_retries: 3
+      delay: 10
+      backoff: 2
+    parameters:
+      max_steps: 300
+      eval_steps: 30
+      bf16: True
+
+  evaluate_finetuned:
+    step_operator: gcp_a100
+    retry:
+      max_retries: 3
+      delay: 10
+      backoff: 2
+
+  evaluate_base:
+    step_operator: gcp_a100
+    retry:
+      max_retries: 3
+      delay: 10
+      backoff: 2
+
+  promote:
+    parameters:
+      metric: rouge2
+      target_stage: staging
diff --git a/llm-lora-finetuning/requirements.txt b/llm-lora-finetuning/requirements.txt
@@ -1,5 +1,5 @@
 datasets>=2.19.1
-transformers>=4.42.2
+transformers>=4.43.1
 peft
 bitsandbytes>=0.41.3
 scipy
@@ -8,6 +8,7 @@ rouge_score
 nltk
 accelerate>=0.30.0
 urllib3<2
-zenml
+zenml>=0.62.0
 torch>=2.2.0
 sentencepiece
+huggingface_hub
diff --git a/llm-lora-finetuning/steps/evaluate_model.py b/llm-lora-finetuning/steps/evaluate_model.py
@@ -15,10 +15,12 @@
 # limitations under the License.
 #
 
+import os
 from pathlib import Path
 from typing import Optional
 
 import evaluate
+import huggingface_hub
 import torch
 from datasets import load_from_disk
 from utils.loaders import (
@@ -27,6 +29,7 @@
 )
 from utils.tokenizer import load_tokenizer, tokenize_for_eval
 from zenml import save_artifact, step
+from zenml.client import Client
 from zenml.logger import get_logger
 from zenml.utils.cuda_utils import cleanup_gpu_memory
 
@@ -56,6 +59,17 @@ def evaluate_model(
         load_in_8bit: Whether to load the model in 8bit mode.
     """
     cleanup_gpu_memory(force=True)
+
+    # authenticate with Hugging Face for gated repos (uses the ZenML secret `hf_token` when HF_TOKEN is not set)
+    client = Client()
+
+    if not os.getenv("HF_TOKEN"):
+        try:
+            hf_token = client.get_secret("hf_token").secret_values['token']
+            huggingface_hub.login(token=hf_token)
+        except Exception as e:
+            logger.warning(f"Error authenticating with Hugging Face: {e}")
+
     logger.info("Evaluating model...")
 
     logger.info("Loading dataset...")

diff --git a/llm-lora-finetuning/steps/finetune.py b/llm-lora-finetuning/steps/finetune.py
@@ -15,12 +15,14 @@
 # limitations under the License.
 #
 
+import os
 from pathlib import Path
 
-import transformers
 from accelerate import Accelerator
 from datasets import load_from_disk
+import huggingface_hub
 from materializers.directory_materializer import DirectoryMaterializer
+import transformers
 from typing_extensions import Annotated
 from utils.callbacks import ZenMLCallback
 from utils.loaders import load_base_model
@@ -29,6 +31,7 @@
 from zenml.logger import get_logger
 from zenml.materializers import BuiltInMaterializer
 from zenml.utils.cuda_utils import cleanup_gpu_memory
+from zenml.client import Client
 
 logger = get_logger(__name__)
 
@@ -81,6 +84,16 @@ def finetune(
         The path to the finetuned model directory.
     """
     cleanup_gpu_memory(force=True)
+
+    # authenticate with Hugging Face for gated repos (uses the ZenML secret `hf_token` when HF_TOKEN is not set)
+    client = Client()
+
+    if not os.getenv("HF_TOKEN"):
+        try:
+            hf_token = client.get_secret("hf_token").secret_values['token']
+            huggingface_hub.login(token=hf_token)
+        except Exception as e:
+            logger.warning(f"Error authenticating with Hugging Face: {e}")
 
     ft_model_dir = Path("model_dir")
     dataset_dir = Path(dataset_dir)
@@ -121,7 +134,7 @@ def finetune(
             output_dir=output_dir,
             warmup_steps=warmup_steps,
             per_device_train_batch_size=per_device_train_batch_size,
-            gradient_checkpointing=True,
+            gradient_checkpointing=False,
             gradient_checkpointing_kwargs={'use_reentrant':False} if use_accelerate else {},
             gradient_accumulation_steps=gradient_accumulation_steps,
             max_steps=max_steps,