diff --git a/llm-lora-finetuning/README.md b/llm-lora-finetuning/README.md
index 55fed08d..d7711c72 100644
--- a/llm-lora-finetuning/README.md
+++ b/llm-lora-finetuning/README.md
@@ -55,6 +55,13 @@ When running the pipeline like this, the trained model will be stored in the Zen
+> [!TIP]
+> To finetune the Llama 3.1 base model, please use the alternative configuration
+> files provided in the `configs` folder.
+>
+> For a remote finetune you can use [`llama3-1_finetune_remote.yaml`](configs/llama3-1_finetune_remote.yaml) and for a
+> local finetune you can use [`llama3-1_finetune_local.yaml`](configs/llama3-1_finetune_local.yaml).
+
 ### ⚡ Accelerate your finetuning
 
 Do you want to benefit from multi-GPU-training with Distributed Data Parallelism (DDP)? Then you can use other configuration files prepared for this purpose.
diff --git a/llm-lora-finetuning/configs/llama3-1_finetune_local.yaml b/llm-lora-finetuning/configs/llama3-1_finetune_local.yaml
new file mode 100644
index 00000000..7cbff24f
--- /dev/null
+++ b/llm-lora-finetuning/configs/llama3-1_finetune_local.yaml
@@ -0,0 +1,66 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model:
+  name: llm-peft-llama-3-1
+  description: "Fine-tune `llama-3.1`."
+  tags:
+    - llm
+    - peft
+    - llama-3.1
+  version: 300_steps
+
+settings:
+  docker:
+    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
+    requirements: requirements.txt
+    python_package_installer: uv
+    python_package_installer_args:
+      system: null
+    apt_packages:
+      - git
+    environment:
+      PJRT_DEVICE: CUDA
+      USE_TORCH_XLA: "false"
+      MKL_SERVICE_FORCE_INTEL: "1"
+
+parameters:
+  # uses a 4-bit quantised version of llama-3.1 for local experimentation
+  base_model_id: meta-llama/Meta-Llama-3.1-8B
+  use_fast: False
+  load_in_4bit: True
+  system_prompt: |
+    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
+    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
+    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
+
+
+steps:
+  prepare_data:
+    parameters:
+      dataset_name: gem/viggo
+
+  finetune:
+    parameters:
+      max_steps: 300
+      eval_steps: 30
+      bf16: True
+
+  promote:
+    parameters:
+      metric: rouge2
+      target_stage: staging
diff --git a/llm-lora-finetuning/configs/llama3-1_finetune_remote.yaml b/llm-lora-finetuning/configs/llama3-1_finetune_remote.yaml
new file mode 100644
index 00000000..ec93678e
--- /dev/null
+++ b/llm-lora-finetuning/configs/llama3-1_finetune_remote.yaml
@@ -0,0 +1,84 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model:
+  name: llm-peft-llama-3-1
+  description: "Fine-tune `llama-3.1`."
+  tags:
+    - llm
+    - peft
+    - llama-3.1
+  version: 300_steps
+
+settings:
+  docker:
+    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
+    requirements: requirements.txt
+    python_package_installer: uv
+    python_package_installer_args:
+      system: null
+    apt_packages:
+      - git
+    environment:
+      PJRT_DEVICE: CUDA
+      USE_TORCH_XLA: "false"
+      MKL_SERVICE_FORCE_INTEL: "1"
+
+parameters:
+  base_model_id: meta-llama/Meta-Llama-3.1-8B
+  use_fast: False
+  load_in_4bit: True
+  system_prompt: |
+    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
+    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
+    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
+
+
+steps:
+  prepare_data:
+    parameters:
+      dataset_name: gem/viggo
+
+  finetune:
+    step_operator: gcp_a100
+    retry:
+      max_retries: 3
+      delay: 10
+      backoff: 2
+    parameters:
+      max_steps: 300
+      eval_steps: 30
+      bf16: True
+
+  evaluate_finetuned:
+    step_operator: gcp_a100
+    retry:
+      max_retries: 3
+      delay: 10
+      backoff: 2
+
+  evaluate_base:
+    step_operator: gcp_a100
+    retry:
+      max_retries: 3
+      delay: 10
+      backoff: 2
+
+  promote:
+    parameters:
+      metric: rouge2
+      target_stage: staging
diff --git a/llm-lora-finetuning/requirements.txt b/llm-lora-finetuning/requirements.txt
index d42e3b3f..cf8f78fd 100644
--- a/llm-lora-finetuning/requirements.txt
+++ b/llm-lora-finetuning/requirements.txt
@@ -1,5 +1,5 @@
 datasets>=2.19.1
-transformers>=4.42.2
+transformers>=4.43.1
 peft
 bitsandbytes>=0.41.3
 scipy
@@ -8,6 +8,7 @@ rouge_score
 nltk
 accelerate>=0.30.0
 urllib3<2
-zenml
+zenml>=0.62.0
 torch>=2.2.0
 sentencepiece
+huggingface_hub
diff --git a/llm-lora-finetuning/steps/evaluate_model.py b/llm-lora-finetuning/steps/evaluate_model.py
index 4dfba094..f36642c1 100644
--- a/llm-lora-finetuning/steps/evaluate_model.py
+++ b/llm-lora-finetuning/steps/evaluate_model.py
@@ -15,10 +15,12 @@
 # limitations under the License.
 #
 
+import os
 from pathlib import Path
 from typing import Optional
 
 import evaluate
+import huggingface_hub
 import torch
 from datasets import load_from_disk
 from utils.loaders import (
@@ -27,6 +29,7 @@
 )
 from utils.tokenizer import load_tokenizer, tokenize_for_eval
 from zenml import save_artifact, step
+from zenml.client import Client
 from zenml.logger import get_logger
 from zenml.utils.cuda_utils import cleanup_gpu_memory
 
@@ -56,6 +59,17 @@ def evaluate_model(
         load_in_8bit: Whether to load the model in 8bit mode.
""" cleanup_gpu_memory(force=True) + + # authenticate with Hugging Face for gated repos + client = Client() + + if not os.getenv("HF_TOKEN"): + try: + hf_token = client.get_secret("hf_token").secret_values['token'] + huggingface_hub.login(token=hf_token) + except Exception as e: + logger.warning(f"Error authenticating with Hugging Face: {e}") + logger.info("Evaluating model...") logger.info("Loading dataset...") diff --git a/llm-lora-finetuning/steps/finetune.py b/llm-lora-finetuning/steps/finetune.py index ce14a490..ece4b1b0 100644 --- a/llm-lora-finetuning/steps/finetune.py +++ b/llm-lora-finetuning/steps/finetune.py @@ -15,12 +15,14 @@ # limitations under the License. # +import os from pathlib import Path -import transformers from accelerate import Accelerator from datasets import load_from_disk +import huggingface_hub from materializers.directory_materializer import DirectoryMaterializer +import transformers from typing_extensions import Annotated from utils.callbacks import ZenMLCallback from utils.loaders import load_base_model @@ -29,6 +31,7 @@ from zenml.logger import get_logger from zenml.materializers import BuiltInMaterializer from zenml.utils.cuda_utils import cleanup_gpu_memory +from zenml.client import Client logger = get_logger(__name__) @@ -81,6 +84,16 @@ def finetune( The path to the finetuned model directory. """ cleanup_gpu_memory(force=True) + + # authenticate with Hugging Face for gated repos + client = Client() + + if not os.getenv("HF_TOKEN"): + try: + hf_token = client.get_secret("hf_token").secret_values['token'] + huggingface_hub.login(token=hf_token) + except Exception as e: + logger.warning(f"Error authenticating with Hugging Face: {e}") ft_model_dir = Path("model_dir") dataset_dir = Path(dataset_dir) @@ -121,7 +134,7 @@ def finetune( output_dir=output_dir, warmup_steps=warmup_steps, per_device_train_batch_size=per_device_train_batch_size, - gradient_checkpointing=True, + gradient_checkpointing=False, gradient_checkpointing_kwargs={'use_reentrant':False} if use_accelerate else {}, gradient_accumulation_steps=gradient_accumulation_steps, max_steps=max_steps,