From b9ad5740e7c372b0d6f15a6ddcd35b3628c8f2ec Mon Sep 17 00:00:00 2001
From: Ronaldd Matias <101843459+RonalddMatias@users.noreply.github.com>
Date: Mon, 13 Jan 2025 18:09:00 -0300
Subject: [PATCH] Add support for Granite 3.1 model family (IBM) (#3261)

Co-authored-by: lucas-s-p
---
 src/helm/config/model_deployments.yaml | 80 ++++++++++++++++++++++++++
 src/helm/config/model_metadata.yaml    | 79 +++++++++++++++++++++++++
 src/helm/config/tokenizer_configs.yaml | 74 +++++++++++++++++++++++-
 3 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml
index 01810cd64c5..a2c2c854b9b 100644
--- a/src/helm/config/model_deployments.yaml
+++ b/src/helm/config/model_deployments.yaml
@@ -2948,3 +2948,83 @@ model_deployments:
       class_name: "helm.clients.huggingface_client.HuggingFaceClient"
       args:
         pretrained_model_name_or_path: maritaca-ai/sabia-7b
+
+# Granite-3.1-8b-base
+  - name: huggingface/granite-3.1-8b-base
+    model_name: ibm-granite/granite-3.1-8b-base
+    tokenizer_name: ibm-granite/granite-3.1-8b-base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base
+
+# Granite-3.1-8b-instruct
+  - name: huggingface/granite-3.1-8b-instruct
+    model_name: ibm-granite/granite-3.1-8b-instruct
+    tokenizer_name: ibm-granite/granite-3.1-8b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct
+
+# Granite-3.1-2b-instruct
+  - name: huggingface/granite-3.1-2b-instruct
+    model_name: ibm-granite/granite-3.1-2b-instruct
+    tokenizer_name: ibm-granite/granite-3.1-2b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct
+
+# Granite-3.1-2b-base
+  - name: huggingface/granite-3.1-2b-base
+    model_name: ibm-granite/granite-3.1-2b-base
+    tokenizer_name: ibm-granite/granite-3.1-2b-base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base
+
+# Granite-3.1-3b-a800m-instruct
+  - name: huggingface/granite-3.1-3b-a800m-instruct
+    model_name: ibm-granite/granite-3.1-3b-a800m-instruct
+    tokenizer_name: ibm-granite/granite-3.1-3b-a800m-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct
+
+# Granite-3.1-3b-a800m-base
+  - name: huggingface/granite-3.1-3b-a800m-base
+    model_name: ibm-granite/granite-3.1-3b-a800m-base
+    tokenizer_name: ibm-granite/granite-3.1-3b-a800m-base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base
+
+# Granite-3.1-1b-a400m-instruct
+  - name: huggingface/granite-3.1-1b-a400m-instruct
+    model_name: ibm-granite/granite-3.1-1b-a400m-instruct
+    tokenizer_name: ibm-granite/granite-3.1-1b-a400m-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct
+
+# Granite-3.1-1b-a400m-base
+  - name: huggingface/granite-3.1-1b-a400m-base
+    model_name: ibm-granite/granite-3.1-1b-a400m-base
+    tokenizer_name: ibm-granite/granite-3.1-1b-a400m-base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base
\ No newline at end of file
diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml
index 40e0186dc2c..0a4995a6d3b 100644
--- a/src/helm/config/model_metadata.yaml
+++ b/src/helm/config/model_metadata.yaml
@@ -3584,3 +3584,82 @@ models:
     release_date: 2023-11-08
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+# Granite-3.1-8b-base
+  - name: ibm-granite/granite-3.1-8b-base
+    display_name: Granite 3.1 - 8B - Base
+    description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy of increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+# Granite-3.1-8b-instruct
+  - name: ibm-granite/granite-3.1-8b-instruct
+    display_name: Granite 3.1 - 8B - Instruct
+    description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive licenses and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Granite-3.1-2b-instruct
+  - name: ibm-granite/granite-3.1-2b-instruct
+    display_name: Granite 3.1 - 2B - Instruct
+    description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive licenses and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2530000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Granite-3.1-2b-base
+  - name: ibm-granite/granite-3.1-2b-base
+    display_name: Granite 3.1 - 2B - Base
+    description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy of increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 2530000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+# Granite-3.1-3b-a800m-instruct
+  - name: ibm-granite/granite-3.1-3b-a800m-instruct
+    display_name: Granite 3.1 - 3B - A800M - Instruct
+    description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive licenses and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3300000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Granite-3.1-3b-a800m-base
+  - name: ibm-granite/granite-3.1-3b-a800m-base
+    display_name: Granite 3.1 - 3B - A800M - Base
+    description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy of increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3300000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
+
+# Granite-3.1-1b-a400m-instruct
+  - name: ibm-granite/granite-3.1-1b-a400m-instruct
+    display_name: Granite 3.1 - 1B - A400M - Instruct
+    description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive licenses and internally collected synthetic datasets tailored for solving long context problems.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 1330000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Granite-3.1-1b-a400m-base
+  - name: ibm-granite/granite-3.1-1b-a400m-base
+    display_name: Granite 3.1 - 1B - A400M - Base
+    description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy of increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 1330000000
+    release_date: 2024-12-18
+    tags: [TEXT_MODEL_TAG]
\ No newline at end of file
diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml
index 720ef30e9bc..841db8c0aaa 100644
--- a/src/helm/config/tokenizer_configs.yaml
+++ b/src/helm/config/tokenizer_configs.yaml
@@ -763,4 +763,76 @@ tokenizer_configs:
       args:
         pretrained_model_name_or_path: maritaca-ai/sabia-7b
     end_of_text_token: ""
-    prefix_token: ""
\ No newline at end of file
+    prefix_token: ""
+
+# Granite-3.1-8b-base
+  - name: ibm-granite/granite-3.1-8b-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-8b-instruct
+  - name: ibm-granite/granite-3.1-8b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-2b-instruct
+  - name: ibm-granite/granite-3.1-2b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-2b-base
+  - name: ibm-granite/granite-3.1-2b-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-3b-a800m-instruct
+  - name: ibm-granite/granite-3.1-3b-a800m-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-3b-a800m-base
+  - name: ibm-granite/granite-3.1-3b-a800m-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-1b-a400m-instruct
+  - name: ibm-granite/granite-3.1-1b-a400m-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
+
+# Granite-3.1-1b-a400m-base
+  - name: ibm-granite/granite-3.1-1b-a400m-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base
+    prefix_token: ""
+    end_of_text_token: "<|endoftext|>"
\ No newline at end of file
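
The YAML above hard-codes a 128K max_sequence_length and an end-of-text token for every Granite 3.1 entry. Outside the patch itself, a quick way to cross-check those values is to load each Hugging Face repo referenced in the config and print what the upstream model config and tokenizer declare. The snippet below is only a sketch under those assumptions (transformers installed, Hub reachable); it is not part of the HELM change.

# Sanity-check sketch (not part of the patch): compare the hard-coded YAML values
# against what each upstream Granite 3.1 repo reports via the transformers library.
from transformers import AutoConfig, AutoTokenizer

GRANITE_3_1_REPOS = [
    "ibm-granite/granite-3.1-8b-base",
    "ibm-granite/granite-3.1-8b-instruct",
    "ibm-granite/granite-3.1-2b-base",
    "ibm-granite/granite-3.1-2b-instruct",
    "ibm-granite/granite-3.1-3b-a800m-base",
    "ibm-granite/granite-3.1-3b-a800m-instruct",
    "ibm-granite/granite-3.1-1b-a400m-base",
    "ibm-granite/granite-3.1-1b-a400m-instruct",
]

for repo in GRANITE_3_1_REPOS:
    config = AutoConfig.from_pretrained(repo)       # model config (context window, etc.)
    tokenizer = AutoTokenizer.from_pretrained(repo)  # tokenizer (special tokens)
    print(
        f"{repo}: max_position_embeddings={config.max_position_embeddings}, "
        f"eos_token={tokenizer.eos_token!r}, bos_token={tokenizer.bos_token!r}"
    )

If the reported eos_token differs from the end_of_text_token values in tokenizer_configs.yaml, or max_position_embeddings differs from max_sequence_length in model_deployments.yaml, the YAML entries should be updated to match the upstream repos.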