Skip to content

Commit

Permalink
Add support for Granite 3.1 model family (IBM) (#3261)
Browse files Browse the repository at this point in the history
Co-authored-by: lucas-s-p <[email protected]>
  • Loading branch information
RonalddMatias and lucas-s-p authored Jan 13, 2025
1 parent 12ab30b commit b9ad574
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 1 deletion.
80 changes: 80 additions & 0 deletions src/helm/config/model_deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2948,3 +2948,83 @@ model_deployments:
class_name: "helm.clients.huggingface_client.HuggingFaceClient"
args:
pretrained_model_name_or_path: maritaca-ai/sabia-7b

# Granite-3.1-8b-base
# Deployment of the 8B base model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-8b-base
  model_name: ibm-granite/granite-3.1-8b-base
  tokenizer_name: ibm-granite/granite-3.1-8b-base
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base

# Granite-3.1-8b-instruct
# Deployment of the 8B instruct model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-8b-instruct
  model_name: ibm-granite/granite-3.1-8b-instruct
  tokenizer_name: ibm-granite/granite-3.1-8b-instruct
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct

# Granite-3.1-2b-instruct
# Deployment of the 2B instruct model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-2b-instruct
  model_name: ibm-granite/granite-3.1-2b-instruct
  tokenizer_name: ibm-granite/granite-3.1-2b-instruct
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct

# Granite-3.1-2b-base
# Deployment of the 2B base model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-2b-base
  model_name: ibm-granite/granite-3.1-2b-base
  tokenizer_name: ibm-granite/granite-3.1-2b-base
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base

# Granite-3.1-3b-a800m-instruct
# Deployment of the 3B-A800M instruct model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-3b-a800m-instruct
  model_name: ibm-granite/granite-3.1-3b-a800m-instruct
  tokenizer_name: ibm-granite/granite-3.1-3b-a800m-instruct
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct

# Granite-3.1-3b-a800m-base
# Deployment of the 3B-A800M base model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-3b-a800m-base
  model_name: ibm-granite/granite-3.1-3b-a800m-base
  tokenizer_name: ibm-granite/granite-3.1-3b-a800m-base
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base

# Granite-3.1-1b-a400m-instruct
# Deployment of the 1B-A400M instruct model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-1b-a400m-instruct
  model_name: ibm-granite/granite-3.1-1b-a400m-instruct
  tokenizer_name: ibm-granite/granite-3.1-1b-a400m-instruct
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct

# Granite-3.1-1b-a400m-base
# Deployment of the 1B-A400M base model through the Hugging Face client.
# model_name / tokenizer_name match the `name` of the same-named entries in
# model_metadata.yaml and tokenizer_configs.yaml.
- name: huggingface/granite-3.1-1b-a400m-base
  model_name: ibm-granite/granite-3.1-1b-a400m-base
  tokenizer_name: ibm-granite/granite-3.1-1b-a400m-base
  max_sequence_length: 128000
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      # Handed to the client as its pretrained_model_name_or_path argument.
      pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base
79 changes: 79 additions & 0 deletions src/helm/config/model_metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3584,3 +3584,82 @@ models:
release_date: 2023-11-08
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-8b-base
# Metadata for the 8B base model; tagged as a plain text model (no
# instruction-following tag).
- name: ibm-granite/granite-3.1-8b-base
  display_name: Granite 3.1 - 8B - Base
  description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 8170000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG]

# Granite-3.1-8b-instruct
# Metadata for the 8B instruct model; tagged as both a text model and an
# instruction-following model.
- name: ibm-granite/granite-3.1-8b-instruct
  display_name: Granite 3.1 - 8B - Instruct
  description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 8170000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-2b-instruct
# Metadata for the 2B instruct model; tagged as both a text model and an
# instruction-following model.
- name: ibm-granite/granite-3.1-2b-instruct
  display_name: Granite 3.1 - 2B - Instruct
  description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 2530000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-2b-base
# Metadata for the 2B base model; tagged as a plain text model (no
# instruction-following tag).
- name: ibm-granite/granite-3.1-2b-base
  display_name: Granite 3.1 - 2B - Base
  description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 2530000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG]

# Granite-3.1-3b-a800m-instruct
# Metadata for the 3B-A800M instruct model; tagged as both a text model and an
# instruction-following model.
- name: ibm-granite/granite-3.1-3b-a800m-instruct
  display_name: Granite 3.1 - 3B - A800M - Instruct
  description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 3300000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-3b-a800m-base
# Metadata for the 3B-A800M base model; tagged as a plain text model (no
# instruction-following tag).
- name: ibm-granite/granite-3.1-3b-a800m-base
  display_name: Granite 3.1 - 3B - A800M - Base
  description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 3300000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG]

# Granite-3.1-1b-a400m-instruct
# Metadata for the 1B-A400M instruct model; tagged as both a text model and an
# instruction-following model.
- name: ibm-granite/granite-3.1-1b-a400m-instruct
  display_name: Granite 3.1 - 1B - A400M - Instruct
  # The description previously said "8B parameter", which contradicted the
  # model name and num_parameters below.
  description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 1330000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

# Granite-3.1-1b-a400m-base
# Metadata for the 1B-A400M base model; tagged as a plain text model (no
# instruction-following tag).
- name: ibm-granite/granite-3.1-1b-a400m-base
  display_name: Granite 3.1 - 1B - A400M - Base
  description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
  creator_organization_name: IBM-GRANITE
  access: open
  num_parameters: 1330000000
  release_date: 2024-12-18
  tags: [TEXT_MODEL_TAG]
74 changes: 73 additions & 1 deletion src/helm/config/tokenizer_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -763,4 +763,76 @@ tokenizer_configs:
args:
pretrained_model_name_or_path: maritaca-ai/sabia-7b
end_of_text_token: "</s>"
prefix_token: "<s>"
prefix_token: "<s>"

# Granite-3.1-8b-base
# Tokenizer for the 8B base model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-8b-base
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-base
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): the 2b/3b/1b Granite 3.1 entries in this file leave this
  # empty, and the Granite 3.x tokenizers may spell their EOS token
  # "<|end_of_text|>" - confirm against the model's tokenizer_config.json.
  end_of_text_token: "<|endoftext|>"

# Granite-3.1-8b-instruct
# Tokenizer for the 8B instruct model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-8b-instruct
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-8b-instruct
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): the 2b/3b/1b Granite 3.1 entries in this file leave this
  # empty, and the Granite 3.x tokenizers may spell their EOS token
  # "<|end_of_text|>" - confirm against the model's tokenizer_config.json.
  end_of_text_token: "<|endoftext|>"

# Granite-3.1-2b-instruct
# Tokenizer for the 2B instruct model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-2b-instruct
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-instruct
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): empty here, but the granite-3.1-8b entries in this file set
  # "<|endoftext|>" - confirm which value is intended for this family.
  end_of_text_token: ""

# Granite-3.1-2b-base
# Tokenizer for the 2B base model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-2b-base
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-2b-base
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): empty here, but the granite-3.1-8b entries in this file set
  # "<|endoftext|>" - confirm which value is intended for this family.
  end_of_text_token: ""

# Granite-3.1-3b-a800m-instruct
# Tokenizer for the 3B-A800M instruct model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-3b-a800m-instruct
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-instruct
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): empty here, but the granite-3.1-8b entries in this file set
  # "<|endoftext|>" - confirm which value is intended for this family.
  end_of_text_token: ""

# Granite-3.1-3b-a800m-base
# Tokenizer for the 3B-A800M base model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-3b-a800m-base
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-3b-a800m-base
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): empty here, but the granite-3.1-8b entries in this file set
  # "<|endoftext|>" - confirm which value is intended for this family.
  end_of_text_token: ""

# Granite-3.1-1b-a400m-instruct
# Tokenizer for the 1B-A400M instruct model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-1b-a400m-instruct
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-instruct
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): empty here, but the granite-3.1-8b entries in this file set
  # "<|endoftext|>" - confirm which value is intended for this family.
  end_of_text_token: ""

# Granite-3.1-1b-a400m-base
# Tokenizer for the 1B-A400M base model, loaded through HuggingFaceTokenizer.
- name: ibm-granite/granite-3.1-1b-a400m-base
  tokenizer_spec:
    class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
    args:
      pretrained_model_name_or_path: ibm-granite/granite-3.1-1b-a400m-base
  # No prefix token is configured.
  prefix_token: ""
  # NOTE(review): empty here, but the granite-3.1-8b entries in this file set
  # "<|endoftext|>" - confirm which value is intended for this family.
  end_of_text_token: ""

0 comments on commit b9ad574

Please sign in to comment.