
Commit

Merge pull request #147 from stanford-crfm/jonathan/1222-weekly-assets
weekly update
rishibommasani authored Jan 9, 2024
2 parents adbc2da + 58bf027 commit f994396
Showing 16 changed files with 246 additions and 44 deletions.
9 changes: 6 additions & 3 deletions assets/01ai.yaml
@@ -2,18 +2,21 @@
- type: model
name: Yi
organization: 01 AI
description: The Yi series models are large language models trained from scratch
by developers at 01 AI.
created_date: 2023-11-02
url: https://github.com/01-ai/Yi
model_card: https://huggingface.co/01-ai/Yi-34B
modality: text; text
analysis: Evaluated on standard language benchmarks, common sense reasoning, and
reading comprehension in comparison to SoTA LLMs.
size: 34B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Model underwent supervised fine-tuning, leading to a greater
diversity of responses.
access: open
license: Apache 2.0
intended_uses: ''
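These asset records share a common field layout across the changed files. As a reference point, here is a minimal sketch (not part of this commit, and not the repository's actual tooling) of loading one of these files with PyYAML and checking that the model fields shown above are present; the field list is copied from the entries in this diff.

import yaml

# Field names observed in the model records in this diff; illustrative only,
# not the repository's canonical schema definition.
MODEL_FIELDS = [
    "type", "name", "organization", "description", "created_date", "url",
    "model_card", "modality", "analysis", "size", "dependencies",
    "training_emissions", "training_time", "training_hardware",
    "quality_control", "access", "license", "intended_uses",
    "prohibited_uses", "monitoring", "feedback",
]

with open("assets/01ai.yaml") as f:
    assets = yaml.safe_load(f)  # each asset file holds a list of records

for asset in assets:
    if asset.get("type") == "model":
        missing = [field for field in MODEL_FIELDS if field not in asset]
        print(asset["name"], "missing:", missing or "none")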
36 changes: 23 additions & 13 deletions assets/ai2.yaml
@@ -119,17 +119,28 @@
- type: dataset
name: Tulu-V2-mix
organization: AI2
description: Tulu-V2-mix is a dataset composed of many high-quality instruction
datasets that results in stronger performance across a variety of reasoning
and knowledge-probing tasks.
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
datasheet: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture
modality: text
size:
value: unknown
explanation: Magnitude of size is around 100M tokens, given the length distribution
of the dataset provided in the model card.
sample: []
analysis: Models trained with dataset evaluated on downstream performance.
dependencies:
- FLAN Collection
- Open Assistant 1
- ShareGPT
- Alpaca dataset
- Code Alpaca
- LIMA
- WizardLM
- OpenOrca
included: ''
excluded: ''
quality_control: ''
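For readers who want to inspect this mixture, the datasheet above points to a Hugging Face dataset. A minimal sketch using the datasets library follows; the messages column name is assumed from the dataset card rather than taken from this commit.

from datasets import load_dataset

# Load the SFT mixture referenced in the datasheet field above.
mix = load_dataset("allenai/tulu-v2-sft-mixture", split="train")
print(mix)                    # row count and column names
print(mix[0]["messages"][0])  # assumed schema: each example is a list of chat turns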
@@ -143,13 +154,13 @@
- type: model
name: Tulu 2
organization: AI2
description: Tulu 2 is a language model trained on the new Tulu-v2-mix dataset
and fine-tuned from state-of-the-art base language models.
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
model_card: https://huggingface.co/allenai/tulu-2-70b
modality: text; text
analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots.
size: 70B parameters (dense)
dependencies: [LLaMA 2, Tulu-V2-mix]
training_emissions: unknown
@@ -166,13 +177,13 @@
- type: model
name: Tulu 2 DPO
organization: AI2
description: Tulu 2 DPO is created in a similar manner to Tulu 2, but with Direct
Preference Optimization (DPO).
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
model_card: https://huggingface.co/allenai/tulu-2-dpo-70b
modality: text; text
analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots.
size: 70B parameters (dense)
dependencies: [LLaMA 2, Tulu-V2-mix]
training_emissions: unknown
@@ -189,13 +200,13 @@
- type: model
name: Code Tulu 2
organization: AI2
description: Code Tulu 2 is a fine-tuned version of Code LLaMA that was trained
on a mix of publicly available, synthetic and human datasets.
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
model_card: https://huggingface.co/allenai/codetulu-2-13b
modality: text; code, text
analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots.
size: 13B parameters (dense)
dependencies: [Code LLaMA, Tulu-V2-mix]
training_emissions: unknown
@@ -208,4 +219,3 @@
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions
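A minimal sketch (not from this commit) of querying the 13B checkpoint referenced in the model card above with transformers; the <|user|>/<|assistant|> prompt format is the one documented for the Tulu family and is assumed here rather than taken from this diff.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "allenai/codetulu-2-13b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Tulu-style chat prompt (assumed from the model card, not this commit).
prompt = "<|user|>\nWrite a Python function that reverses a string.\n<|assistant|>\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))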

2 changes: 1 addition & 1 deletion assets/amazon.yaml
@@ -41,7 +41,7 @@
analysis: Evaluated against benchmarks that are specifically designed to assess
the capabilities of LLMs in handling longer contexts.
size: 40B parameters (dense)
dependencies: [Falcon-40B]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
21 changes: 21 additions & 0 deletions assets/anthropic.yaml
@@ -549,3 +549,24 @@
prohibited_uses: ''
monitoring: ''
feedback: none
- type: application
name: Claude for Sheets
organization: Anthropic
description: Claude for Sheets is a Google Sheets add-on that enables the use
of Claude directly in Google Sheets.
created_date: 2023-12-21
url: https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257
dependencies: [Anthropic API]
adaptation: ''
output_space: AI-generated text from prompt
quality_control: ''
access: open
license: unknown
terms_of_service: https://claude.ai/legal
intended_uses: as an integrated AI assistant in Google Sheets
prohibited_uses: ''
monitoring: unknown
feedback: Reviews on https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257
monthly_active_users: unknown
user_distribution: unknown
failures: unknown
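The add-on's only listed dependency is the Anthropic API. For readers working outside Google Sheets, here is a rough Python equivalent of the underlying call, assuming the anthropic SDK's Messages API; the model name and prompt are illustrative, not taken from this commit.

import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

# Illustrative request; the Sheets add-on wraps calls like this in a formula.
response = client.messages.create(
    model="claude-2.1",  # assumed model name, not from this commit
    max_tokens=256,
    messages=[{"role": "user", "content": "Summarize this spreadsheet row: ..."}],
)
print(response.content[0].text)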
25 changes: 25 additions & 0 deletions assets/cresta.yaml
@@ -0,0 +1,25 @@
---
- type: model
name: Ocean-1
organization: Cresta
description: Ocean-1 is the culmination of Cresta's experience in deploying generative
AI systems for large enterprises, and marks their latest milestone in advancing
cutting-edge AI technology for customer-facing conversations.
created_date: 2023-06-20
url: https://cresta.com/blog/introducing-ocean-1-worlds-first-contact-center-foundation-model/
model_card: none
modality: text; text
analysis: Outperforms GPT-4 in common sense and reasoning tasks on the basis of
both efficiency and accuracy.
size: 7B parameters (dense)
dependencies: [GPT-4, Claude, Falcon-40B]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: closed
license: unknown
intended_uses: Acting as a contact center chatbot agent.
prohibited_uses: none
monitoring: unknown
feedback: none
25 changes: 25 additions & 0 deletions assets/deci.yaml
@@ -0,0 +1,25 @@
---
- type: model
name: DeciLM
organization: Deci
description: DeciLM is an LLM that, on release, ranks as the fastest and most
accurate model of its size.
created_date: 2023-12-12
url: https://deci.ai/blog/introducing-decilm-7b-the-fastest-and-most-accurate-7b-large-language-model-to-date
model_card: https://deci.ai/model-zoo/decilm-7b/
modality: text; text
analysis: Evaluated on the OpenLLM benchmarks and, on release, outperforms all
other 7B models on the OpenLLM Leaderboard.
size: 7B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: NVIDIA A10 GPUs
quality_control: ''
access: open
license: Apache 2.0
intended_uses: This model is intended for commercial and research use in English
and can be fine-tuned for use in other languages.
prohibited_uses: ''
monitoring: unknown
feedback: none
9 changes: 6 additions & 3 deletions assets/deepseek.yaml
@@ -2,18 +2,21 @@
- type: model
name: Deepseek
organization: Deepseek AI
description: Deepseek is a 67B parameter model with Grouped-Query Attention trained
on 2 trillion tokens from scratch.
created_date: 2023-11-29
url: https://github.com/deepseek-ai/DeepSeek-LLM
model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base
modality: text; text
analysis: Deepseek and baseline models (for comparison) evaluated on a series
of representative benchmarks, both in English and Chinese.
size: 67B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Training dataset comprises a diverse data composition and was
pruned and deduplicated.
access: open
license: MIT
intended_uses: ''
26 changes: 26 additions & 0 deletions assets/google.yaml
@@ -1678,6 +1678,32 @@
within specific downstream applications without prior assessment
monitoring: Google internal monitoring
feedback: Specific queries provided by annotators
- type: model
name: MedLM
organization: Google
description: MedLM is a collection of foundation models tuned to follow natural
language instructions for tasks in medicine, such as question answering and
creating draft summaries.
created_date: 2023-12-13
url: https://cloud.google.com/vertex-ai/docs/generative-ai/medlm/overview
model_card: https://cloud.google.com/static/vertex-ai/docs/generative-ai/medlm/MedLM-model-card.pdf
modality: text; text
analysis: Assessed on medical benchmarks of professional medical exams, medical
research, and consumer queries.
size: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: to be used for question answering and creating draft summaries
from existing documentation, to be reviewed, edited, and approved by the user
before use.
prohibited_uses: ''
monitoring: Google internal monitoring
feedback: none
- type: model
name: Gemini
organization: Google
8 changes: 5 additions & 3 deletions assets/inflection.yaml
@@ -49,12 +49,15 @@
- type: model
name: Inflection-2
organization: Inflection AI
description: Inflection-2 is the best model in the world for its compute class
and the second most capable LLM in the world, according to benchmark evaluation,
as of its release.
created_date: 2023-11-22
url: https://inflection.ai/inflection-2
model_card: none
modality: text; text
analysis: Evaluated against state-of-the-art models on benchmarks, and found to
be the most performant model outside of GPT-4.
size: unknown
dependencies: []
training_emissions: unknown
@@ -67,4 +70,3 @@
prohibited_uses: ''
monitoring: ''
feedback: none

60 changes: 60 additions & 0 deletions assets/llm360.yaml
@@ -0,0 +1,60 @@
---
- type: model
name: Amber
organization: LLM360
description: Amber is the first model in the LLM360 family, an initiative for
comprehensive and fully open-sourced LLMs, where all training details, model
checkpoints, intermediate results, and additional analyses are made available
to the community.
created_date: 2023-12-12
url: https://www.llm360.ai/
model_card: https://huggingface.co/LLM360/Amber
modality: text; text
analysis: Evaluated on several benchmark LLM tasks
size: 7B parameters (dense)
dependencies:
- Arxiv
- Books
- C4
- RefinedWeb
- StarCoder
- StackExchange
- Wikipedia
training_emissions: unknown
training_time: unknown
training_hardware: 56 DGX A100 nodes, each equipped with 4 80GB A100 GPUs
quality_control: ''
access: open
license: Apache 2.0
intended_uses: to support open and collaborative AI research by making the full
LLM training process transparent.
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/LLM360/Amber/discussions
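Since the LLM360 release emphasizes open intermediate checkpoints, here is a hedged sketch (not from this commit) of loading Amber with transformers; the final checkpoint is the one named in the model card, while the revision branch name in the commented line is illustrative only and should be checked against the repository.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Final Amber checkpoint from the model card above; earlier training checkpoints
# are published as separate revisions (the branch name below is illustrative).
tokenizer = AutoTokenizer.from_pretrained("LLM360/Amber")
model = AutoModelForCausalLM.from_pretrained("LLM360/Amber", device_map="auto")
# model = AutoModelForCausalLM.from_pretrained("LLM360/Amber", revision="ckpt_100")

inputs = tokenizer("Fully open-sourcing the training process means", return_tensors="pt")
outputs = model.generate(**inputs.to(model.device), max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))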

- type: model
name: CrystalCoder
organization: LLM360
description: CrystalCoder is a language model trained on a balanced mix of code
and text data that, in keeping with the LLM360 initiative, makes its training
process fully transparent.
created_date: 2023-12-12
url: https://www.llm360.ai/
model_card: https://huggingface.co/LLM360/CrystalCoder
modality: text; code, text
analysis: Evaluated on English and coding tasks and benchmarks, and outperforms
LLaMA 2 in some.
size: 7B parameters (dense)
dependencies: [SlimPajama dataset, StarCoder]
training_emissions: unknown
training_time: unknown
training_hardware: Trained on the Cerebras Condor Galaxy 1 (CG-1), a 4 exaFLOPS,
54 million core, 64-node cloud AI supercomputer.
quality_control: ''
access: open
license: Apache 2.0
intended_uses: to support open and collaborative AI research by making the full
LLM training process transparent.
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/LLM360/CrystalCoder/discussions
12 changes: 8 additions & 4 deletions assets/meta.yaml
@@ -698,7 +698,8 @@
- type: model
name: Code LLaMA
organization: Meta
description: Code Llama is a collection of pretrained and fine-tuned generative
text models ranging in scale from 7 billion to 34 billion parameters.
created_date: 2023-08-24
url: https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/
model_card: https://huggingface.co/codellama/CodeLlama-34b-hf
@@ -712,8 +713,12 @@
quality_control: ''
access: open
license: LLaMA 2
intended_uses: Code Llama and its variants are intended for commercial and research
use in English and relevant programming languages.
prohibited_uses: Use in any manner that violates applicable laws or regulations
(including trade compliance laws). Use in languages other than English. Use
in any other way that is prohibited by the Acceptable Use Policy and Licensing
Agreement for Code Llama and its variants.
monitoring: ''
feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions

@@ -788,4 +793,3 @@
prohibited_uses: ''
monitoring: none
feedback: none

8 changes: 6 additions & 2 deletions assets/qwen.yaml
@@ -2,12 +2,16 @@
- type: model
name: Qwen
organization: Qwen AI
description: Qwen is a Transformer-based large language model, which is pretrained
on a large volume of data, including web texts, books, code, etc.
created_date: 2023-11-26
url: https://arxiv.org/pdf/2309.16609.pdf
model_card: https://huggingface.co/Qwen/Qwen-72B
modality: text; text
analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, and CMMLU,
which are currently popular benchmarks, to test the model’s Chinese and English
knowledge, translation, mathematical reasoning, coding, and other capabilities.
size: 72B parameters (dense)
dependencies: []
training_emissions: unknown
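A minimal sketch (not from this commit) of loading the 72B checkpoint named in the model card above; Qwen repositories ship custom modeling code, so trust_remote_code=True is assumed to be required and should be reviewed before enabling it.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen-72B"
# Qwen checkpoints rely on custom code in the repository, hence trust_remote_code=True.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", trust_remote_code=True
)

inputs = tokenizer("Large language models are", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))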
