weekly update #145

Merged 5 commits on Jan 9, 2024
22 changes: 22 additions & 0 deletions assets/01ai.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Yi
organization: 01 AI
description: The Yi series models are large language models trained from scratch by developers at 01 AI.
created_date: 2023-11-02
url: https://github.com/01-ai/Yi
model_card: https://huggingface.co/01-ai/Yi-34B
modality: text; text
analysis: Evaluated on standard language benchmarks, common sense reasoning, and reading comprehension in comparison to SoTA LLMs.
size: 34B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Model underwent supervised fine-tuning, leading to a greater diversity of responses.
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: none
monitoring: unknown
feedback: https://huggingface.co/01-ai/Yi-34B/discussions
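Entries like the one above follow a fixed schema. A minimal sketch of checking a parsed model entry for the fields every model in this update carries (the helper name and field list are illustrative, mirroring the entries in this PR, not the repository's actual validation tooling):

```python
# Hypothetical validation helper; the field list mirrors the model entries
# in this update, but this is not the repository's real schema checker.

REQUIRED_MODEL_FIELDS = {
    "type", "name", "organization", "description", "created_date", "url",
    "model_card", "modality", "analysis", "size", "dependencies",
    "training_emissions", "training_time", "training_hardware",
    "quality_control", "access", "license", "intended_uses",
    "prohibited_uses", "monitoring", "feedback",
}

def missing_fields(entry: dict) -> set:
    """Return the required fields absent from a parsed YAML model entry."""
    return REQUIRED_MODEL_FIELDS - entry.keys()

# A truncated entry (shown here as an already-parsed dict) is missing
# most of the required fields:
partial = {"type": "model", "name": "Yi", "organization": "01 AI"}
print(len(missing_fields(partial)))  # 18
```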
94 changes: 94 additions & 0 deletions assets/ai2.yaml
@@ -115,3 +115,97 @@
monitoring: Quality filtration, deduplication, and risk mitigation via logistic
qualifiers and regular expressions used.
feedback: ''

- type: dataset
name: Tulu-V2-mix
organization: AI2
description: Tulu-V2-mix is a dataset composed of many high-quality instruction datasets that results in stronger performance across a variety of reasoning and knowledge-probing tasks.
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
datasheet: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture
modality: text
size:
value: unknown
explanation: Magnitude of size is around 100M tokens, given the length distribution of dataset provided in model card.
sample: []
analysis: Models trained with dataset evaluated on downstream performance.
dependencies: [FLAN Collection, Open Assistant 1, ShareGPT, Alpaca dataset, Code Alpaca, LIMA, WizardLM, OpenOrca]
included: ''
excluded: ''
quality_control: ''
access: open
license: ODC-BY
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture/discussions

- type: model
name: Tulu 2
organization: AI2
description: Tulu 2 is a language model produced by fine-tuning LLaMA 2 on the new Tulu-V2-mix dataset.
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
model_card: https://huggingface.co/allenai/tulu-2-70b
modality: text; text
analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots.
size: 70B parameters (dense)
dependencies: [LLaMA 2, Tulu-V2-mix]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: AI2 ImpACT
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/allenai/tulu-2-70b/discussions

- type: model
name: Tulu 2 DPO
organization: AI2
description: Tulu 2 DPO is created in a similar manner to Tulu 2, but with Direct Preference Optimization (DPO).
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
model_card: https://huggingface.co/allenai/tulu-2-dpo-70b
modality: text; text
analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots.
size: 70B parameters (dense)
dependencies: [LLaMA 2, Tulu-V2-mix]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: AI2 ImpACT
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/allenai/tulu-2-dpo-70b/discussions
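Direct Preference Optimization, named in the Tulu 2 DPO entry above, trains the policy to widen the log-probability margin between preferred and rejected responses relative to a reference model. A self-contained numerical sketch of the standard DPO loss (β and the log-probabilities below are made-up illustrative values, not from the Tulu 2 paper):

```python
import math

def dpo_loss(logp_chosen, logp_rejected,
             ref_logp_chosen, ref_logp_rejected, beta=0.1):
    """-log sigmoid(beta * ((logp_c - ref_c) - (logp_r - ref_r)))."""
    margin = beta * ((logp_chosen - ref_logp_chosen)
                     - (logp_rejected - ref_logp_rejected))
    return -math.log(1.0 / (1.0 + math.exp(-margin)))

# With zero margin the loss sits at log 2; when the policy prefers the
# chosen response more strongly than the reference does, the loss drops.
print(dpo_loss(-10.0, -14.0, -11.0, -13.0))
```

Here the policy's margin over the reference is positive (it likes the chosen response relatively more), so the printed loss is below log 2 ≈ 0.693.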

- type: model
name: Code Tulu 2
organization: AI2
description: Code Tulu 2 is a fine-tuned version of Code LLaMA, trained on a mix of publicly available, synthetic, and human-created datasets.
created_date: 2023-11-20
url: https://arxiv.org/pdf/2311.10702.pdf
model_card: https://huggingface.co/allenai/codetulu-2-13b
modality: text; code, text
analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots.
size: 13B parameters (dense)
dependencies: [Code LLaMA, Tulu-V2-mix]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: AI2 ImpACT
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions

22 changes: 22 additions & 0 deletions assets/deepseek.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Deepseek
organization: Deepseek AI
description: Deepseek is a 67B parameter model with Grouped-Query Attention trained on 2 trillion tokens from scratch.
created_date: 2023-11-29
url: https://github.com/deepseek-ai/DeepSeek-LLM
model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base
modality: text; text
analysis: Deepseek and baseline models (for comparison) evaluated on a series of representative benchmarks, both in English and Chinese.
size: 67B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: Training dataset comprised of diverse data composition and pruned and deduplicated.
access: open
license: MIT
intended_uses: ''
prohibited_uses: none
monitoring: unknown
feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base/discussions
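The Deepseek entry above cites Grouped-Query Attention, in which several query heads share each key/value head, shrinking the KV cache relative to full multi-head attention. A shape-level sketch (dimensions and data are arbitrary; this is not Deepseek's implementation):

```python
import numpy as np

def grouped_query_attention(q, k, v):
    """q: (n_q_heads, seq, d); k, v: (n_kv_heads, seq, d), n_q % n_kv == 0."""
    n_q, n_kv = q.shape[0], k.shape[0]
    group = n_q // n_kv
    # Each consecutive group of query heads attends via one shared KV head.
    k = np.repeat(k, group, axis=0)
    v = np.repeat(v, group, axis=0)
    scores = q @ k.transpose(0, 2, 1) / np.sqrt(q.shape[-1])
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ v

# 8 query heads share 2 KV heads (4 query heads per group).
rng = np.random.default_rng(0)
out = grouped_query_attention(rng.normal(size=(8, 4, 16)),
                              rng.normal(size=(2, 4, 16)),
                              rng.normal(size=(2, 4, 16)))
print(out.shape)  # (8, 4, 16)
```

The output keeps the per-query-head shape while the model only ever stores 2 heads' worth of keys and values.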
25 changes: 25 additions & 0 deletions assets/inflection.yaml
@@ -45,3 +45,28 @@
monthly_active_users: ''
user_distribution: ''
failures: ''

- type: model
name: Inflection-2
organization: Inflection AI
description: According to Inflection AI's benchmark evaluations at release, Inflection-2 is the best model in its compute class and the second most capable LLM in the world.
created_date: 2023-11-22
url: https://inflection.ai/inflection-2
model_card: none
modality: text; text
analysis: Evaluated against state-of-the-art models on benchmarks, and found to be the most performant model outside of GPT-4.
size:
value: unknown
explanation: Likely larger than Inflection-1, which was stated to be around the size of ChatGPT (175B parameters).
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: 5000 NVIDIA H100 GPUs
quality_control: ''
access: closed
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none

23 changes: 23 additions & 0 deletions assets/meta.yaml
@@ -694,3 +694,26 @@
prohibited_uses: ''
monitoring: ''
feedback: ''

- type: model
name: Code LLaMA
organization: Meta
description: Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters.
created_date: 2023-08-24
url: https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/
model_card: https://huggingface.co/codellama/CodeLlama-34b-hf
modality: text; code, text
analysis: Evaluated on several code benchmarks like HumanEval and MBPP.
size: 34B parameters (dense)
dependencies: [LLaMA 2]
training_emissions: 65.3 tCO2eq
training_time: 400K GPU hours
training_hardware: A100-80GB GPUs
quality_control: ''
access: open
license: LLaMA 2
intended_uses: Code Llama and its variants are intended for commercial and research use in English and relevant programming languages.
prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants.
monitoring: ''
feedback: https://huggingface.co/codellama/CodeLlama-34b-hf/discussions

22 changes: 22 additions & 0 deletions assets/qwen.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: Qwen
organization: Qwen AI
description: Qwen is a Transformer-based large language model pretrained on a large volume of data, including web text, books, and code.
created_date: 2023-11-26
url: https://arxiv.org/pdf/2309.16609.pdf
model_card: https://huggingface.co/Qwen/Qwen-72B
modality: text; text
analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, which are currently popular benchmarks, to test the model’s Chinese and English knowledge capabilities, translation, mathematical reasoning, coding and other capabilities.
size: 72B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: none
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: none
monitoring: unknown
feedback: https://huggingface.co/Qwen/Qwen-72B/discussions
45 changes: 45 additions & 0 deletions assets/stability.yaml
@@ -94,3 +94,48 @@
monthly_active_users: ''
user_distribution: ''
failures: ''

- type: model
name: Stable Video Diffusion
organization: Stability AI
description: Stable Video Diffusion is a latent diffusion model trained to generate short video clips conditioned on an input image.
created_date: 2023-11-21
url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf
model_card: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt
modality: text; video
analysis: Evaluated via a user study comparing preferences between Stable Video Diffusion and competing text-to-video models.
size: unknown
dependencies: [Large Video Dataset]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: limited
license: unknown
intended_uses: Intended for research purposes only.
prohibited_uses: Using the model to generate representations of real-world people or events.
monitoring: ''
feedback: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/discussions

- type: dataset
name: Large Video Dataset
organization: Stability AI
description: Large Video Dataset is the dataset used to train Stable Video Diffusion, consisting of over 212 years of video content.
created_date: 2023-11-21
url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf
datasheet: ''
modality: video with caption
size: 580M annotated video clip pairs
sample: []
analysis: Large Video Dataset compared to publicly available research datasets on general statistics before and after filtering.
dependencies: [WebVid-10M, CoCa, V-BLIP]
included: ''
excluded: ''
quality_control: Dataset annotated with dense optical flow, and low optical flow videos are removed.
access: closed
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: ''
feedback: none
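The quality-control step above, annotating clips with dense optical flow and dropping near-static ones, can be sketched as follows (the flow fields here are synthetic and the threshold is an arbitrary placeholder, not Stability AI's pipeline):

```python
import numpy as np

def mean_flow_magnitude(flow):
    """flow: (frames, H, W, 2) per-pixel displacement field."""
    return float(np.linalg.norm(flow, axis=-1).mean())

def filter_static_clips(flows, threshold=0.5):
    """Keep indices of clips whose average motion exceeds the threshold."""
    return [i for i, f in enumerate(flows)
            if mean_flow_magnitude(f) >= threshold]

static = np.zeros((4, 8, 8, 2))  # no motion at all
moving = np.ones((4, 8, 8, 2))   # uniform motion, magnitude sqrt(2) per pixel
print(filter_static_clips([static, moving]))  # [1]
```

Only the moving clip survives; a still or slideshow-like video would be discarded before training.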

24 changes: 24 additions & 0 deletions assets/xai.yaml
@@ -0,0 +1,24 @@
---
- type: model
name: Grok-1
organization: xAI
description: Grok-1 is a text chatbot modeled after the Hitchhiker’s Guide to the Galaxy, intended to answer almost anything and even suggest what questions to ask.
created_date: 2023-11-04
url: https://grok.x.ai/
model_card: https://x.ai/model-card/
modality: text; text
analysis: Evaluated on standard language benchmarks in comparison to SoTA LLMs.
size:
explanation: No model size specified, but Grok-1 is larger than predecessor Grok-0 (33B parameters), as stated in the Grok announcement at https://x.ai/.
value: unknown
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: none
access: limited
license: unknown
intended_uses: ''
prohibited_uses: none
monitoring: unknown
feedback: none