From 745577771b8bac85ab020daf7be9ab9843b0b479 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Fri, 24 Nov 2023 22:42:54 -0800 Subject: [PATCH 1/4] weekly update --- assets/ai2.yaml | 94 ++++++++++++++++++++++++++++++++++++++++++ assets/inflection.yaml | 25 +++++++++++ assets/meta.yaml | 23 +++++++++++ assets/stability.yaml | 45 ++++++++++++++++++++ 4 files changed, 187 insertions(+) diff --git a/assets/ai2.yaml b/assets/ai2.yaml index f2520c31..2dd849b9 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -115,3 +115,97 @@ monitoring: Quality filtration, deduplication, and risk mitigation via logistic qualifiers and regular expressions used. feedback: '' + +- type: dataset + name: Tulu-V2-mix + organization: AI2 + description: Tulu-V2-mix is a dataset composed of many high-quality instruction datasets that yields stronger performance across a variety of reasoning and knowledge-probing tasks. + created_date: 2023-11-20 + url: https://arxiv.org/pdf/2311.10702.pdf + datasheet: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture + modality: text + size: + value: unknown + explanation: Magnitude of size is around 100M tokens, given the length distribution of the dataset provided in the model card. + sample: [] + analysis: Models trained with the dataset are evaluated on downstream performance. + dependencies: [FLAN, Open Assistant 1, ShareGPT, GPT4 Alpaca, Code Alpaca, LIMA, WizardLM Evol Instruct, Open Orca] + included: '' + excluded: '' + quality_control: '' + access: open + license: ODC-BY + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture/discussions + +- type: model + name: Tulu 2 + organization: AI2 + description: Tulu 2 is a language model created by fine-tuning LLaMA 2 on the new Tulu-V2-mix dataset. + created_date: 2023-11-20 + url: https://arxiv.org/pdf/2311.10702.pdf + model_card: https://huggingface.co/allenai/tulu-2-70b + modality: text; text + analysis: Evaluated on MT-Bench and AlpacaEval, + compared to other chatbots. + size: 70B parameters (dense) + dependencies: [LLaMA 2, Tulu-V2-mix] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: AI2 ImpACT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/tulu-2-70b/discussions + +- type: model + name: Tulu 2 DPO + organization: AI2 + description: Tulu 2 DPO is created in a similar manner to Tulu 2, but trained with Direct Preference Optimization (DPO). + created_date: 2023-11-20 + url: https://arxiv.org/pdf/2311.10702.pdf + model_card: https://huggingface.co/allenai/tulu-2-dpo-70b + modality: text; text + analysis: Evaluated on MT-Bench and AlpacaEval, + compared to other chatbots. + size: 70B parameters (dense) + dependencies: [LLaMA 2, Tulu-V2-mix] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: AI2 ImpACT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/tulu-2-dpo-70b/discussions + +- type: model + name: Code Tulu 2 + organization: AI2 + description: Code Tulu 2 is a fine-tuned version of Code LLaMA that was trained on a mix of publicly available, synthetic, and human-created datasets.
+ created_date: 2023-11-20 + url: https://arxiv.org/pdf/2311.10702.pdf + model_card: https://huggingface.co/allenai/codetulu-2-13b + modality: text; code, text + analysis: Evaluated on MT-Bench and AlpacaEval, + compared to other chatbots. + size: 13B parameters (dense) + dependencies: [Code LLaMA, Tulu-V2-mix] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: AI2 ImpACT + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions + diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 19a7f40c..3dcb7370 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -45,3 +45,28 @@ monthly_active_users: '' user_distribution: '' failures: '' + +- type: model + name: Inflection-2 + organization: Inflection AI + description: According to benchmark evaluations as of its release, Inflection-2 is the best model in the world for its compute class and the second most capable LLM in the world. + created_date: 2023-11-22 + url: https://inflection.ai/inflection-2 + model_card: none + modality: text; text + analysis: Evaluated against state-of-the-art models on benchmarks, and found to be the most performant model outside of GPT-4. + size: + value: unknown + explanation: Likely larger than Inflection-1, which was stated to be around the size of ChatGPT (175B parameters). + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: 5000 NVIDIA H100 GPUs + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: none + diff --git a/assets/meta.yaml b/assets/meta.yaml index 10a5d453..4d6bb01b 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -694,3 +694,26 @@ prohibited_uses: '' monitoring: '' feedback: '' + +- type: model + name: Code LLaMA + organization: Meta + description: Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. + created_date: 2023-08-24 + url: https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/ + model_card: https://huggingface.co/codellama/CodeLlama-34b-hf + modality: text; code, text + analysis: Evaluated on several code benchmarks like HumanEval and MBPP. + size: 34B parameters (dense) + dependencies: [LLaMA 2] + training_emissions: 65.3 tCO2eq + training_time: 400K GPU hours + training_hardware: A100-80GB GPUs + quality_control: '' + access: open + license: LLaMA 2 + intended_uses: Code Llama and its variants are intended for commercial and research use in English and relevant programming languages. + prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants. + monitoring: '' + feedback: https://huggingface.co/codellama/CodeLlama-34b-hf/discussions + diff --git a/assets/stability.yaml b/assets/stability.yaml index 3360176c..635bc5b9 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -94,3 +94,48 @@ monthly_active_users: '' user_distribution: '' failures: '' + +- type: model + name: Stable Video Diffusion + organization: Stability AI + description: Stable Video Diffusion is a latent diffusion model trained to generate short video clips conditioned on an input image.
+ created_date: 2023-11-21 + url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf + model_card: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt + modality: text; video + analysis: Evaluated via a user study comparing preferences between Stable Video Diffusion and competing text-to-video models. + size: unknown + dependencies: [Large Video Dataset] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: Intended for research purposes only. + prohibited_uses: Using the model to generate representations of real-world people or events. + monitoring: '' + feedback: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/discussions + +- type: dataset + name: Large Video Dataset + organization: Stability AI + description: Large Video Dataset is the dataset used to train Stable Video Diffusion, consisting of over 212 years of video content. + created_date: 2023-11-21 + url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf + datasheet: '' + modality: video with caption + size: 580M annotated video clip pairs + sample: [] + analysis: Large Video Dataset is compared to publicly available research datasets on general statistics before and after filtering. + dependencies: [WebVid-10M, CoCa, V-BLIP] + included: '' + excluded: '' + quality_control: The dataset is annotated with dense optical flow, and videos with low optical flow are removed. + access: closed + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: none + From faed058b09ac0b6786d48b0f5af42e09010182ae Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Thu, 30 Nov 2023 15:12:55 -0800 Subject: [PATCH 2/4] add new models --- assets/01ai.yaml | 22 ++++++++++++++++++++++ assets/deepseek.yaml | 22 ++++++++++++++++++++++ assets/qwen.yaml | 22 ++++++++++++++++++++++ assets/xai.yaml | 24 ++++++++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 assets/01ai.yaml create mode 100644 assets/deepseek.yaml create mode 100644 assets/qwen.yaml create mode 100644 assets/xai.yaml diff --git a/assets/01ai.yaml b/assets/01ai.yaml new file mode 100644 index 00000000..84e4ca9f --- /dev/null +++ b/assets/01ai.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: Yi + organization: 01 AI + description: The Yi series models are large language models trained from scratch by developers at 01 AI. + created_date: 2023-11-02 + url: https://github.com/01-ai/Yi + model_card: https://huggingface.co/01-ai/Yi-34B + modality: text; text + analysis: Evaluated on standard language benchmarks, common sense reasoning, and reading comprehension in comparison to SoTA LLMs. + size: 34B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: Model underwent supervised fine-tuning, leading to a greater diversity of responses.
+ access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: none + monitoring: unknown + feedback: https://huggingface.co/01-ai/Yi-34B/discussions diff --git a/assets/deepseek.yaml b/assets/deepseek.yaml new file mode 100644 index 00000000..9354fe51 --- /dev/null +++ b/assets/deepseek.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: Deepseek + organization: Deepseek AI + description: Deepseek is a 67B-parameter model with Grouped-Query Attention, trained from scratch on 2 trillion tokens. + created_date: 2023-11-29 + url: https://github.com/deepseek-ai/DeepSeek-LLM + model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base + modality: text; text + analysis: Deepseek and baseline models (for comparison) are evaluated on a series of representative benchmarks in both English and Chinese. + size: 67B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: The training dataset has a diverse data composition and was pruned and deduplicated. + access: open + license: MIT + intended_uses: '' + prohibited_uses: none + monitoring: unknown + feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base/discussions diff --git a/assets/qwen.yaml b/assets/qwen.yaml new file mode 100644 index 00000000..5bb16a5c --- /dev/null +++ b/assets/qwen.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: Qwen + organization: Qwen AI + description: Qwen is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc. + created_date: 2023-11-26 + url: https://arxiv.org/pdf/2309.16609.pdf + model_card: https://huggingface.co/Qwen/Qwen-72B + modality: text; text + analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, and CMMLU, which are currently popular benchmarks, to test the model’s Chinese and English knowledge, translation, mathematical reasoning, coding, and other capabilities. + size: 72B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: none + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: none + monitoring: unknown + feedback: https://huggingface.co/Qwen/Qwen-72B/discussions diff --git a/assets/xai.yaml b/assets/xai.yaml new file mode 100644 index 00000000..4889be3b --- /dev/null +++ b/assets/xai.yaml @@ -0,0 +1,24 @@ +--- +- type: model + name: Grok-1 + organization: xAI + description: Grok is a text chatbot modeled after the Hitchhiker’s Guide to the Galaxy, intended to answer almost anything and even suggest what questions to ask. + created_date: 2023-11-04 + url: https://github.com/01-ai/Yi + model_card: https://grok.x.ai/ + modality: text; text + analysis: Evaluated on standard language benchmarks in comparison to SoTA LLMs.
+ size: + explanation: No model size specified, but Grok-1 is larger than predecessor Grok-0 (33B parameters) + value: unknown + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: none + access: limited + license: unknown + intended_uses: '' + prohibited_uses: none + monitoring: unknown + feedback: none From 54ec7994ce55b053ac3d68f783b724ab0562b67e Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Sun, 3 Dec 2023 22:37:02 -0800 Subject: [PATCH 3/4] resolve changes --- assets/ai2.yaml | 2 +- assets/inflection.yaml | 2 +- assets/xai.yaml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/ai2.yaml b/assets/ai2.yaml index 2dd849b9..fe116670 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -129,7 +129,7 @@ explanation: Magnitude of size is around 100M tokens, given the length distribution of the dataset provided in the model card. sample: [] analysis: Models trained with the dataset are evaluated on downstream performance. - dependencies: [FLAN, Open Assistant 1, ShareGPT, GPT4 Alpaca, Code Alpaca, LIMA, WizardLM Evol Instruct, Open Orca] + dependencies: [FLAN Collection, Open Assistant 1, ShareGPT, Alpaca dataset, Code Alpaca, LIMA, WizardLM, OpenOrca] included: '' excluded: '' quality_control: '' diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 3dcb7370..714506e3 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -63,7 +63,7 @@ training_time: unknown training_hardware: 5000 NVIDIA H100 GPUs quality_control: '' - access: limited + access: closed license: unknown intended_uses: '' prohibited_uses: '' diff --git a/assets/xai.yaml b/assets/xai.yaml index 4889be3b..fb1dd0a0 100644 --- a/assets/xai.yaml +++ b/assets/xai.yaml @@ -4,12 +4,12 @@ organization: xAI description: Grok is a text chatbot modeled after the Hitchhiker’s Guide to the Galaxy, intended to answer almost anything and even suggest what questions to ask. created_date: 2023-11-04 - url: https://github.com/01-ai/Yi - model_card: https://grok.x.ai/ + url: https://grok.x.ai/ + model_card: https://x.ai/model-card/ modality: text; text analysis: Evaluated on standard language benchmarks in comparison to SoTA LLMs. size: - explanation: No model size specified, but Grok-1 is larger than predecessor Grok-0 (33B parameters) + explanation: No model size specified, but Grok-1 is larger than predecessor Grok-0 (33B parameters), as stated in the Grok announcement at https://x.ai/. value: unknown dependencies: [] training_emissions: unknown From 58c0385b03e8f2aca75479cafbdb2b6c5bf7fb20 Mon Sep 17 00:00:00 2001 From: jxue16 <105090474+jxue16@users.noreply.github.com> Date: Sat, 9 Dec 2023 13:33:30 -0800 Subject: [PATCH 4/4] resolve changes --- assets/inflection.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/assets/inflection.yaml b/assets/inflection.yaml index 714506e3..e891bd84 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -55,9 +55,7 @@ model_card: none modality: text; text analysis: Evaluated against state-of-the-art models on benchmarks, and found to be the most performant model outside of GPT-4. - size: - value: unknown - explanation: Likely larger than Inflection-1, which was stated to be around the size of ChatGPT (175B parameters). + size: unknown dependencies: [] training_emissions: unknown training_time: unknown
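Note for reviewers: every entry added across the four patches above follows the same asset schema (type, name, organization, description, created_date, url, modality, size, analysis, dependencies, access, license, and so on). The sketch below shows one way such a file could be loaded and sanity-checked before merging. It is illustrative only, not the repository's actual validation tooling: the COMMON_FIELDS set is inferred from the entries in this patch, the check_assets helper is hypothetical, and PyYAML is assumed to be installed.

# Illustrative sketch: load an assets YAML file touched by this patch and report
# entries that are missing the fields every entry added here provides.
# Assumptions: PyYAML is available; each assets file is a YAML list of mappings.
import yaml

# Fields shared by both the model and dataset entries added in this patch.
COMMON_FIELDS = {
    "type", "name", "organization", "description", "created_date", "url",
    "modality", "size", "analysis", "dependencies", "quality_control",
    "access", "license", "intended_uses", "prohibited_uses", "monitoring",
    "feedback",
}

def check_assets(path):
    """Return (asset name, missing fields) pairs for one assets YAML file."""
    with open(path) as f:
        assets = yaml.safe_load(f) or []
    problems = []
    for entry in assets:
        missing = sorted(COMMON_FIELDS - set(entry))
        if missing:
            problems.append((entry.get("name", "<unnamed>"), missing))
    return problems

if __name__ == "__main__":
    # Example usage against one of the files modified above.
    for name, missing in check_assets("assets/ai2.yaml"):
        print(f"{name}: missing {missing}")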