diff --git a/assets/01ai.yaml b/assets/01ai.yaml index 84e4ca9f..60c537fb 100644 --- a/assets/01ai.yaml +++ b/assets/01ai.yaml @@ -2,18 +2,21 @@ - type: model name: Yi organization: 01 AI - description: The Yi series models are large language models trained from scratch by developers at 01 AI. + description: The Yi series models are large language models trained from scratch + by developers at 01 AI. created_date: 2023-11-02 url: https://github.com/01-ai/Yi model_card: https://huggingface.co/01-ai/Yi-34B modality: text; text - analysis: Evaluated on standard language benchmarks, common sense reasoning, and reading comprehension in comparison to SoTA LLMs. + analysis: Evaluated on standard language benchmarks, common sense reasoning, and + reading comprehension in comparison to SoTA LLMs. size: 34B parameters (dense) dependencies: [] training_emissions: unknown training_time: unknown training_hardware: unknown - quality_control: Model underwent supervised fine-tuning, leading to a greater diversity of responses. + quality_control: Model underwent supervised fine-tuning, leading to a greater + diversity of responses. access: open license: Apache 2.0 intended_uses: '' diff --git a/assets/ai2.yaml b/assets/ai2.yaml index fe116670..9e5f0301 100644 --- a/assets/ai2.yaml +++ b/assets/ai2.yaml @@ -119,17 +119,28 @@ - type: dataset name: Tulu-V2-mix organization: AI2 - description: Tulu-V2-mix is a dataset composed of many high-quality instruction datasets that results in stronger performance across a variety of reasoning and knowledge-probing tasks. + description: Tulu-V2-mix is a dataset composed of many high-quality instruction + datasets that results in stronger performance across a variety of reasoning + and knowledge-probing tasks. 
created_date: 2023-11-20 url: https://arxiv.org/pdf/2311.10702.pdf datasheet: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture modality: text size: value: unknown - explanation: Magnitude of size is around 100M tokens, given the length distribution of dataset provided in model card. + explanation: Magnitude of size is around 100M tokens, given the length distribution + of dataset provided in model card. sample: [] analysis: Models trained with dataset evaluated on downstream performance. - dependencies: [FLAN Collection, Open Assistant 1, ShareGPT, Alpaca dataset, Code Alpaca, LIMA, WizardLM, OpenOrca] + dependencies: + - FLAN Collection + - Open Assistant 1 + - ShareGPT + - Alpaca dataset + - Code Alpaca + - LIMA + - WizardLM + - OpenOrca included: '' excluded: '' quality_control: '' @@ -143,13 +154,13 @@ - type: model name: Tulu 2 organization: AI2 - description: Tulu 2 is a language model trained on the new Tulu-v2-mix dataset and fine-tuned on more state of the art language models. + description: Tulu 2 is a language model trained on the new Tulu-v2-mix dataset + and fine-tuned on more state of the art language models. created_date: 2023-11-20 url: https://arxiv.org/pdf/2311.10702.pdf model_card: https://huggingface.co/allenai/tulu-2-70b modality: text; text - analysis: Evaluated on MT-Bench and AlpacaEval. - compared to other chatbots. + analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots. size: 70B parameters (dense) dependencies: [LLaMA 2, Tulu-V2-mix] training_emissions: unknown @@ -166,13 +177,13 @@ - type: model name: Tulu 2 DPO organization: AI2 - description: Tulu 2 DPO is created in a similar manner to Tulu 2, but with Direct Preference Optimization (DPO). + description: Tulu 2 DPO is created in a similar manner to Tulu 2, but with Direct + Preference Optimization (DPO). 
created_date: 2023-11-20 url: https://arxiv.org/pdf/2311.10702.pdf model_card: https://huggingface.co/allenai/tulu-2-dpo-70b modality: text; text - analysis: Evaluated on MT-Bench and AlpacaEval. - compared to other chatbots. + analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots. size: 70B parameters (dense) dependencies: [LLaMA 2, Tulu-V2-mix] training_emissions: unknown @@ -189,13 +200,13 @@ - type: model name: Code Tulu 2 organization: AI2 - description: Code Tulu 2 is a fine-tuned version of Code LLaMA that was trained on a mix of publicly available, synthetic and human datasets. + description: Code Tulu 2 is a fine-tuned version of Code LLaMA that was trained + on a mix of publicly available, synthetic and human datasets. created_date: 2023-11-20 url: https://arxiv.org/pdf/2311.10702.pdf model_card: https://huggingface.co/allenai/codetulu-2-13b modality: text; code, text - analysis: Evaluated on MT-Bench and AlpacaEval. - compared to other chatbots. + analysis: Evaluated on MT-Bench and AlpacaEval, compared to other chatbots. size: 13B parameters (dense) dependencies: [Code LLaMA, Tulu-V2-mix] training_emissions: unknown @@ -208,4 +219,3 @@ prohibited_uses: '' monitoring: '' feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions - diff --git a/assets/amazon.yaml b/assets/amazon.yaml index 4e634935..32b24e29 100644 --- a/assets/amazon.yaml +++ b/assets/amazon.yaml @@ -41,7 +41,7 @@ analysis: Evaluated against benchmarks that are specifically designed to assess the capabilities of LLMs in handling longer contexts. 
size: 40B parameters (dense) - dependencies: [Falcon] + dependencies: [Falcon-40B] training_emissions: unknown training_time: unknown training_hardware: unknown diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index 62f520b5..4d2dff3d 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -549,3 +549,24 @@ prohibited_uses: '' monitoring: '' feedback: none +- type: application + name: Claude for Sheets + organization: Anthropic + description: Claude for Sheets is a Google Sheets add-on that allows the usage + of Claude directly in Google Sheets. + created_date: 2023-12-21 + url: https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257 + dependencies: [Anthropic API] + adaptation: '' + output_space: AI-generated text from prompt + quality_control: '' + access: open + license: unknown + terms_of_service: https://claude.ai/legal + intended_uses: as an integrated AI assistant in Google Sheets + prohibited_uses: '' + monitoring: unknown + feedback: Reviews on https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257 + monthly_active_users: unknown + user_distribution: unknown + failures: unknown diff --git a/assets/cresta.yaml b/assets/cresta.yaml new file mode 100644 index 00000000..3a52df59 --- /dev/null +++ b/assets/cresta.yaml @@ -0,0 +1,25 @@ +--- +- type: model + name: Ocean-1 + organization: Cresta + description: Ocean-1 is the culmination of Cresta's experience in deploying generative + AI systems for large enterprises and signifies their latest milestone in advancing + the cutting edge AI technology for customer facing conversations. + created_date: 2023-06-20 + url: https://cresta.com/blog/introducing-ocean-1-worlds-first-contact-center-foundation-model/ + model_card: none + modality: text; text + analysis: Outperforms GPT-4 in common sense and reasoning tasks on the basis of + both efficiency and accuracy. 
+ size: 7B parameters (dense) + dependencies: [GPT-4, Claude, Falcon-40B] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: closed + license: unknown + intended_uses: Acting as a contact center chatbot agent. + prohibited_uses: none + monitoring: unknown + feedback: none diff --git a/assets/deci.yaml b/assets/deci.yaml new file mode 100644 index 00000000..14a2aef6 --- /dev/null +++ b/assets/deci.yaml @@ -0,0 +1,25 @@ +--- +- type: model + name: DeciLM + organization: Deci + description: DeciLM is an LLM that on release ranks as the fastest and most accurate + model of its size. + created_date: 2023-12-12 + url: https://deci.ai/blog/introducing-decilm-7b-the-fastest-and-most-accurate-7b-large-language-model-to-date + model_card: https://deci.ai/model-zoo/decilm-7b/ + modality: text; text + analysis: Evaluated on the OpenLLM benchmarks and, on release, outperforms all + other 7B models on the OpenLLM Leaderboard. + size: 7B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: NVIDIA A10 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: This model is intended for commercial and research use in English + and can be fine-tuned for use in other languages. + prohibited_uses: '' + monitoring: unknown + feedback: none diff --git a/assets/deepseek.yaml b/assets/deepseek.yaml index 9354fe51..3ab98d9e 100644 --- a/assets/deepseek.yaml +++ b/assets/deepseek.yaml @@ -2,18 +2,21 @@ - type: model name: Deepseek organization: Deepseek AI - description: Deepseek is a 67B parameter model with Grouped-Query Attention trained on 2 trillion tokens from scratch. + description: Deepseek is a 67B parameter model with Grouped-Query Attention trained + on 2 trillion tokens from scratch. 
created_date: 2023-11-29 url: https://github.com/deepseek-ai/DeepSeek-LLM model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base modality: text; text - analysis: Deepseek and baseline models (for comparison) evaluated on a series of representative benchmarks, both in English and Chinese. + analysis: Deepseek and baseline models (for comparison) evaluated on a series + of representative benchmarks, both in English and Chinese. size: 67B parameters (dense) dependencies: [] training_emissions: unknown training_time: unknown training_hardware: unknown - quality_control: Training dataset comprised of diverse data composition and pruned and deduplicated. + quality_control: Training dataset comprised of diverse data composition and pruned + and deduplicated. access: open license: MIT intended_uses: '' diff --git a/assets/google.yaml b/assets/google.yaml index 1bedd680..61671472 100644 --- a/assets/google.yaml +++ b/assets/google.yaml @@ -1678,6 +1678,32 @@ within specific downstream applications without prior assessment monitoring: Google internal monitoring feedback: Specific queries provided by annotators +- type: model + name: MedLM + organization: Google + description: MedLM is a collection of foundation models tuned to follow natural + language instructions for tasks in medicine, such as question answering and + creating draft summaries. + created_date: 2023-12-13 + url: https://cloud.google.com/vertex-ai/docs/generative-ai/medlm/overview + model_card: https://cloud.google.com/static/vertex-ai/docs/generative-ai/medlm/MedLM-model-card.pdf + modality: text; text + analysis: Assessed on medical benchmarks of professional medical exams, medical + research, and consumer queries. 
+ size: unknown + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: to be used for question answering and creating draft summaries + from existing documentation, to be reviewed, edited, and approved by the user + before use. + prohibited_uses: '' + monitoring: Google internal monitoring + feedback: none - type: model name: Gemini organization: Google diff --git a/assets/inflection.yaml b/assets/inflection.yaml index e891bd84..84f958a7 100644 --- a/assets/inflection.yaml +++ b/assets/inflection.yaml @@ -49,12 +49,15 @@ - type: model name: Inflection-2 organization: Inflection AI - description: Inflection-2 is the best model in the world for its compute class and the second most capable LLM in the world, according to benchmark evaluation, as of its release. + description: Inflection-2 is the best model in the world for its compute class + and the second most capable LLM in the world, according to benchmark evaluation, + as of its release. created_date: 2023-11-22 url: https://inflection.ai/inflection-2 model_card: none modality: text; text - analysis: Evaluated against state of the art models on benchmarks, and found to be most performant model outside of GPT-4. + analysis: Evaluated against state of the art models on benchmarks, and found to + be most performant model outside of GPT-4. 
size: unknown dependencies: [] training_emissions: unknown @@ -67,4 +70,3 @@ prohibited_uses: '' monitoring: '' feedback: none - diff --git a/assets/llm360.yaml b/assets/llm360.yaml new file mode 100644 index 00000000..0ae769b0 --- /dev/null +++ b/assets/llm360.yaml @@ -0,0 +1,60 @@ +--- +- type: model + name: Amber + organization: LLM360 + description: Amber is the first model in the LLM360 family, an initiative for + comprehensive and fully open-sourced LLMs, where all training details, model + checkpoints, intermediate results, and additional analyses are made available + to the community. + created_date: 2023-12-12 + url: https://www.llm360.ai/ + model_card: https://huggingface.co/LLM360/Amber + modality: text; text + analysis: Evaluated on several benchmark LLM tasks + size: 7B parameters (dense) + dependencies: + - Arxiv + - Books + - C4 + - RefinedWeb + - StarCoder + - StackExchange + - Wikipedia + training_emissions: unknown + training_time: unknown + training_hardware: 56 DGX A100 nodes, each equipped with 4 80GB A100 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: to support open and collaborative AI research by making the full + LLM training process transparent. + prohibited_uses: '' + monitoring: unknown + feedback: https://huggingface.co/LLM360/Amber/discussions + +- type: model + name: CrystalCoder + organization: LLM360 + description: CrystalCoder is a language model with a balance of code and text + data that follows the initiative under LLM360 of its training process being + fully transparent. + created_date: 2023-12-12 + url: https://www.llm360.ai/ + model_card: https://huggingface.co/LLM360/CrystalCoder + modality: text; code, text + analysis: Evaluated on English and coding tasks and benchmarks, and outperforms + LLaMA 2 in some. 
+ size: 7B parameters (dense) + dependencies: [SlimPajama dataset, StarCoder] + training_emissions: unknown + training_time: unknown + training_hardware: Trained on the Cerebras Condor Galaxy 1 (CG-1), a 4 exaFLOPS, + 54 million core, 64-node cloud AI supercomputer. + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: to support open and collaborative AI research by making the full + LLM training process transparent. + prohibited_uses: '' + monitoring: unknown + feedback: https://huggingface.co/LLM360/CrystalCoder/discussions diff --git a/assets/meta.yaml b/assets/meta.yaml index 82d25994..af86f916 100644 --- a/assets/meta.yaml +++ b/assets/meta.yaml @@ -698,7 +698,8 @@ - type: model name: Code LLaMA organization: Meta - description: Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. + description: Code Llama is a collection of pretrained and fine-tuned generative + text models ranging in scale from 7 billion to 34 billion parameters. created_date: 2023-08-24 url: https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/ model_card: https://huggingface.co/codellama/CodeLlama-34b-hf @@ -712,8 +713,12 @@ quality_control: '' access: open license: LLaMA 2 - intended_uses: Code Llama and its variants is intended for commercial and research use in English and relevant programming languages. - prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants. + intended_uses: Code Llama and its variants is intended for commercial and research + use in English and relevant programming languages. + prohibited_uses: Use in any manner that violates applicable laws or regulations + (including trade compliance laws). 
Use in languages other than English. Use + in any other way that is prohibited by the Acceptable Use Policy and Licensing + Agreement for Code Llama and its variants. monitoring: '' feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions @@ -788,4 +793,3 @@ prohibited_uses: '' monitoring: none feedback: none - diff --git a/assets/qwen.yaml b/assets/qwen.yaml index 5bb16a5c..34ffaa02 100644 --- a/assets/qwen.yaml +++ b/assets/qwen.yaml @@ -2,12 +2,16 @@ - type: model name: Qwen organization: Qwen AI - description: Qwen is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc. + description: Qwen is a Transformer-based large language model, which is pretrained + on a large volume of data, including web texts, books, codes, etc. created_date: 2023-11-26 url: https://arxiv.org/pdf/2309.16609.pdf model_card: https://huggingface.co/Qwen/Qwen-72B modality: text; text - analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, which are currently popular benchmarks, to test the model’s Chinese and English knowledge capabilities, translation, mathematical reasoning, coding and other capabilities. + analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, + which are currently popular benchmarks, to test the model’s Chinese and English + knowledge capabilities, translation, mathematical reasoning, coding and other + capabilities. size: 72B parameters (dense) dependencies: [] training_emissions: unknown diff --git a/assets/stability.yaml b/assets/stability.yaml index 255e298d..107803e2 100644 --- a/assets/stability.yaml +++ b/assets/stability.yaml @@ -98,12 +98,14 @@ - type: model name: Stable Video Diffusion organization: Stability AI - description: Stable Video Diffusion is a latent diffusion model trained to generate short video clips from an image conditioning. 
+ description: Stable Video Diffusion is a latent diffusion model trained to generate + short video clips from an image conditioning. created_date: 2023-11-21 url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf model_card: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt modality: text; video - analysis: Evaluated via a user study comparing preferences between Stable Video Diffusion and competing text-to-video models. + analysis: Evaluated via a user study comparing preferences between Stable Video + Diffusion and competing text-to-video models. size: unknown dependencies: [Large Video Dataset] training_emissions: unknown @@ -113,26 +115,30 @@ access: limited license: unknown intended_uses: Intended for research purposes only. - prohibited_uses: Using the model to generate representations of real-world people or events. + prohibited_uses: Using the model to generate representations of real-world people + or events. monitoring: '' feedback: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/discussions - type: dataset name: Large Video Dataset organization: Stability AI - description: Large Video Dataset is the dataset that trained Stable Video Diffusion, consisting of over 212 years of content. + description: Large Video Dataset is the dataset that trained Stable Video Diffusion, + consisting of over 212 years of content. created_date: 2023-11-21 url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf datasheet: '' modality: video with caption size: 580M annotated video clip pairs sample: [] - analysis: Large Video Dataset compared to publicly available research datasets on general statistics before and after filtering. + analysis: Large Video Dataset compared to publicly available research datasets + on general statistics before and after filtering. 
dependencies: [WebVid-10M, CoCa, V-BLIP] included: '' excluded: '' - quality_control: Dataset annotated with dense optical flow, and low optical flow videos are removed. - access: closed + quality_control: Dataset annotated with dense optical flow, and low optical flow + videos are removed. + access: closed license: unknown intended_uses: '' prohibited_uses: '' @@ -161,4 +167,3 @@ monthly_active_users: '' user_distribution: '' failures: '' - diff --git a/assets/together.yaml b/assets/together.yaml index 5bc511d0..2b54ee86 100644 --- a/assets/together.yaml +++ b/assets/together.yaml @@ -179,7 +179,9 @@ - type: model name: StripedHyena organization: Together - description: StripedHyena is an LLM and the first alternative model competitive with the best open-source Transformers in short and long-context evaluations, according to Together. + description: StripedHyena is an LLM and the first alternative model competitive + with the best open-source Transformers in short and long-context evaluations, + according to Together. created_date: 2023-12-08 url: https://www.together.ai/blog/stripedhyena-7b model_card: https://huggingface.co/togethercomputer/StripedHyena-Hessian-7B @@ -200,7 +202,9 @@ - type: model name: StripedHyena Nous organization: Together - description: StripedHyena Nous is an LLM and chatbot, along with the first alternative model competitive with the best open-source Transformers in short and long-context evaluations, according to Together. + description: StripedHyena Nous is an LLM and chatbot, along with the first alternative + model competitive with the best open-source Transformers in short and long-context + evaluations, according to Together. 
created_date: 2023-12-08 url: https://www.together.ai/blog/stripedhyena-7b model_card: https://huggingface.co/togethercomputer/StripedHyena-Nous-7B diff --git a/assets/xai.yaml b/assets/xai.yaml index d7e0e772..f2a1bfea 100644 --- a/assets/xai.yaml +++ b/assets/xai.yaml @@ -7,9 +7,11 @@ url: https://grok.x.ai/ model_card: https://x.ai/model-card/ modality: text; text - analysis: Grok-1 was evaluated on a range of reasoning benchmark tasks and on curated foreign mathematic examination questions. - size: - explanation: No model size specified, but Grok-1 is larger than predecessor Grok-0 (33B parameters), as stated in the Grok announcement at https://x.ai/. + analysis: Grok-1 was evaluated on a range of reasoning benchmark tasks and on + curated foreign mathematic examination questions. + size: + explanation: No model size specified, but Grok-1 is larger than predecessor + Grok-0 (33B parameters), as stated in the Grok announcement at https://x.ai/. value: unknown dependencies: [] training_emissions: unknown diff --git a/js/main.js b/js/main.js index 1db28eaf..ec01c79b 100644 --- a/js/main.js +++ b/js/main.js @@ -670,7 +670,7 @@ function loadAssetsAndRenderPageContent() { 'assets/bigcode.yaml', 'assets/transformify.yaml', 'assets/paladin.yaml', - 'assets/lmsys.yaml', + 'assets/01ai.yaml', 'assets/ai2.yaml', 'assets/ai21.yaml', 'assets/aleph_alpha.yaml', @@ -688,11 +688,15 @@ function loadAssetsAndRenderPageContent() { 'assets/cerebras.yaml', 'assets/cmu.yaml', 'assets/cohere.yaml', + 'assets/continue.yaml', + 'assets/cresta.yaml', 'assets/databricks.yaml', + 'assets/deci.yaml', 'assets/ollama.yaml', 'assets/argilla.yaml', 'assets/epfl.yaml', 'assets/deepmind.yaml', + 'assets/deepseek.yaml', 'assets/duckduckgo.yaml', 'assets/duolingo.yaml', 'assets/eleutherai.yaml', @@ -707,6 +711,8 @@ function loadAssetsAndRenderPageContent() { 'assets/laion.yaml', 'assets/latitude.yaml', 'assets/linkedin.yaml', + 'assets/llm360.yaml', + 'assets/lmsys.yaml', 'assets/meta.yaml', 
'assets/microsoft.yaml', 'assets/naver.yaml', @@ -716,6 +722,7 @@ function loadAssetsAndRenderPageContent() { 'assets/openai.yaml', 'assets/othersideai.yaml', 'assets/oxford.yaml', + 'assets/peking.yaml', 'assets/perplexity.yaml', 'assets/quizlet.yaml', 'assets/quora.yaml', @@ -735,8 +742,9 @@ function loadAssetsAndRenderPageContent() { 'assets/tsinghua.yaml', 'assets/uw.yaml', 'assets/viable.yaml', + 'assets/xai.yaml', 'assets/yandex.yaml', - 'assets/you.yaml', + 'assets/you.yaml' ]; $.get('js/schemas.yaml', {}, (response) => {