diff --git a/assets/adobe.yaml b/assets/adobe.yaml new file mode 100644 index 00000000..9d8b0a44 --- /dev/null +++ b/assets/adobe.yaml @@ -0,0 +1,88 @@ +--- +- type: model + name: Firefly Image 2 + organization: Adobe + description: Firefly Image 2 is the next generation of Adobe's generative AI model for imaging, bringing improved creative control and image quality, including new Text to Image capabilities now available in the Firefly web app, where Adobe reports that 90% of users are new to its products. + created_date: 2023-10-10 + url: https://firefly.adobe.com/ + model_card: none + modality: text; image + analysis: '' + size: unknown + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: closed + license: unknown + intended_uses: creative generation of digital art and images + prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. + monitoring: '' + feedback: '' + +- type: model + name: Firefly Vector + organization: Adobe + description: Firefly Vector is described by Adobe as the first generative AI model focused on producing vector graphics, bringing Adobe's vector graphic and generative AI expertise directly into Adobe Illustrator workflows with Text to Vector Graphic. + created_date: 2023-10-10 + url: https://firefly.adobe.com/ + model_card: none + modality: text; vector graphic + analysis: '' + size: unknown + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: closed + license: unknown + intended_uses: creative generation of digital art and images + prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. + monitoring: '' + feedback: '' + +- type: model + name: Firefly Design + organization: Adobe + description: Firefly Design powers instant generation of high-quality template designs in Adobe Express with the new Text to Template capability. + created_date: 2023-10-10 + url: https://firefly.adobe.com/ + model_card: none + modality: text; template design + analysis: '' + size: unknown + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: closed + license: unknown + intended_uses: creative generation of digital art and images + prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. + monitoring: '' + feedback: '' + +- type: application + name: Firefly + organization: Adobe + description: Adobe Firefly is a standalone web application. It offers new ways to ideate, create, and communicate while significantly improving creative workflows using generative AI. + created_date: 2023-03-21 + url: https://firefly.adobe.com/ + dependencies: [Firefly Image 2, Firefly Vector, Firefly Design] + adaptation: '' + output_space: AI-generated creations + quality_control: '' + access: limited + license: unknown + terms_of_service: https://www.adobe.com/legal/licenses-terms/adobe-gen-ai-user-guidelines.html + intended_uses: creative generation of digital art and images + prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content.
+ monitoring: '' + feedback: '' + monthly_active_users: unknown + user_distribution: unknown + failures: unknown + diff --git a/assets/amazon.yaml b/assets/amazon.yaml index 2fb5244e..5a4334a3 100644 --- a/assets/amazon.yaml +++ b/assets/amazon.yaml @@ -29,3 +29,24 @@ monthly_active_users: '' user_distribution: '' failures: '' +- type: model + name: FalconLite2 + organization: Amazon + description: FalconLite2 is a fine-tuned and quantized Falcon language model, capable of processing long (up to 24K tokens) input sequences. + created_date: 2023-08-08 + url: https://huggingface.co/amazon/FalconLite2 + model_card: https://huggingface.co/amazon/FalconLite2 + modality: text; text + analysis: Evaluated against benchmarks that are specifically designed to assess the capabilities of LLMs in handling longer contexts. + size: 40B parameters (dense) + dependencies: [Falcon] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/amazon/FalconLite2/discussions diff --git a/assets/anthropic.yaml b/assets/anthropic.yaml index a0188d7a..c7869018 100644 --- a/assets/anthropic.yaml +++ b/assets/anthropic.yaml @@ -524,3 +524,24 @@ where an incorrect answer would cause harm. monitoring: '' feedback: '' +- type: model + name: Claude 2.1 + organization: Anthropic + description: Claude 2.1 is an updated version of Claude 2, with a larger context window, reduced hallucination rates, and tool use. + created_date: 2023-11-21 + url: https://www.anthropic.com/index/claude-2-1 + model_card: none + modality: text; text + analysis: Evaluated on open-ended conversation accuracy and long context question answering. In evaluations, Claude 2.1 demonstrated a 30% reduction in incorrect answers and a 3-4x lower rate of mistakenly concluding a document supports a particular claim. + size: unknown + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: limited + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: none diff --git a/assets/baichuan.yaml b/assets/baichuan.yaml new file mode 100644 index 00000000..cdbf117e --- /dev/null +++ b/assets/baichuan.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: Baichuan 2 + organization: Baichuan Inc. + description: Baichuan 2 is a series of large-scale multilingual language models containing 7 billion and 13 billion parameters, trained from scratch on 2.6 trillion tokens. + created_date: 2023-09-20 + url: https://arxiv.org/pdf/2309.10305.pdf + model_card: none + modality: text; text + analysis: Evaluated on public benchmarks like MMLU, CMMLU, GSM8K, and HumanEval.
+ size: 13B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: 1024 NVIDIA A800 GPUs + quality_control: '' + access: open + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: none + feedback: https://huggingface.co/baichuan-inc/Baichuan2-13B-Base/discussions diff --git a/assets/character.yaml b/assets/character.yaml new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/assets/character.yaml @@ -0,0 +1 @@ + diff --git a/assets/columbia.yaml b/assets/columbia.yaml index 18fd04a0..58c6c299 100644 --- a/assets/columbia.yaml +++ b/assets/columbia.yaml @@ -20,3 +20,24 @@ prohibited_uses: '' monitoring: '' feedback: '' +- type: model + name: Ferret + organization: Columbia + description: Ferret is a Multimodal Large Language Model (MLLM) capable of understanding spatial referring of any shape or granularity within an image and accurately grounding open-vocabulary descriptions. + created_date: 2023-10-11 + url: https://arxiv.org/pdf/2310.07704.pdf + model_card: none + modality: image, text; image, text + analysis: Evaluated on the object hallucination benchmark and compared to GPT-4V. + size: 13B parameters + dependencies: [CLIP, Vicuna] + training_emissions: unknown + training_time: 2.5 to 5 days + training_hardware: 8 A100 GPUs + quality_control: '' + access: open + license: unknown + intended_uses: '' + prohibited_uses: '' + monitoring: none + feedback: none diff --git a/assets/huggingface.yaml b/assets/huggingface.yaml index 54b3d82c..7bbf3233 100644 --- a/assets/huggingface.yaml +++ b/assets/huggingface.yaml @@ -24,3 +24,68 @@ prohibited_uses: none monitoring: none feedback: none +- type: model + name: Zephyr + organization: HuggingFace + description: Zephyr is a series of language models that are trained to act as helpful assistants. + created_date: 2023-10-11 + url: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha + model_card: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha + modality: text; text + analysis: Evaluated on training metrics, including loss, rewards, log probabilities, and logits for chosen and rejected responses. + size: 7B parameters (dense) + dependencies: [Mistral] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: none + access: open + license: MIT + intended_uses: Educational and research purposes + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha/discussions +- type: model + name: IDEFICS + organization: HuggingFace + description: IDEFICS is an open-access visual language model based on Flamingo. + created_date: 2023-08-22 + url: https://huggingface.co/blog/idefics + model_card: https://huggingface.co/HuggingFaceM4/idefics-80b-instruct + modality: image, text; text + analysis: Evaluated in comparison to Flamingo and OpenFlamingo on standard benchmarks.
+ size: 80B parameters (dense) + dependencies: [OBELICS, Wikipedia, LAION-5B, PMD] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: none + access: open + license: + explanation: Can be found at https://huggingface.co/HuggingFaceM4/idefics-80b-instruct#license + value: custom + intended_uses: Educational and research purposes + prohibited_uses: none + monitoring: none + feedback: https://huggingface.co/HuggingFaceM4/idefics-80b-instruct/discussions +- type: dataset + name: OBELICS + organization: HuggingFace + description: OBELICS is a dataset consisting of 141 million interleaved image-text documents scraped from the web, containing 353 million images. + created_date: 2023-08-22 + url: https://huggingface.co/blog/idefics + datasheet: https://huggingface.co/datasets/HuggingFaceM4/OBELICS + modality: image, text + size: 115B tokens + sample: [] + analysis: Subset of training dataset evaluated for bias using Data Measurements Tool. + dependencies: [] + included: '' + excluded: All images for which creators explicitly requested opt-out of AI training. + quality_control: Sexual and violent content still present in OBELICS even after filtering. + access: open + license: CC-BY-4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/datasets/HuggingFaceM4/OBELICS/discussions diff --git a/assets/microsoft.yaml b/assets/microsoft.yaml index 57c12277..d9d98f8d 100644 --- a/assets/microsoft.yaml +++ b/assets/microsoft.yaml @@ -763,3 +763,68 @@ prohibited_uses: '' monitoring: '' feedback: https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0/discussions +- type: dataset + name: OpenOrca + organization: Microsoft + description: The OpenOrca dataset is a collection of augmented FLAN Collection data, currently comprising ~1M GPT-4 completions and ~3.2M GPT-3.5 completions. It is tabularized in alignment with the distributions presented in the Orca paper and currently represents a partial completion of the full intended dataset, with ongoing generation to expand its scope. + created_date: 2023-06-05 + url: https://huggingface.co/datasets/Open-Orca/OpenOrca + datasheet: https://huggingface.co/datasets/Open-Orca/OpenOrca + modality: text + size: 4.5M text queries + sample: [] + analysis: Models trained on OpenOrca compared to GPT-series on language benchmarks. + dependencies: [GPT-3.5, GPT-4, Flan Collection] + included: '' + excluded: '' + quality_control: '' + access: open + license: MIT + intended_uses: training and evaluation in the field of natural language processing. + prohibited_uses: none + monitoring: '' + feedback: none +- type: model + name: LlongOrca + organization: Microsoft + description: LlongOrca is a fine-tune of LLongMA-2 on the OpenOrca dataset, adapted to function over long (16K-token) contexts. + created_date: 2023-08-01 + url: https://huggingface.co/Open-Orca/LlongOrca-7B-16k + model_card: https://huggingface.co/Open-Orca/LlongOrca-7B-16k + modality: text; text + analysis: Evaluated on BigBench-Hard and AGIEval. + size: 7B parameters (dense) + dependencies: [OpenOrca, LLongMA-2] + training_emissions: unknown + training_time: 37 hours + training_hardware: 8x A6000-48GB (first-gen) GPUs + quality_control: '' + access: open + license: LLaMA2 + intended_uses: training and evaluation in the field of natural language processing.
+ prohibited_uses: none + monitoring: '' + feedback: https://huggingface.co/Open-Orca/LlongOrca-7B-16k/discussions +- type: model + name: Phi-1.5 + organization: Microsoft + description: Phi-1.5 is a 1.3B parameter Transformer-based language model trained largely on synthetic, textbook-quality data. + created_date: 2023-09-11 + url: https://arxiv.org/pdf/2309.05463.pdf + model_card: https://huggingface.co/microsoft/phi-1_5 + modality: text; text + analysis: Evaluated on common sense reasoning, language understanding, and multi-step reasoning compared to other SOTA language models. + size: 1.3B parameters (dense) + dependencies: [phi-1] + training_emissions: unknown + training_time: 8 days + training_hardware: 32 A100-40G GPUs + quality_control: Generic web-crawl data is removed from the training dataset. + access: open + license: + explanation: Can be found via the license tab at the top of https://huggingface.co/microsoft/phi-1_5 + value: Microsoft Research License + intended_uses: Phi-1.5 is best suited for answering prompts using the QA format, the chat format, and the code format. + prohibited_uses: '' + monitoring: none + feedback: https://huggingface.co/microsoft/phi-1_5/discussions diff --git a/assets/mila.yaml b/assets/mila.yaml new file mode 100644 index 00000000..ed367edb --- /dev/null +++ b/assets/mila.yaml @@ -0,0 +1,64 @@ +--- +- type: dataset + name: ToyMix + organization: Mila - Quebec AI Institute + description: ToyMix is the smallest of three extensive, curated multi-label molecular datasets that together cover nearly 100 million molecules and over 3000 sparsely defined tasks. + created_date: 2023-10-09 + url: https://arxiv.org/pdf/2310.04292.pdf + datasheet: none + modality: molecules, tasks + size: 13B labels of quantum and biological nature. + sample: [] + analysis: Models of size 150k parameters trained on ToyMix and compared to models trained on its dependencies across GNN baselines. + dependencies: [QM9, TOX21, ZINC12K] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY-NC-SA 4.0 + intended_uses: The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. Further, the ToyMix dataset is intended to be used in a multi-task setting, meaning that a single model should be trained to predict all of its tasks simultaneously. + prohibited_uses: none + monitoring: none + feedback: none +- type: dataset + name: LargeMix + organization: Mila - Quebec AI Institute + description: LargeMix is the middle-sized of three extensive, curated multi-label molecular datasets that together cover nearly 100 million molecules and over 3000 sparsely defined tasks. + created_date: 2023-10-09 + url: https://arxiv.org/pdf/2310.04292.pdf + datasheet: none + modality: molecules, tasks + size: 13B labels of quantum and biological nature. + sample: [] + analysis: Models of size between 4M and 6M parameters trained for 200 epochs on LargeMix and compared to models trained on its dependencies across GNN baselines. + dependencies: [L1000 VCAP, L1000 MCF7, PCBA1328, PCQM4M_G25_N4] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY-NC-SA 4.0 + intended_uses: The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. Further, the LargeMix dataset is intended to be used in a multi-task setting, meaning that a single model should be trained to predict all of its tasks simultaneously.
+ prohibited_uses: none + monitoring: none + feedback: none +- type: dataset + name: UltraLarge + organization: Mila - Quebec AI Institute + description: UltraLarge is the largest of three extensive, curated multi-label molecular datasets that together cover nearly 100 million molecules and over 3000 sparsely defined tasks. + created_date: 2023-10-09 + url: https://arxiv.org/pdf/2310.04292.pdf + datasheet: none + modality: molecules, tasks + size: 13B labels of quantum and biological nature. + sample: [] + analysis: Models of size between 4M and 6M parameters trained for 50 epochs on UltraLarge and compared to models trained on its dependencies across GNN baselines. + dependencies: [PM6_83M] + included: '' + excluded: '' + quality_control: '' + access: open + license: CC BY-NC-SA 4.0 + intended_uses: The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. + prohibited_uses: none + monitoring: none + feedback: none diff --git a/assets/mistral.yaml b/assets/mistral.yaml new file mode 100644 index 00000000..c9afff9f --- /dev/null +++ b/assets/mistral.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: Mistral + organization: Mistral AI + description: Mistral is a compact 7.3B parameter language model that Mistral AI reports outperforms larger LLaMA models on standard benchmarks. + created_date: 2023-09-27 + url: https://mistral.ai/news/announcing-mistral-7b/ + model_card: https://huggingface.co/mistralai/Mistral-7B-v0.1 + modality: text; text + analysis: Evaluated in comparison to LLaMA series models on standard language benchmarks. + size: 7.3B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: none + feedback: https://huggingface.co/mistralai/Mistral-7B-v0.1/discussions diff --git a/assets/moonhub.yaml b/assets/moonhub.yaml new file mode 100644 index 00000000..9ee115f3 --- /dev/null +++ b/assets/moonhub.yaml @@ -0,0 +1,21 @@ +--- +- type: application + name: Moonhub Recruiter + organization: Moonhub + description: Moonhub Recruiter is marketed as the world's first AI-powered recruiter, providing sourcing and recruiting services for startups and growing businesses. + created_date: 2023-10-11 + url: https://www.moonhub.ai/ + dependencies: [Cohere Base] + adaptation: '' + output_space: job candidate matches + quality_control: '' + access: limited + license: unknown + terms_of_service: unknown + intended_uses: recruiting candidates for business needs + prohibited_uses: none + monitoring: '' + feedback: '' + monthly_active_users: unknown + user_distribution: unknown + failures: '' diff --git a/assets/openai.yaml b/assets/openai.yaml index 9eb9ee2b..ad0eb15b 100644 --- a/assets/openai.yaml +++ b/assets/openai.yaml @@ -1325,3 +1325,38 @@ monthly_active_users: '' user_distribution: '' failures: '' +- type: model + name: DALL·E 3 + organization: OpenAI + description: DALL·E 3 is an artificial intelligence model that takes a text + prompt and/or existing image as an input and generates a new image as an output. + The model is now in research preview, and will be available to ChatGPT Plus and Enterprise customers in October 2023. + created_date: + explanation: OpenAI announced that DALL·E 3 was coming soon in a tweet on 2023-09-20. Users could begin experimenting with DALL·E 3 in research preview in early October.
+ value: 2023-09-20 + url: https://openai.com/dall-e-3 + model_card: none + modality: text; image + analysis: The model is capable of generating explicit content, and the researchers + found a limited amount of spurious content generated. + size: unknown + dependencies: [DALL·E 2 dataset, CLIP dataset, ChatGPT] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: DALL·E 3 has mitigations to decline requests that ask for a public figure by name. OpenAI improved safety performance in risk areas like generation of public figures and harmful biases related to visual over/under-representation, in partnership with red teamers (domain experts who stress-test the model) to help inform its risk assessment and mitigation efforts in areas like propaganda and misinformation. + access: + explanation: DALL·E 3 is now in research preview, and will be available to ChatGPT Plus and Enterprise customers in October, via the API and in Labs later this fall. + value: limited + license: unknown + intended_uses: The intended use of the DALL·E 3 Preview at this time is for + personal, non-commercial exploration and research purposes by people who are + interested in understanding the potential uses of these capabilities. + prohibited_uses: Use of the model is governed by the OpenAI Content Policy, which + prohibits content that is not G-rated. Users are not allowed to utilize the model + in commercial products in the preview version. + monitoring: Uses of the model are monitored. In the preview version, any user + can flag content. The specific policies for monitoring are not disclosed, + but possible measures include disabling of accounts violating the content policy. + feedback: Feedback can be provided at openai.com + diff --git a/assets/openlemur.yaml b/assets/openlemur.yaml new file mode 100644 index 00000000..7464f631 --- /dev/null +++ b/assets/openlemur.yaml @@ -0,0 +1,43 @@ +--- +- type: model + name: Lemur + organization: OpenLemur + description: Lemur is an openly accessible language model optimized for both natural language and coding capabilities to serve as the backbone of versatile language agents. + created_date: 2023-10-10 + url: https://arxiv.org/pdf/2310.06830.pdf + model_card: https://huggingface.co/OpenLemur/lemur-70b-v1 + modality: code, text; code, text + analysis: Evaluated on text and code benchmarks in comparison to other models. + size: 70B parameters (dense) + dependencies: [LLaMA 2, The Stack, RefinedWeb, RedPajama, Common Crawl, Wikipedia, ArXiv] + training_emissions: unknown + training_time: unknown + training_hardware: TPUv4-512 pod + quality_control: '' + access: open + license: LLaMA2 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/OpenLemur/lemur-70b-v1/discussions +- type: model + name: Lemur-Chat + organization: OpenLemur + description: Lemur-Chat is an openly accessible language model optimized for both natural language and coding capabilities to serve as the backbone of versatile language agents. + created_date: 2023-10-10 + url: https://arxiv.org/pdf/2310.06830.pdf + model_card: https://huggingface.co/OpenLemur/lemur-70b-chat-v1 + modality: text; text + analysis: Evaluated on text and code benchmarks in comparison to other models.
+ size: 70B parameters (dense) + dependencies: [Lemur, OpenAssistant 1, OpenOrca, ShareGPT & ChatLogs, Evol-CodeAlpaca data] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: CC-BY-NC-4.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: https://huggingface.co/OpenLemur/lemur-70b-chat-v1/discussions \ No newline at end of file diff --git a/assets/shanghai.yaml b/assets/shanghai.yaml index df69ef04..e75a75bc 100644 --- a/assets/shanghai.yaml +++ b/assets/shanghai.yaml @@ -73,3 +73,24 @@ prohibited_uses: none monitoring: none feedback: none +- type: model + name: InternLM + organization: Shanghai AI Laboratory + description: InternLM is a high-quality language model proficient in English, Chinese, and code. + created_date: 2023-09-20 + url: https://github.com/InternLM/InternLM + model_card: https://huggingface.co/internlm/internlm-20b + modality: code, text; code, text + analysis: Evaluated in comparison to LLaMA series models on standard benchmarks. + size: 20B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: none + feedback: https://huggingface.co/internlm/internlm-20b/discussions diff --git a/assets/soochow.yaml b/assets/soochow.yaml new file mode 100644 index 00000000..fca7cd53 --- /dev/null +++ b/assets/soochow.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: OpenBA + organization: Soochow University + description: OpenBA is an open-sourced 15B parameter bilingual (English and Chinese) asymmetric seq2seq model. + created_date: 2023-10-01 + url: https://arxiv.org/pdf/2309.10706.pdf + model_card: https://huggingface.co/OpenBA/OpenBA-LM + modality: text; text + analysis: Evaluated across different text benchmarks in English and Chinese. + size: 15B parameters (dense) + dependencies: [] + training_emissions: 6.5 tCO2eq + training_time: 38k GPU hours + training_hardware: 8 NVIDIA A100-80GB GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: none + feedback: https://huggingface.co/OpenBA/OpenBA-LM/discussions diff --git a/assets/stanford.yaml b/assets/stanford.yaml index 7ee0c2b7..0d795d95 100644 --- a/assets/stanford.yaml +++ b/assets/stanford.yaml @@ -69,3 +69,77 @@ prohibited_uses: '' monitoring: '' feedback: '' +- type: dataset + name: Alpaca dataset + # General + organization: Stanford + description: > + The Alpaca dataset consists of 52,000 instruction-following demonstrations generated + in the style of the [Self-Instruct framework](https://github.com/yizhongw/self-instruct) + using OpenAI's text-davinci-003 engine. This instruction data can be used to + conduct instruction-tuning for language models and make them follow + instructions better. + created_date: + value: 2023-03-13 + explanation: > + The date the [[blog post]](https://crfm.stanford.edu/2023/03/13/alpaca.html) + was released.
+ url: https://crfm.stanford.edu/2023/03/13/alpaca.html + datasheet: https://huggingface.co/datasets/tatsu-lab/alpaca + modality: text (English) + size: 52K instruction-following demonstrations + sample: [] + analysis: '' + # Construction + dependencies: [text-davinci-003] + license: CC BY-NC 4.0 + included: '' + excluded: '' + quality_control: '' + # Downstream + access: + value: open + explanation: The dataset can be downloaded from [[Hugging Face]](https://huggingface.co/datasets/tatsu-lab/alpaca). + The code for generating data is available on the [[GitHub repository]](https://github.com/tatsu-lab/stanford_alpaca#data-generation-process). + intended_uses: Alpaca is intended and licensed for research use only. + prohibited_uses: '' + monitoring: '' + feedback: Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues). + +- type: model + name: Alpaca + # General + organization: Stanford + description: > + Alpaca-7B is an instruction-following model fine-tuned from the LLaMA 7B model + on 52K instruction-following demonstrations. + created_date: + value: 2023-03-13 + explanation: > + The date the [[blog post]](https://crfm.stanford.edu/2023/03/13/alpaca.html) + was released. + url: https://crfm.stanford.edu/2023/03/13/alpaca.html + model_card: '' + modality: text (English) + size: 7B parameters (dense model) + analysis: '' + # Construction + dependencies: [LLaMA, Alpaca dataset] + training_emissions: unknown + training_time: '' + training_hardware: '' + quality_control: '' + # Downstream + access: + value: open + explanation: The weight diff between Alpaca-7B and LLaMA-7B is located on + [[Hugging Face]](https://huggingface.co/tatsu-lab/alpaca-7b-wdiff). To recover + the original Alpaca-7B weights, follow the steps given [[here]](https://github.com/tatsu-lab/stanford_alpaca#recovering-alpaca-weights). Training and data generation code + can be found on the [[GitHub repository]](https://github.com/tatsu-lab/stanford_alpaca). + An [[online demo]](https://chat.lmsys.org/?model=alpaca-13b) is also available. + license: CC BY-NC 4.0 (model weights) + intended_uses: Alpaca is intended and licensed for research use only. + prohibited_uses: '' + monitoring: '' + feedback: Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues). diff --git a/assets/toronto.yaml b/assets/toronto.yaml new file mode 100644 index 00000000..ae498064 --- /dev/null +++ b/assets/toronto.yaml @@ -0,0 +1,22 @@ +--- +- type: dataset + name: OpenWebMath + organization: University of Toronto + description: OpenWebMath is an open dataset containing 14.7B tokens of mathematical webpages from Common Crawl, inspired by Minerva. + created_date: 2023-10-10 + url: https://arxiv.org/pdf/2310.06786.pdf + datasheet: Can be found at section E of https://arxiv.org/pdf/2310.06786.pdf + modality: text, mathematical tokens + size: 14.7B tokens + sample: [] + analysis: Compared models trained on OpenWebMath for 1 epoch to models trained on The Pile and ProofPile on mathematics benchmarks. + dependencies: [Common Crawl] + included: '' + excluded: '' + quality_control: Documents are filtered, processed for mathematical value, deduplicated, and then the largest documents are manually inspected for quality. + access: open + license: ODC-By 1.0 + intended_uses: Language model pretraining, finetuning, and evaluation. + prohibited_uses: Any tasks which may be considered irresponsible or harmful.
+ monitoring: none + feedback: https://huggingface.co/datasets/open-web-math/open-web-math/discussions diff --git a/assets/uwashington.yaml b/assets/uwashington.yaml index 2b59e2d4..9778afb5 100644 --- a/assets/uwashington.yaml +++ b/assets/uwashington.yaml @@ -23,3 +23,24 @@ prohibited_uses: '' monitoring: '' feedback: '' +- type: model + name: Llark + organization: University of Washington, Spotify + description: Llark is an instruction-tuned multimodal model for music understanding. + created_date: 2023-10-11 + url: https://arxiv.org/pdf/2310.07160.pdf + model_card: none + modality: audio, text; text + analysis: Evaluated on benchmark music understanding tasks on SOTA music datasets. + size: 12B parameters (dense) + dependencies: [LLaMA 2, Jukebox] + training_emissions: unknown + training_time: 54 hours + training_hardware: 4 80GB NVIDIA A40 GPUs + quality_control: '' + access: open + license: Apache 2.0 + intended_uses: '' + prohibited_uses: '' + monitoring: '' + feedback: none diff --git a/assets/xwin.yaml b/assets/xwin.yaml new file mode 100644 index 00000000..a421d71e --- /dev/null +++ b/assets/xwin.yaml @@ -0,0 +1,22 @@ +--- +- type: model + name: Xwin-LM + organization: Xwin + description: Xwin-LM is an LLM which, on release, ranked first on AlpacaEval, becoming the first model to surpass GPT-4 on this benchmark. + created_date: 2023-09-20 + url: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1 + model_card: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1 + modality: text; text + analysis: Evaluated on AlpacaEval benchmark against SOTA LLMs. + size: 70B parameters (dense) + dependencies: [] + training_emissions: unknown + training_time: unknown + training_hardware: unknown + quality_control: '' + access: open + license: LLaMA2 + intended_uses: '' + prohibited_uses: '' + monitoring: none + feedback: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions diff --git a/js/main.js b/js/main.js index dbf4572f..aee0ec8c 100644 --- a/js/main.js +++ b/js/main.js @@ -629,6 +629,14 @@ function loadAssetsAndRenderPageContent() { const paths = [ 'assets/adept.yaml', + 'assets/mila.yaml', + 'assets/soochow.yaml', + 'assets/baichuan.yaml', + 'assets/xwin.yaml', + 'assets/mistral.yaml', + 'assets/adobe.yaml', + 'assets/openlemur.yaml', + 'assets/toronto.yaml', 'assets/wayve.yaml', 'assets/openx.yaml', 'assets/ibm.yaml', @@ -644,6 +652,7 @@ function loadAssetsAndRenderPageContent() { 'assets/casia.yaml', 'assets/lehigh.yaml', 'assets/nolano.yaml', + 'assets/moonhub.yaml', 'assets/chatglm.yaml', 'assets/uae.yaml', 'assets/singapore.yaml',
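Note for future additions: every YAML entry in this diff follows the shared asset schema used across assets/*.yaml, and each new file must also be registered in the paths array of js/main.js, as in the final hunk above. As a reference, a minimal model entry looks roughly like the sketch below; the field names are taken from the entries in this diff, while the name, organization, and values are purely illustrative placeholders.

---
- type: model
  name: Example-LM  # illustrative placeholder, not a real asset
  organization: Example Org
  description: One-sentence summary of what the model is and does.
  created_date: 2023-01-01
  url: https://example.org/announcement
  model_card: none
  modality: text; text
  analysis: ''
  size: unknown
  dependencies: []
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  access: open
  license: unknown
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: none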