Merge pull request #142 from stanford-crfm/jonathan/1103-weekly-assets

add weekly assets
stanford-crfm · Nov 28, 2023 · ceed9e4 · ceed9e4
2 parents a850f72 + 8985e68
commit ceed9e4
Show file tree

Hide file tree

Showing 6 changed files with 132 additions and 0 deletions.
diff --git a/assets/01ai.yaml b/assets/01ai.yaml
@@ -0,0 +1,22 @@
+---
+- type: model
+  name: Yi
+  organization: 01.AI
+  description: Yi is an LLM that accepts input and produces output in both English and Chinese.
+  created_date: 2023-11-02
+  url: https://github.com/01-ai/Yi
+  model_card: https://huggingface.co/01-ai/Yi-34B
+  modality: text; text
+  analysis: Evaluated on common sense reasoning and reading comprehension, analogous to LLaMA 2's analysis.
+  size: 34B parameters (dense)
+  dependencies: []
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: ''
+  access: open
+  license: Apache 2.0
+  intended_uses: Academic research and free commercial usage
+  prohibited_uses: ''
+  monitoring: none
+  feedback: https://huggingface.co/01-ai/Yi-34B/discussions
diff --git a/assets/cohere.yaml b/assets/cohere.yaml
@@ -449,3 +449,27 @@
   monthly_active_users: unknown
   user_distribution: unknown
   failures: unknown
+- type: model
+  name: Cohere Embedv3 (English)
+  organization: Cohere
+  description: As of release, Cohere Embedv3 is Cohere's latest and most advanced embeddings model.
+  created_date: 2023-11-02
+  url: https://txt.cohere.com/introducing-embed-v3/
+  model_card: https://huggingface.co/Cohere/Cohere-embed-english-v3.0
+  modality: text; text
+  analysis: Achieves SOTA performance on trusted MTEB and BEIR benchmarks.
+  size: unknown
+  dependencies: []
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: ''
+  access:
+    explanation: "The model is available to the public through the Cohere Platform\
+      \ [[Cohere Platform]](https://cohere.ai/).\n"
+    value: limited
+  license: unknown
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: none
+  feedback: https://huggingface.co/Cohere/Cohere-embed-english-v3.0/discussions
diff --git a/assets/kunlun.yaml b/assets/kunlun.yaml
@@ -0,0 +1,25 @@
+---
+- type: model
+  name: Skywork
+  organization: Kunlun Inc.
+  description: The Skywork series is a family of large language models (LLMs) trained on a corpus of over 3.2 trillion tokens drawn from both English and Chinese texts.
+  created_date: 2023-10-30
+  url: https://arxiv.org/pdf/2310.19341.pdf
+  model_card: https://huggingface.co/Skywork/Skywork-13B-base
+  modality: text; text
+  analysis: Evaluated on several popular benchmarks and performance in different fields.
+  size: 13B parameters (dense)
+  dependencies: [SkyPile]
+  training_emissions: unknown
+  training_time: 39 days
+  training_hardware: 512 A800-80GB GPUs
+  quality_control: ''
+  access: open
+  license:
+    value: custom
+    explanation: can be found at https://github.com/SkyworkAI/Skywork/blob/main/LICENSE
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: none
+  feedback: https://huggingface.co/Skywork/Skywork-13B-base/discussions
+
diff --git a/assets/perplexity.yaml b/assets/perplexity.yaml
@@ -44,3 +44,23 @@
   monthly_active_users: ''
   user_distribution: ''
   failures: ''
+- type: application
+  name: Perplexity Chat
+  organization: Perplexity
+  description: Perplexity Chat is an AI chatbot trained in-house by Perplexity.
+  created_date: 2023-10-27
+  url: https://labs.perplexity.ai/
+  dependencies: []
+  adaptation: ''
+  output_space: Chatbot output in response to user queries
+  quality_control: ''
+  access: open
+  license: none
+  terms_of_service: https://blog.perplexity.ai/legal/terms-of-service
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: ''
+  feedback: ''
+  monthly_active_users: ''
+  user_distribution: ''
+  failures: ''
diff --git a/assets/stability.yaml b/assets/stability.yaml
@@ -94,3 +94,23 @@
   monthly_active_users: ''
   user_distribution: ''
   failures: ''
+- type: application
+  name: Sky Replacer
+  organization: Stability AI
+  description: Sky Replacer is an exciting new tool that allows users to replace the color and aesthetic of the sky in their original photos with a selection of nine alternatives to improve the overall look and feel of the image.
+  created_date: 2023-11-01
+  url: https://clipdrop.co/real-estate/sky-replacer
+  dependencies: []
+  adaptation: ''
+  output_space: image
+  quality_control: ''
+  access: open
+  license: unknown
+  terms_of_service: https://stability.ai/terms-of-use
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: ''
+  feedback: ''
+  monthly_active_users: ''
+  user_distribution: ''
+  failures: ''
diff --git a/assets/together.yaml b/assets/together.yaml
@@ -151,3 +151,24 @@
   prohibited_uses: ''
   monitoring: ''
   feedback: https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct/discussions
+- type: dataset
+  name: RedPajama-Data-v2
+  organization: Together
+  description: RedPajama-Data-v2 is a new version of the RedPajama dataset, with 30 trillion filtered and deduplicated tokens (100+ trillion raw) from 84 CommonCrawl dumps covering 5 languages, along with 40+ pre-computed data quality annotations that can be used for further filtering and weighting.
+  created_date: 2023-10-30
+  url: https://together.ai/blog/redpajama-data-v2
+  datasheet: ''
+  modality: text
+  size: 30 trillion tokens
+  sample: []
+  analysis: none
+  dependencies: [Common Crawl]
+  included: documents in English, German, French, Spanish, and Italian.
+  excluded: ''
+  quality_control: tokens filtered and deduplicated
+  access: open
+  license: Apache 2.0
+  intended_uses: To be used as the start of a larger, community-driven development of large-scale datasets for LLMs.
+  prohibited_uses: ''
+  monitoring: ''
+  feedback: Feedback can be sent to Together via https://www.together.ai/contact