From 3b8e3bb69be18e5800b81c9c539cdc32511ec50c Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Mon, 8 Jul 2024 22:34:14 +0200
Subject: [PATCH 1/8] Fix errors in CLI parameter descriptions in README

---
 README.md | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 99b6f15..0af26bd 100644
--- a/README.md
+++ b/README.md
@@ -116,13 +116,21 @@ The CLI will prompt you to input instructions interactively:
 
 You can configure the demo by specifying the following parameters:
 
-- `--aggregator`: The primary model used for final response generation.
-- `--reference_models`: List of models used as references.
+- `--model`: The primary model used for final response generation.
+- `--reference-models`: Models used as references.
 - `--temperature`: Controls the randomness of the response generation.
-- `--max_tokens`: Maximum number of tokens in the response.
+- `--max-tokens`: Maximum number of tokens in the response.
 - `--rounds`: Number of rounds to process the input for refinement. (num rounds == num of MoA layers - 1)
-- `--num_proc`: Number of processes to run in parallel for faster execution.
-- `--multi_turn`: Boolean to toggle multi-turn interaction capability.
+- `--num-proc`: Number of processes to run in parallel for faster execution.
+- `--multi-turn`: Boolean to toggle multi-turn interaction capability.
+
+Specify `--reference-models` multiple times to use multiple models as references. For example:
+
+```bash
+# Specify multiple reference models
+python bot.py --reference-models "mistralai/Mixtral-8x22B-Instruct-v0.1" --reference-models "Qwen/Qwen2-72B-Instruct"
+```
+
 
 ## Evaluation
 

From 6433c08397f62fcc25b6d3b85e0a6cf9e3c60700 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Sun, 7 Jul 2024 21:35:03 +0200
Subject: [PATCH 2/8] Make bot --model parameter function correctly

---
 bot.py           | 7 ++++---
 requirements.txt | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/bot.py b/bot.py
index 89f8f19..fd91825 100644
--- a/bot.py
+++ b/bot.py
@@ -118,7 +118,7 @@
 
     model = Prompt.ask(
         "\n1. What main model do you want to use?",
-        default="Qwen/Qwen2-72B-Instruct",
+        default=model,
     )
     console.print(f"Selected {model}.", style="yellow italic")
     temperature = float(
@@ -199,8 +199,9 @@
 
     for chunk in output:
         out = chunk.choices[0].delta.content
-        console.print(out, end="")
-        all_output += out
+        if out is not None:
+            console.print(out, end="")
+            all_output += out
     print()
 
     if DEBUG:
diff --git a/requirements.txt b/requirements.txt
index bf9f390..a09808a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ loguru
 datasets
 typer
 rich
+cffi

From 18872faf6472235660fd4447023f2203f57f4056 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Sun, 7 Jul 2024 21:38:09 +0200
Subject: [PATCH 3/8] Get OpenAI API key and base_url from environment

Use OPENAI_API_KEY, OPENAI_BASE_URL, REFERENCE_API_KEY, and
REFERENCE_BASE_URL (REFERENCE_API_KEY is what used to be
OPENAI_API_KEY). This way, we can easily connect to third-party APIs.
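
For example, pointing the main completion calls at a third-party
endpoint looks like this (illustrative values, not real credentials;
REFERENCE_API_KEY is only read by generate_openai):

    export OPENAI_BASE_URL="https://your-api-provider.com/v1"
    export OPENAI_API_KEY="your-api-key-here"
    # key for the evaluation/reference endpoint, formerly OPENAI_API_KEY
    export REFERENCE_API_KEY="your-openai-key"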
---
 utils.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/utils.py b/utils.py
index 4651745..e084983 100644
--- a/utils.py
+++ b/utils.py
@@ -9,6 +9,10 @@
 
 
 DEBUG = int(os.environ.get("DEBUG", "0"))
 
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.together.xyz/v1")
+REFERENCE_API_KEY = os.environ.get("REFERENCE_API_KEY")
+REFERENCE_BASE_URL = os.environ.get("REFERENCE_BASE_URL", "https://api.openai.com/v1")
 
 def generate_together(
@@ -21,12 +25,12 @@
 
     output = None
 
+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"
+
     for sleep_time in [1, 2, 4, 8, 16, 32]:
 
         try:
 
-            endpoint = "https://api.together.xyz/v1/chat/completions"
-
             if DEBUG:
                 logger.debug(
                     f"Sending messages ({len(messages)}) (last message: `{messages[-1]['content'][:20]}...`) to `{model}`."
@@ -41,7 +45,7 @@
                     "messages": messages,
                 },
                 headers={
-                    "Authorization": f"Bearer {os.environ.get('TOGETHER_API_KEY')}",
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                 },
             )
             if "error" in res.json():
@@ -80,11 +84,10 @@
     max_tokens=2048,
     temperature=0.7,
 ):
-    endpoint = "https://api.together.xyz/v1"
     client = openai.OpenAI(
-        api_key=os.environ.get("TOGETHER_API_KEY"), base_url=endpoint
+        api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL
     )
-    endpoint = "https://api.together.xyz/v1/chat/completions"
+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"
     response = client.chat.completions.create(
         model=model,
         messages=messages,
@@ -104,7 +107,8 @@
 ):
 
     client = openai.OpenAI(
-        api_key=os.environ.get("OPENAI_API_KEY"),
+        api_key=REFERENCE_API_KEY,
+        base_url=REFERENCE_BASE_URL,
     )
 
     for sleep_time in [1, 2, 4, 8, 16, 32]:

From d3dbd712c4c97e3ca18637f5163c602ffa201174 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Mon, 8 Jul 2024 22:25:41 +0200
Subject: [PATCH 4/8] Support original TOGETHER_API_KEY as well

---
 utils.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/utils.py b/utils.py
index e084983..7b60979 100644
--- a/utils.py
+++ b/utils.py
@@ -9,10 +9,19 @@
 
 
 DEBUG = int(os.environ.get("DEBUG", "0"))
 
+
+TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+EVAL_API_KEY = os.environ.get("EVAL_API_KEY")
+
+# If TOGETHER_API_KEY is set, use it as the main key and use the OpenAI key for evaluations
+if TOGETHER_API_KEY:
+    OPENAI_API_KEY = TOGETHER_API_KEY
+    EVAL_API_KEY = os.environ.get("OPENAI_API_KEY")
+
 OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.together.xyz/v1")
-REFERENCE_API_KEY = os.environ.get("REFERENCE_API_KEY")
-REFERENCE_BASE_URL = os.environ.get("REFERENCE_BASE_URL", "https://api.openai.com/v1")
+EVAL_BASE_URL = os.environ.get("EVAL_BASE_URL", "https://api.openai.com/v1")
+
 
 def generate_together(
@@ -107,8 +116,8 @@
 ):
 
     client = openai.OpenAI(
-        api_key=REFERENCE_API_KEY,
-        base_url=REFERENCE_BASE_URL,
+        api_key=EVAL_API_KEY,
+        base_url=EVAL_BASE_URL,
     )
 
     for sleep_time in [1, 2, 4, 8, 16, 32]:
@@ -183,3 +192,4 @@
         temperature=temperature,
         max_tokens=max_tokens,
     )
+

From 0f0b5b043598c22add96e8c6d71365c6f581660c Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Mon, 8 Jul 2024 22:51:10 +0200
Subject: [PATCH 5/8] Update README to describe OPENAI_BASE_URL usage

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 0af26bd..5477f8d 100644
--- a/README.md
+++ b/README.md
@@ -131,6 +131,17 @@ Specify `--reference-models` multiple times to use multiple models as references
 python bot.py --reference-models "mistralai/Mixtral-8x22B-Instruct-v0.1" --reference-models "Qwen/Qwen2-72B-Instruct"
 ```
 
+## Other OpenAI-compatible API endpoints
+
+To use a different OpenAI-compatible API endpoint, set the OPENAI_BASE_URL and OPENAI_API_KEY environment variables, and clear TOGETHER_API_KEY so it does not take precedence.
+
+```
+export TOGETHER_API_KEY=
+export OPENAI_BASE_URL="https://your-api-provider.com/v1"
+export OPENAI_API_KEY="your-api-key-here"
+```
+
+This way, any third-party API can be used, including local models.
 
 ## Evaluation
 

From 433ff73c4241b5a1266827715c22759417c0d064 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Wed, 10 Jul 2024 15:56:06 +0200
Subject: [PATCH 6/8] Add example for running with local Ollama

---
 README.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5477f8d..0d0d41d 100644
--- a/README.md
+++ b/README.md
@@ -141,7 +141,24 @@ export OPENAI_BASE_URL="https://your-api-provider.com/v1"
 export OPENAI_API_KEY="your-api-key-here"
 ```
 
-This way, any third-party API can be used, including local models.
+This way, any third-party API can be used, such as OpenRouter, Groq, or local models.
+
+### Ollama
+
+For example, to run the bot using Ollama:
+
+1. Set up the environment:
+
+```
+export OPENAI_BASE_URL=http://localhost:11434/v1
+export OPENAI_API_KEY=ollama
+```
+
+2. Run the bot command:
+
+```
+python bot.py --model llama3 --reference-models llama3 --reference-models mistral
+```
 
 ## Evaluation
 

From 9e78a917df524549708abb6ff5903c2d20308881 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Fri, 26 Jul 2024 13:49:38 +0200
Subject: [PATCH 7/8] Rename --model option to --aggregator

---
 README.md | 2 +-
 bot.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 0d0d41d..bc9148b 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,7 @@
 
 You can configure the demo by specifying the following parameters:
 
-- `--model`: The primary model used for final response generation.
+- `--aggregator`: The primary model used for final response generation.
 - `--reference-models`: Models used as references.
 - `--temperature`: Controls the randomness of the response generation.
 - `--max-tokens`: Maximum number of tokens in the response.
diff --git a/bot.py b/bot.py
index fd91825..fe5b0ee 100644
--- a/bot.py
+++ b/bot.py
@@ -83,7 +83,7 @@
 
 
 def main(
-    model: str = "Qwen/Qwen2-72B-Instruct",
+    aggregator: str = "Qwen/Qwen2-72B-Instruct",
     reference_models: list[str] = default_reference_models,
     temperature: float = 0.7,
     max_tokens: int = 512,
@@ -118,7 +118,7 @@
 
     model = Prompt.ask(
         "\n1. What main model do you want to use?",
-        default=model,
+        default=aggregator,
     )
     console.print(f"Selected {model}.", style="yellow italic")
     temperature = float(

From c0c7997addb82b7e5d2631272c49fd5c0ca0f677 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Fri, 26 Jul 2024 13:55:27 +0200
Subject: [PATCH 8/8] Remove cffi from requirements

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a09808a..bf9f390 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,3 @@ loguru
 datasets
 typer
 rich
-cffi
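
Taken together, this series lets the demo run against any OpenAI-compatible endpoint under the renamed flag. A minimal end-to-end sketch combining the Ollama setup from PATCH 6/8 with the rename from PATCH 7/8 (illustrative model names taken from those patches):

```bash
# Point the bot at a local Ollama server (OpenAI-compatible API)
export OPENAI_BASE_URL=http://localhost:11434/v1
export OPENAI_API_KEY=ollama

# --aggregator replaces the old --model flag
python bot.py --aggregator llama3 --reference-models llama3 --reference-models mistral
```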