From 3b8e3bb69be18e5800b81c9c539cdc32511ec50c Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Mon, 8 Jul 2024 22:34:14 +0200
Subject: [PATCH 1/8] Fix errors in CLI parameter descriptions in README

---
 README.md | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 99b6f15..0af26bd 100644
--- a/README.md
+++ b/README.md
@@ -116,13 +116,21 @@ The CLI will prompt you to input instructions interactively:
 
 You can configure the demo by specifying the following parameters:
 
-- `--aggregator`: The primary model used for final response generation.
-- `--reference_models`: List of models used as references.
+- `--model`: The primary model used for final response generation.
+- `--reference-models`: Models used as references.
 - `--temperature`: Controls the randomness of the response generation.
-- `--max_tokens`: Maximum number of tokens in the response.
+- `--max-tokens`: Maximum number of tokens in the response.
 - `--rounds`: Number of rounds to process the input for refinement. (num rounds == num of MoA layers - 1)
-- `--num_proc`: Number of processes to run in parallel for faster execution.
-- `--multi_turn`: Boolean to toggle multi-turn interaction capability.
+- `--num-proc`: Number of processes to run in parallel for faster execution.
+- `--multi-turn`: Boolean to toggle multi-turn interaction capability.
+
+Specify `--reference-models` multiple times to use multiple models as references. For example:
+
+```bash
+# Specify multiple reference models
+python bot.py --reference-models "mistralai/Mixtral-8x22B-Instruct-v0.1" --reference-models "Qwen/Qwen2-72B-Instruct"
+```
+
 
 ## Evaluation
 

From 6433c08397f62fcc25b6d3b85e0a6cf9e3c60700 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Sun, 7 Jul 2024 21:35:03 +0200
Subject: [PATCH 2/8] Make bot --model parameter function correctly

---
 bot.py           | 7 ++++---
 requirements.txt | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/bot.py b/bot.py
index 89f8f19..fd91825 100644
--- a/bot.py
+++ b/bot.py
@@ -118,7 +118,7 @@
 
     model = Prompt.ask(
         "\n1. What main model do you want to use?",
-        default="Qwen/Qwen2-72B-Instruct",
+        default=model,
     )
     console.print(f"Selected {model}.", style="yellow italic")
     temperature = float(
@@ -199,8 +199,9 @@
 
     for chunk in output:
         out = chunk.choices[0].delta.content
-        console.print(out, end="")
-        all_output += out
+        if out is not None:
+            console.print(out, end="")
+            all_output += out
     print()
 
     if DEBUG:
diff --git a/requirements.txt b/requirements.txt
index bf9f390..a09808a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ loguru
 datasets
 typer
 rich
+cffi

From 18872faf6472235660fd4447023f2203f57f4056 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Sun, 7 Jul 2024 21:38:09 +0200
Subject: [PATCH 3/8] Get OpenAI API key and base_url from environment

Use OPENAI_API_KEY, OPENAI_BASE_URL, REFERENCE_API_KEY, and
REFERENCE_BASE_URL (REFERENCE_API_KEY is what used to be
OPENAI_API_KEY). This way, we can easily connect to third-party APIs.
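
For example, pointing the main completion calls at a third-party
endpoint looks like this (illustrative values, not real credentials;
REFERENCE_API_KEY is only read by generate_openai):

    export OPENAI_BASE_URL="https://your-api-provider.com/v1"
    export OPENAI_API_KEY="your-api-key-here"
    # key for the evaluation/reference endpoint, formerly OPENAI_API_KEY
    export REFERENCE_API_KEY="your-openai-key"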
---
 utils.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/utils.py b/utils.py
index 4651745..e084983 100644
--- a/utils.py
+++ b/utils.py
@@ -9,6 +9,10 @@
 
 
 DEBUG = int(os.environ.get("DEBUG", "0"))
 
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.together.xyz/v1")
+REFERENCE_API_KEY = os.environ.get("REFERENCE_API_KEY")
+REFERENCE_BASE_URL = os.environ.get("REFERENCE_BASE_URL", "https://api.openai.com/v1")
 
 def generate_together(
@@ -21,12 +25,12 @@
 
     output = None
 
+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"
+
     for sleep_time in [1, 2, 4, 8, 16, 32]:
 
         try:
 
-            endpoint = "https://api.together.xyz/v1/chat/completions"
-
             if DEBUG:
                 logger.debug(
                     f"Sending messages ({len(messages)}) (last message: `{messages[-1]['content'][:20]}...`) to `{model}`."
@@ -41,7 +45,7 @@
                     "messages": messages,
                 },
                 headers={
-                    "Authorization": f"Bearer {os.environ.get('TOGETHER_API_KEY')}",
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                 },
             )
             if "error" in res.json():
@@ -80,11 +84,10 @@
     max_tokens=2048,
     temperature=0.7,
 ):
-    endpoint = "https://api.together.xyz/v1"
     client = openai.OpenAI(
-        api_key=os.environ.get("TOGETHER_API_KEY"), base_url=endpoint
+        api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL
     )
-    endpoint = "https://api.together.xyz/v1/chat/completions"
+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"
     response = client.chat.completions.create(
         model=model,
         messages=messages,
@@ -104,7 +107,8 @@
 ):
 
     client = openai.OpenAI(
-        api_key=os.environ.get("OPENAI_API_KEY"),
+        api_key=REFERENCE_API_KEY,
+        base_url=REFERENCE_BASE_URL,
     )
 
     for sleep_time in [1, 2, 4, 8, 16, 32]:

From d3dbd712c4c97e3ca18637f5163c602ffa201174 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Mon, 8 Jul 2024 22:25:41 +0200
Subject: [PATCH 4/8] Support original TOGETHER_API_KEY as well

---
 utils.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/utils.py b/utils.py
index e084983..7b60979 100644
--- a/utils.py
+++ b/utils.py
@@ -9,10 +9,19 @@
 
 
 DEBUG = int(os.environ.get("DEBUG", "0"))
 
+
+TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+EVAL_API_KEY = os.environ.get("EVAL_API_KEY")
+
+# If TOGETHER_API_KEY is set, use it as the main key and use the OpenAI key for evaluations
+if TOGETHER_API_KEY:
+    OPENAI_API_KEY = TOGETHER_API_KEY
+    EVAL_API_KEY = os.environ.get("OPENAI_API_KEY")
+
 OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.together.xyz/v1")
-REFERENCE_API_KEY = os.environ.get("REFERENCE_API_KEY")
-REFERENCE_BASE_URL = os.environ.get("REFERENCE_BASE_URL", "https://api.openai.com/v1")
+EVAL_BASE_URL = os.environ.get("EVAL_BASE_URL", "https://api.openai.com/v1")
+
 
 def generate_together(
@@ -107,8 +116,8 @@
 ):
 
     client = openai.OpenAI(
-        api_key=REFERENCE_API_KEY,
-        base_url=REFERENCE_BASE_URL,
+        api_key=EVAL_API_KEY,
+        base_url=EVAL_BASE_URL,
     )
 
     for sleep_time in [1, 2, 4, 8, 16, 32]:
@@ -183,3 +192,4 @@
         temperature=temperature,
         max_tokens=max_tokens,
     )
+

From 0f0b5b043598c22add96e8c6d71365c6f581660c Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Mon, 8 Jul 2024 22:51:10 +0200
Subject: [PATCH 5/8] Update README to describe OPENAI_BASE_URL usage

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 0af26bd..5477f8d 100644
--- a/README.md
+++ b/README.md
@@ -131,6 +131,17 @@ Specify `--reference-models` multiple times to use multiple models as references
 python bot.py --reference-models "mistralai/Mixtral-8x22B-Instruct-v0.1" --reference-models "Qwen/Qwen2-72B-Instruct"
 ```
 
+## Other OpenAI-compatible API endpoints
+
+To use a different OpenAI-compatible API endpoint, set the OPENAI_BASE_URL and OPENAI_API_KEY environment variables, and clear TOGETHER_API_KEY so it does not take precedence.
+
+```
+export TOGETHER_API_KEY=
+export OPENAI_BASE_URL="https://your-api-provider.com/v1"
+export OPENAI_API_KEY="your-api-key-here"
+```
+
+This way, any third-party API can be used, including local models.
 
 ## Evaluation
 

From 433ff73c4241b5a1266827715c22759417c0d064 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Wed, 10 Jul 2024 15:56:06 +0200
Subject: [PATCH 6/8] Add example for running with local Ollama

---
 README.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5477f8d..0d0d41d 100644
--- a/README.md
+++ b/README.md
@@ -141,7 +141,24 @@ export OPENAI_BASE_URL="https://your-api-provider.com/v1"
 export OPENAI_API_KEY="your-api-key-here"
 ```
 
-This way, any third-party API can be used, including local models.
+This way, any third-party API can be used, such as OpenRouter, Groq, or local models.
+
+### Ollama
+
+For example, to run the bot using Ollama:
+
+1. Set up the environment:
+
+```
+export OPENAI_BASE_URL=http://localhost:11434/v1
+export OPENAI_API_KEY=ollama
+```
+
+2. Run the bot command:
+
+```
+python bot.py --model llama3 --reference-models llama3 --reference-models mistral
+```
 
 ## Evaluation
 

From 9e78a917df524549708abb6ff5903c2d20308881 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Fri, 26 Jul 2024 13:49:38 +0200
Subject: [PATCH 7/8] Rename --model option to --aggregator

---
 README.md | 2 +-
 bot.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 0d0d41d..bc9148b 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,7 @@
 
 You can configure the demo by specifying the following parameters:
 
-- `--model`: The primary model used for final response generation.
+- `--aggregator`: The primary model used for final response generation.
 - `--reference-models`: Models used as references.
 - `--temperature`: Controls the randomness of the response generation.
 - `--max-tokens`: Maximum number of tokens in the response.
diff --git a/bot.py b/bot.py
index fd91825..fe5b0ee 100644
--- a/bot.py
+++ b/bot.py
@@ -83,7 +83,7 @@
 
 
 def main(
-    model: str = "Qwen/Qwen2-72B-Instruct",
+    aggregator: str = "Qwen/Qwen2-72B-Instruct",
     reference_models: list[str] = default_reference_models,
     temperature: float = 0.7,
     max_tokens: int = 512,
@@ -118,7 +118,7 @@
 
     model = Prompt.ask(
         "\n1. What main model do you want to use?",
-        default=model,
+        default=aggregator,
     )
     console.print(f"Selected {model}.", style="yellow italic")
     temperature = float(

From c0c7997addb82b7e5d2631272c49fd5c0ca0f677 Mon Sep 17 00:00:00 2001
From: Tijs Zwinkels
Date: Fri, 26 Jul 2024 13:55:27 +0200
Subject: [PATCH 8/8] Remove cffi from requirements

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a09808a..bf9f390 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,3 @@ loguru
 datasets
 typer
 rich
-cffi
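
Taken together, this series lets the demo run against any OpenAI-compatible endpoint under the renamed flag. A minimal end-to-end sketch combining the Ollama setup from PATCH 6/8 with the rename from PATCH 7/8 (illustrative model names taken from those patches):

```bash
# Point the bot at a local Ollama server (OpenAI-compatible API)
export OPENAI_BASE_URL=http://localhost:11434/v1
export OPENAI_API_KEY=ollama

# --aggregator replaces the old --model flag
python bot.py --aggregator llama3 --reference-models llama3 --reference-models mistral
```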