diff --git a/tests/basic_correctness/test_cpu_offload.py b/tests/basic_correctness/test_cpu_offload.py index a5df5639cf948..3ab01d52277d7 100644 --- a/tests/basic_correctness/test_cpu_offload.py +++ b/tests/basic_correctness/test_cpu_offload.py @@ -4,3 +4,5 @@ def test_cpu_offload(): compare_two_settings("meta-llama/Llama-2-7b-hf", [], ["--cpu-offload-gb", "4"]) + compare_two_settings("nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t", + [], ["--cpu-offload-gb", "1"])