Commit

Return more information in Omni-MATH annotations (#3271)
yifanmai authored Jan 14, 2025
1 parent 6989b81 commit 61a9bc0
Showing 2 changed files with 9 additions and 6 deletions.
13 changes: 8 additions & 5 deletions src/helm/benchmark/annotation/omni_math_annotator.py
@@ -61,9 +61,12 @@ def annotate(self, request_state: RequestState) -> Any:
 
         info = parse_report(annotator_response_text)
 
-        correctness = info.get("Equivalence Judgement", "FALSE")
+        equivalence_judgement = info.get("Equivalence Judgement", "")
+        student_final_answer = info.get("Student Final Answer", "")
+        justification = info.get("Justification", "").strip().removesuffix("=== report over ===").strip()
 
-        if correctness == "TRUE":
-            return {"prompt_text": annotator_prompt, "correctness": 1.0}
-        else:
-            return {"prompt_text": annotator_prompt, "correctness": 0.0}
+        return {
+            "student_final_answer": student_final_answer,
+            "equivalence_judgement": equivalence_judgement,
+            "justification": justification,
+        }
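
The annotator no longer collapses the judge's report into a single correctness float; it returns the parsed fields themselves and leaves scoring to the metric. Note that str.removesuffix, used to strip the trailing report marker from the justification, requires Python 3.9 or later. Below is a minimal sketch of the "Key: Value" report format implied by the fields read above; parse_report_sketch and the sample report are illustrative assumptions, not HELM's actual parse_report or real judge output.

def parse_report_sketch(report: str) -> dict:
    # Map "Key: Value" lines of a judge report to a dict,
    # mirroring the keys the annotator reads above.
    info = {}
    for line in report.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            info[key.strip()] = value.strip()
    return info

report = (
    "Student Final Answer: 42\n"
    "Equivalence Judgement: TRUE\n"
    "Justification: Both forms simplify to 42.\n"
    "=== report over ==="
)
info = parse_report_sketch(report)
assert info.get("Equivalence Judgement", "") == "TRUE"
assert info.get("Student Final Answer", "") == "42"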
2 changes: 1 addition & 1 deletion src/helm/benchmark/metrics/omni_math_metrics.py
@@ -19,7 +19,7 @@ def evaluate_generation(
         eval_cache_path: str,
     ) -> List[Stat]:
         assert request_state.annotations
-        score = request_state.annotations["omni_math"]["correctness"]
+        score = request_state.annotations["omni_math"]["equivalence_judgement"].strip().upper() == "TRUE"
         return [
             Stat(MetricName("omni_math_accuracy")).add(score),
         ]
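
With the raw judgement string now stored in the annotation, the metric derives the boolean itself instead of reading a precomputed float. A few examples of how the new check behaves (plain Python string semantics, not taken from the HELM test suite):

# Behavior of the new correctness check:
assert " true ".strip().upper() == "TRUE"   # tolerant of case and surrounding whitespace
assert "FALSE".strip().upper() != "TRUE"    # any other judgement scores as incorrect
assert "".strip().upper() != "TRUE"         # an empty judgement also scores as incorrect

Since bool is a subclass of int in Python, passing the comparison result to Stat.add should accumulate as 1 or 0, so omni_math_accuracy keeps its meaning as the fraction of correct answers.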
