Build deterministic training/eval datasets used by loop objectives and metrics.
from shared.dataset import load_gsm8k_dataset, evaluate_gsm8k_response
# Load rows from the GSM8K JSONL file; max_rows=1000 presumably caps how
# many rows are read (confirm against load_gsm8k_dataset).
dataset_path = "/path/to/gsm8k.jsonl"
rows = load_gsm8k_dataset(dataset_path, max_rows=1000)

# Score a single response against its ground truth. The call's result is
# unpacked into two values, which are then printed as-is.
outcome = evaluate_gsm8k_response(response="42", ground_truth="42")
reward, detail = outcome
print(reward, detail)
Was this page helpful?