{ "results": { "gsm8k": { "exact_match,get-answer": 0.35784685367702807, "exact_match_stderr,get-answer": 0.013204142536119947, "alias": "gsm8k" } }, "configs": { "gsm8k": { "task": "gsm8k", "group": [ "math_word_problems" ], "dataset_path": "gsm8k", "dataset_name": "main", "training_split": "train", "test_split": "test", "fewshot_split": "train", "doc_to_text": "Question: {{question}}\nAnswer:", "doc_to_target": "{{answer}}", "description": "", "target_delimiter": " ", "fewshot_delimiter": "\n\n", "num_fewshot": 5, "metric_list": [ { "metric": "exact_match", "aggregation": "mean", "higher_is_better": true, "ignore_case": true, "ignore_punctuation": false, "regexes_to_ignore": [ ",", "\\$", "(?s).*#### " ] } ], "output_type": "generate_until", "generation_kwargs": { "until": [ "\n\n", "Question:" ], "do_sample": false, "temperature": 0.0 }, "repeats": 1, "filter_list": [ { "name": "get-answer", "filter": [ { "function": "regex", "regex_pattern": "#### (\\-?[0-9\\.\\,]+)" }, { "function": "take_first" } ] } ], "should_decontaminate": false, "metadata": { "version": 2.0 } } }, "versions": { "gsm8k": 2.0 }, "n-shot": { "gsm8k": 5 }, "config": { "model": "hf", "model_args": "pretrained=UCLA-AGI/zephyr-7b-sft-full-SPIN-iter1,revision=main,dtype=bfloat16", "batch_size": "auto", "batch_sizes": [], "device": null, "use_cache": null, "limit": null, "bootstrap_iters": 100000, "gen_kwargs": null }, "git_hash": "0acdfc3" }