Enderchef committed on
Commit
9dcd426
·
verified ·
1 Parent(s): aed021b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -9
app.py CHANGED
@@ -33,17 +33,15 @@ def evaluate(model_id, sample_count, config_name):
33
  for item in dataset:
34
  prompt, answer = format_prompt(item)
35
  output = gen(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
36
- output_letter = next((char for char in output[::-1] if char in "ABCD"), None)
37
  is_correct = output_letter == answer
38
  correct += is_correct
39
  results.append((prompt, output.strip(), answer, output_letter, is_correct))
40
 
41
  accuracy = correct / len(dataset) * 100
42
- return f"Accuracy: {accuracy:.2f}%", results
43
 
44
  def run(model_id, sample_count, config_name):
45
- if config_name == "coming soon":
46
- return "Only MMLU is currently available. MMLU-Pro and HLE coming soon.", ""
47
  score, details = evaluate(model_id, sample_count, config_name)
48
  formatted = "\n\n".join([
49
  f"### Question:\n{q}\n\n**Model Answer:** {o}\n**Expected:** {a}\n**Predicted:** {g}\n**Correct:** {c}"
@@ -95,8 +93,4 @@ with gr.Blocks(css="body {font-family: Inter, sans-serif; padding: 1em; max-widt
95
  run_button.click(run, inputs=[model_id, sample_count, config_name], outputs=[acc_output, detail_output])
96
  download_button.click(save_text, inputs=detail_output, outputs=gr.File())
97
 
98
- gr.Markdown("""
99
- MMLU-Pro and HLE support will be added soon.
100
- """)
101
-
102
- demo.launch()
 
33
  for item in dataset:
34
  prompt, answer = format_prompt(item)
35
  output = gen(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
36
+ output_letter = next((char for char in reversed(output) if char in "ABCD"), None)
37
  is_correct = output_letter == answer
38
  correct += is_correct
39
  results.append((prompt, output.strip(), answer, output_letter, is_correct))
40
 
41
  accuracy = correct / len(dataset) * 100
42
+ return f"Accuracy: {accuracy:.2f}%, out of {len(dataset)} samples", results
43
 
44
  def run(model_id, sample_count, config_name):
 
 
45
  score, details = evaluate(model_id, sample_count, config_name)
46
  formatted = "\n\n".join([
47
  f"### Question:\n{q}\n\n**Model Answer:** {o}\n**Expected:** {a}\n**Predicted:** {g}\n**Correct:** {c}"
 
93
  run_button.click(run, inputs=[model_id, sample_count, config_name], outputs=[acc_output, detail_output])
94
  download_button.click(save_text, inputs=detail_output, outputs=gr.File())
95
 
96
+ demo.launch()