Enderchef committed (verified)
Commit 3d20418 · 1 Parent(s): a319c62

Update app.py

Files changed (1)
  1. app.py +21 -32
app.py CHANGED
@@ -70,8 +70,7 @@ def evaluate(model_id, sample_count, config_name):
         accuracy = correct / len(dataset) * 100
         record = {"model_id": model_id, "subject": subject, "accuracy": accuracy}
         with open("eval.jsonl", "a") as f:
-            f.write(json.dumps(record) + "
-")
+            f.write(json.dumps(record) + "\n")  # Fixed: added closing double quote and newline
         total_correct += correct
         total_samples += len(dataset)
     avg_accuracy = total_correct / total_samples * 100
@@ -96,23 +95,14 @@ def evaluate(model_id, sample_count, config_name):
 
 def run(model_id, sample_count, config_name):
     score, details = evaluate(model_id, sample_count, config_name)
-    formatted = "
-
-".join([
-        f"### Question:
-{q}
-
-**Model Answer:** {o}
-**Expected:** {a}
-**Predicted:** {g}
-**Correct:** {c}"
+    formatted = "\n\n".join([
+        f"### Question:\n{q}\n\n**Model Answer:** {o}\n**Expected:** {a}\n**Predicted:** {g}\n**Correct:** {c}"
         for q, o, a, g, c in details
     ])
     accuracy_value = float(score.split()[1][:-1])
     record = {"model_id": model_id, "subject": config_name, "accuracy": accuracy_value}
     with open("eval.jsonl", "a") as f:
-        f.write(json.dumps(record) + "
-")
+        f.write(json.dumps(record) + "\n")  # Fixed: added closing double quote and newline
     return score, formatted
 
 def save_text(text):
@@ -151,25 +141,24 @@ with gr.Blocks(css="body {font-family: Inter, sans-serif; padding: 1em; max-widt
     leaderboard_table = gr.Dataframe(headers=["Model ID", "Average Accuracy"], interactive=False, datatype=["str", "number"], row_count=20, col_count=2)
 
     def load_leaderboard():
-    try:
-        df = pd.read_json("eval.jsonl", lines=True)
-        df_avg = df.groupby("model_id")["accuracy"].mean().reset_index()
-        df_avg.columns = ["model_id", "average_accuracy"]
-        df_sorted = df_avg.sort_values(by="average_accuracy", ascending=False)
-        top10 = df_sorted.head(10)
-
-        fig, ax = plt.subplots()
-        ax.barh(top10['model_id'], top10['average_accuracy'])
-        ax.set_xlabel("Average Accuracy")
-        ax.set_ylabel("Model")
-        ax.set_title("Top 10 Models by Average Accuracy")
-
-        return fig, df_sorted
-    except Exception as e:
-        return plt.figure(), pd.DataFrame(columns=["model_id", "average_accuracy"])
+        try:
+            df = pd.read_json("eval.jsonl", lines=True)
+            df_avg = df.groupby("model_id")["accuracy"].mean().reset_index()
+            df_avg.columns = ["model_id", "average_accuracy"]
+            df_sorted = df_avg.sort_values(by="average_accuracy", ascending=False)
+            top10 = df_sorted.head(10)
+
+            fig, ax = plt.subplots()
+            ax.barh(top10['model_id'], top10['average_accuracy'])
+            ax.set_xlabel("Average Accuracy")
+            ax.set_ylabel("Model")
+            ax.set_title("Top 10 Models by Average Accuracy")
+
+            return fig, df_sorted
         except Exception as e:
-            return plt.figure(), pd.DataFrame(columns=["model_id", "subject", "accuracy"])
+            # Handle the case where eval.jsonl might not exist yet
+            return plt.figure(), pd.DataFrame(columns=["model_id", "average_accuracy"])  # Corrected columns
 
     demo.load(load_leaderboard, inputs=[], outputs=[leaderboard_plot, leaderboard_table])
 
-demo.launch()
+    demo.launch()
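
Both f.write fixes above replace literal line breaks inside string literals with \n escape sequences, which also keeps eval.jsonl valid JSON Lines (one object per line). A minimal sketch of the corrected pattern; the sample details tuple, the demo record values, and the "Accuracy: 100.0%" score format are illustrative assumptions, not taken from the app:

```python
import json

# Hypothetical (question, model output, expected, predicted, correct?) tuple
details = [("What is 2 + 2?", "The answer is 4.", "B", "B", True)]

# Escaped newlines instead of literal line breaks inside the f-string
formatted = "\n\n".join([
    f"### Question:\n{q}\n\n**Model Answer:** {o}\n**Expected:** {a}\n**Predicted:** {g}\n**Correct:** {c}"
    for q, o, a, g, c in details
])
print(formatted)

# json.dumps(...) + "\n" appends exactly one JSON object per line of eval.jsonl
record = {"model_id": "demo/model", "subject": "demo_subject", "accuracy": 100.0}
with open("eval.jsonl", "a") as f:
    f.write(json.dumps(record) + "\n")

# The parsing in run() implies a score string shaped like "Accuracy: 100.0%" (assumed here)
score = "Accuracy: 100.0%"
accuracy_value = float(score.split()[1][:-1])  # "100.0%" -> 100.0
```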
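
The leaderboard hunk reads those records back with pd.read_json(..., lines=True) and averages accuracy per model. A small self-contained round trip of that aggregation, using hypothetical model IDs and accuracies; the file is opened with "w" here for a deterministic result, whereas the app appends with "a":

```python
import json

import pandas as pd

# Hypothetical per-subject records in the same shape the app writes
records = [
    {"model_id": "demo/model-a", "subject": "subject_1", "accuracy": 80.0},
    {"model_id": "demo/model-a", "subject": "subject_2", "accuracy": 60.0},
    {"model_id": "demo/model-b", "subject": "subject_1", "accuracy": 90.0},
]
with open("eval.jsonl", "w") as f:  # the app uses "a"; "w" keeps this sketch deterministic
    for record in records:
        f.write(json.dumps(record) + "\n")

# Same aggregation as load_leaderboard(): mean accuracy per model, sorted descending
df = pd.read_json("eval.jsonl", lines=True)
df_avg = df.groupby("model_id")["accuracy"].mean().reset_index()
df_avg.columns = ["model_id", "average_accuracy"]
df_sorted = df_avg.sort_values(by="average_accuracy", ascending=False)
print(df_sorted)  # demo/model-b: 90.0, demo/model-a: 70.0
```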