helliun committed on
Commit 5325678 · verified · 1 Parent(s): a8cd06d

Create app.py

Files changed (1)
  1. app.py +272 -0
app.py ADDED
@@ -0,0 +1,272 @@
+ import pandas as pd
+ from openai import OpenAI
+ import json
+ import random
+ import gradio as gr
+
+ client = OpenAI()  # reads the OPENAI_API_KEY environment variable
+
+ def generate_questions(category):
+     if category == "":
+         category = "general knowledge"
+     print(category)
+     response = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "text",
+                         "text": "Break the category \"" + category + "\" into 6 subcategories, and for each subcategory create 5 True/False questions ranging from a question a Beginner would know to a question only an Expert would know. There should be as many True as False, and the structure of the questions should not make it obvious which is the answer. The harder questions should be tricky and trick non-experts into saying the wrong thing. Provide the correct answers and a field with a 1 sentence explanation. This will total out to 30 questions. Output just a JSON, nothing else. Here's an example JSON output for \"nutrition\":\n\n```json\n{\n \"Macronutrients\": [\n {\n \"question\": \"Protein is one of the three primary macronutrients.\",\n \"answer\": true,\n \"explanation\": \"Protein is one of the three primary macronutrients, along with carbohydrates and fats.\"\n },\n {\n \"question\": \"Carbohydrates are the body's main source of energy.\",\n \"answer\": true,\n \"explanation\": \"Carbohydrates are typically the body's preferred energy source.\"\n },\n {\n \"question\": \"Fats have the same caloric content per gram as carbohydrates.\",\n \"answer\": false,\n \"explanation\": \"Fats have 9 calories per gram, while carbohydrates have 4 calories per gram.\"\n },\n {\n \"question\": \"All proteins are equally effective for muscle growth.\",\n \"answer\": false,\n \"explanation\": \"Different proteins have varying amino acid profiles and bioavailability, affecting their effectiveness.\"\n },\n {\n \"question\": \"Omega-3 fatty acids are a type of fat that can reduce inflammation.\",\n \"answer\": true,\n \"explanation\": \"Omega-3 fatty acids, found in foods like fish, are known to have anti-inflammatory properties.\"\n }\n ],\n \"Micronutrients\": [\n { ..."
+                     }
+                 ]
+             }
+         ],
+         response_format={"type": "json_object"},
+         temperature=1,
+         max_tokens=4071,
+         top_p=1,
+         frequency_penalty=0,
+         presence_penalty=0
+     )
+     return json.loads(response.choices[0].message.content)
+
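+ # The parsed quiz (shape inferred from the prompt above; subcategory names
+ # vary by category):
+ #   {"<subcategory>": [{"question": str, "answer": bool, "explanation": str},
+ #                      ... 5 per subcategory], ... 6 subcategories}
+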
+ # Function to calculate the MetaCog score with C in {-1, +1}.
+ # (Currently unused: calculate_scores below recomputes the score with C in {0, 1}.)
+ def calculate_meta_cog_score(df):
+     df['Correct'] = df['User Answer'] == df['Correct Answer']
+     df['C'] = df['Correct'].apply(lambda x: 1 if x else -1)
+     n = len(df)
+     sum_C_Conf = (df['C'] * df['Confidence']).sum()
+     meta_cog_ratio = 0.5 + (sum_C_Conf / (2 * n))
+     return meta_cog_ratio
+
+ def display_current_question(questions, index):
+     if index < len(questions):
+         question = questions[index]
+         return (
+             f"**Question {index + 1}:** {question['question']}",
+             None, None, True
+         )
+     else:
+         return ("", None, None, False)
+
+ def calculate_scores(df):
+     df['Correct'] = df['User Answer'] == df['Correct Answer']
+     df['C'] = df['Correct'].apply(lambda x: 1 if x else 0)
+
+     # Expected score based on confidence
+     df['Expected Score'] = df['Confidence']
+     df['Actual Score'] = df['C']
+
+     # Difference between expected and actual scores
+     df['Overconfidence'] = (df['Expected Score'] > df['Actual Score']).astype(float) * (df['Expected Score'] - df['Actual Score'])
+     df['Underconfidence'] = (df['Expected Score'] < df['Actual Score']).astype(float) * (df['Actual Score'] - df['Expected Score'])
+
+     n = len(df)
+     sum_C_Conf = (df['C'] * df['Confidence']).sum()
+     meta_cog_ratio = 0.5 + (sum_C_Conf / (2 * n))
+
+     accuracy = df['Correct'].mean()
+     overconfidence = df['Overconfidence'].sum() / n
+     underconfidence = df['Underconfidence'].sum() / n
+
+     # Report whole-percent strings
+     return {
+         'MetaCog Score': f"{round(meta_cog_ratio * 100)}%",
+         'Accuracy': f"{round(accuracy * 100)}%",
+         'Overconfidence': f"{round(overconfidence * 100)}%",
+         'Underconfidence': f"{round(underconfidence * 100)}%"
+     }
+
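+ # Illustrative check of the formula above: with C in {0, 1},
+ # score = 0.5 + sum(C * confidence) / (2 * n). For three answers --
+ # correct at 0.9, correct at 0.5, wrong at 0.8 -- sum(C * confidence)
+ # = 0.9 + 0.5 + 0 = 1.4 and n = 3, so score = 0.5 + 1.4 / 6 ≈ 0.73 (73%).
+ # Confident correct answers push the score toward 1; with C in {0, 1},
+ # confidence on wrong answers is unrewarded but never drops the score
+ # below 0.5. Overconfidence averages max(confidence - correctness, 0)
+ # per question, and Underconfidence mirrors it in the other direction.
+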
+ # Function to analyze results using GPT-4o-mini
+ def analyze_results(df, overall_scores, subcategory_scores):
+     # Prepare the data for analysis
+     questions = df['Question'].tolist()
+     correct_answers = df['Correct Answer'].tolist()
+     user_answers = df['User Answer'].tolist()
+     explanations = df['Explanation'].tolist()
+     confidence = df['Confidence'].tolist()
+     subcategories = df['Subcategory'].tolist()
+
+     # Generate a summary of the results
+     response = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {
+                 "role": "user",
+                 "content": f"""
+ Analyze the following quiz results:
+ - Overall MetaCog Score: {overall_scores['MetaCog Score']}
+ - Overall Accuracy: {overall_scores['Accuracy']}
+ - Overall Overconfidence: {overall_scores['Overconfidence']}
+ - Overall Underconfidence: {overall_scores['Underconfidence']}
+
+ Subcategory scores:
+ {subcategory_scores}
+
+ The following is a list of my answers and confidence levels for each question, with the correct answers and subcategory:
+ {list(zip(questions, user_answers, correct_answers, explanations, confidence, subcategories))}
+
+ Provide an analysis of what I got wrong in terms of overall sections and specific questions, as well as what I was overconfident and underconfident in. Don't use numbers, as they're already displayed elsewhere.
+ The analysis should be only about 2 paragraphs. Write the subcategory names in bold when you use them.
+ """
+             }
+         ],
+         # response_format={ "type": "json_object" },
+         temperature=0.7,
+         max_tokens=1024,
+         top_p=1,
+         frequency_penalty=0,
+         presence_penalty=0
+     )
+
+     analysis = response.choices[0].message.content
+
+     # Start the table with larger column titles using <b> for bold and <span> for custom styling
+     question_details = (
+         "<table><thead><tr>"
+         "<th><b><span style='font-size:16px'>Question</span></b></th>"
+         "<th><b><span style='font-size:16px'>User Answer</span></b></th>"
+         "<th><b><span style='font-size:16px'>Correct Answer</span></b></th>"
+         "<th><b><span style='font-size:16px'>Explanation</span></b></th>"
+         "</tr></thead><tbody>"
+     )
+
+     for q, ua, ca, subcategory, e in zip(questions, user_answers, correct_answers, subcategories, explanations):
+         user_answer_str = 'True' if ua else 'False'
+         correct_answer_str = 'True' if ca else 'False'
+
+         # Check if the answer is incorrect; bold the whole row if so
+         if ua != ca:
+             question_details += (
+                 f"<tr><td><b>{q}</b></td><td><b>{user_answer_str}</b></td>"
+                 f"<td><b>{correct_answer_str}</b></td><td><b>{e}</b></td></tr>"
+             )
+         else:
+             question_details += (
+                 f"<tr><td>{q}</td><td>{user_answer_str}</td>"
+                 f"<td>{correct_answer_str}</td><td>{e}</td></tr>"
+             )
+
+     question_details += "</tbody></table>"
+
+     return f"## Analysis of Results\n\n{analysis}\n\n## Detailed Questions and Answers\n\n{question_details}"
+
+ # Record the answer; once the quiz is finished, compute scores and the analysis
+ def submit_answer(category, questions, index, user_answer, confidence, user_answers):
+     question_data = questions[index]
+     subcategory = question_data["subcategory"]
+
+     user_answers.append({
+         "Question": question_data["question"],
+         "Explanation": question_data["explanation"],
+         "User Answer": user_answer == "True",
+         "Correct Answer": question_data["answer"],
+         "Confidence": confidence,
+         "Subcategory": subcategory
+     })
+     index += 1
+
+     if index >= len(questions):
+         df = pd.DataFrame(user_answers)
+         overall_scores = calculate_scores(df)
+         subcategory_scores = df.groupby('Subcategory').apply(calculate_scores).to_dict()
+         analysis = analyze_results(df, overall_scores, subcategory_scores)
+
+         # Column order must match the key order of the dict returned by
+         # calculate_scores: MetaCog Score, Accuracy, Overconfidence, Underconfidence.
+         overall_score_df = pd.DataFrame([["Overall", *overall_scores.values()]], columns=['Subcategory', 'MetaCog Score', 'Accuracy', 'Overconfidence', 'Underconfidence'])
+         subcategory_scores_df = pd.DataFrame([(subcategory, *score.values()) for subcategory, score in subcategory_scores.items()], columns=['Subcategory', 'MetaCog Score', 'Accuracy', 'Overconfidence', 'Underconfidence'])
+         results_df = pd.concat([overall_score_df, subcategory_scores_df], ignore_index=True)
+         results_df = gr.DataFrame(label="Results", value=results_df, visible=True)
+         return "", index, gr.update(visible=False), user_answers, results_df, gr.update(visible=False), gr.update(visible=False), gr.update(value=analysis, visible=True)
+     else:
+         question_text, _, _, visible = display_current_question(questions, index)
+         return question_text, index, gr.update(visible=True), user_answers, gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
+
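+ # e.g. subcategory_scores above ends up keyed by subcategory (values hypothetical):
+ #   {'Macronutrients': {'MetaCog Score': '73%', 'Accuracy': '80%', ...},
+ #    'Micronutrients': {...}, ...}
+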
+ # Gradio UI setup
+ with gr.Blocks(theme="soft") as app:
+     gr.Markdown("""## Metacognition Test
+ See how well you know what you know. Enter a category and a 30-question True/False quiz will be generated. Answer the questions, and you'll be scored both on your knowledge of the category and on your ***knowledge of your knowledge*** of it.""")
+     category_input = gr.Textbox(label="Category", placeholder="general knowledge")
+     submit_category = gr.Button("Generate Quiz")
+     question_area = gr.Markdown(visible=False)
+     answer_area = gr.Radio(["True", "False"], label="Your Answer", visible=False)
+     confidence_slider = gr.Slider(0, 1, label="Confidence Level", value=0.5, visible=False)
+     submit_answer_btn = gr.Button("Submit Answer", visible=False)
+     result_area = gr.DataFrame(label="Results", visible=False)
+     loading_text = gr.Textbox(label="Generating Test...", visible=False)
+     analysis_area = gr.Markdown(visible=False)  # the written analysis appears here after the quiz
+     questions_state = gr.State()
+     index_state = gr.State(0)
+     user_answers_state = gr.State([])
+
+     def on_generate_quiz(category):
+         questions_data = generate_questions(category)
+
+         # Flatten {subcategory: [questions]} into one tagged, shuffled list
+         questions = []
+         for subcategory, qs in questions_data.items():
+             for q in qs:
+                 q["subcategory"] = subcategory
+                 questions.append(q)
+
+         random.shuffle(questions)
+
+         index = 0
+         question_text, _, _, visible = display_current_question(questions, index)
+         return (
+             gr.update(value=question_text, visible=visible),
+             questions,
+             index,
+             [],
+             gr.update(visible=True),
+             gr.update(visible=True),
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False)
+         )
+
+     def remove_button():
+         return gr.update(visible=False)
+
+     def display_loading():
+         return gr.update(visible=True)
+
+     def display_results(index, questions):
+         if index >= len(questions):
+             return gr.update(visible=True)
+         return gr.update()  # quiz still in progress: leave the results table untouched
+
+     submit_category.click(remove_button, inputs=[], outputs=[submit_category])
+     submit_category.click(display_loading, inputs=[], outputs=[loading_text])
+
+     submit_category.click(
+         on_generate_quiz,
+         inputs=[category_input],
+         outputs=[
+             question_area,
+             questions_state,
+             index_state,
+             user_answers_state,
+             answer_area,
+             confidence_slider,
+             submit_answer_btn,
+             result_area,
+             submit_category,
+             loading_text
+         ]
+     )
+
+     submit_answer_btn.click(
+         submit_answer,
+         inputs=[category_input, questions_state, index_state, answer_area, confidence_slider, user_answers_state],
+         outputs=[question_area, index_state, submit_answer_btn, user_answers_state, result_area, confidence_slider, answer_area, analysis_area]
+     )
+
+     submit_answer_btn.click(display_results, inputs=[index_state, questions_state], outputs=[result_area])
+
+ # Launch the app (the OpenAI() client above needs OPENAI_API_KEY set in the environment)
+ app.launch(share=False)