Create app.py
app.py
ADDED
@@ -0,0 +1,272 @@
import pandas as pd
from openai import OpenAI
import json
import gradio as gr
import random  # used to shuffle the generated questions

client = OpenAI()

# Ask GPT-4o-mini to split the category into 6 subcategories with 5 True/False
# questions each, and return the parsed JSON.
def generate_questions(category):
    if category == "":
        category = "general knowledge"
    print(category)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Break the category \""+category+"\" into 6 subcategories, and for each subcategory create 5 True/False questions ranging from a question a Beginner would know to a question only an Expert would know. There should be as many True as False, and the structure of the questions should not make it obvious which is the answer. The harder questions should be tricky and trick non-experts into saying the wrong thing. Provide the correct answers and a field with a 1 sentence explanation. This will total out to 30 questions. Output just a JSON, nothing else. Here's an example JSON output for \"nutrition\":\n\n```json\n{\n \"Macronutrients\": [\n {\n \"question\": \"Protein is one of the three primary macronutrients.\",\n \"answer\": true,\n \"explanation\": \"Protein is one of the three primary macronutrients, along with carbohydrates and fats.\"\n },\n {\n \"question\": \"Carbohydrates are the body's main source of energy.\",\n \"answer\": true,\n \"explanation\": \"Carbohydrates are typically the body's preferred energy source.\"\n },\n {\n \"question\": \"Fats have the same caloric content per gram as carbohydrates.\",\n \"answer\": false,\n \"explanation\": \"Fats have 9 calories per gram, while carbohydrates have 4 calories per gram.\"\n },\n {\n \"question\": \"All proteins are equally effective for muscle growth.\",\n \"answer\": false,\n \"explanation\": \"Different proteins have varying amino acid profiles and bioavailability, affecting their effectiveness.\"\n },\n {\n \"question\": \"Omega-3 fatty acids are a type of fat that can reduce inflammation.\",\n \"answer\": true,\n \"explanation\": \"Omega-3 fatty acids, found in foods like fish, are known to have anti-inflammatory properties.\"\n }\n ],\n \"Micronutrients\": [\n { ..."
                    }
                ]
            }
        ],
        response_format={ "type": "json_object" },
        temperature=1,
        max_tokens=4071,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return json.loads(response.choices[0].message.content)

# Function to calculate the MetaCog score:
# meta_cog_ratio = 0.5 + sum(C_i * confidence_i) / (2 * n), where C_i is +1
# for a correct answer and -1 for an incorrect one.
def calculate_meta_cog_score(df):
    df['Correct'] = df['User Answer'] == df['Correct Answer']
    df['C'] = df['Correct'].apply(lambda x: 1 if x else -1)
    n = len(df)
    sum_C_Conf = (df['C'] * df['Confidence']).sum()
    meta_cog_ratio = 0.5 + (sum_C_Conf / (2 * n))
    return meta_cog_ratio

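# Illustrative note (not used by the app): with C = [+1, +1, -1] and
# Confidence = [0.9, 0.5, 0.8], sum(C * Confidence) = 0.9 + 0.5 - 0.8 = 0.6
# and n = 3, so meta_cog_ratio = 0.5 + 0.6 / (2 * 3) = 0.6, i.e. a 60% score.
# Confidently correct answers push the ratio toward 1.0, confidently wrong
# answers pull it toward 0, and low-confidence answers leave it near 0.5.
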
def display_current_question(questions, index):
    if index < len(questions):
        question = questions[index]
        return (
            f"**Question {index + 1}:** {question['question']}",
            None, None, True
        )
    else:
        return ("", None, None, False)

def calculate_scores(df):
    df['Correct'] = df['User Answer'] == df['Correct Answer']
    df['C'] = df['Correct'].apply(lambda x: 1 if x else 0)

    # Expected score based on confidence
    df['Expected Score'] = df['Confidence']
    df['Actual Score'] = df['C']

    # Difference between expected and actual scores
    df['Overconfidence'] = (df['Expected Score'] > df['Actual Score']).astype(float) * (df['Expected Score'] - df['Actual Score'])
    df['Underconfidence'] = (df['Expected Score'] < df['Actual Score']).astype(float) * (df['Actual Score'] - df['Expected Score'])

    n = len(df)
    sum_C_Conf = (df['C'] * df['Confidence']).sum()
    meta_cog_ratio = 0.5 + (sum_C_Conf / (2 * n))

    accuracy = df['Correct'].mean()
    overconfidence = df['Overconfidence'].sum() / n
    underconfidence = df['Underconfidence'].sum() / n

    return {
        'MetaCog Score': f"{round(meta_cog_ratio * 100, 0)}%",
        'Accuracy': f"{round(accuracy * 100, 0)}%",
        'Overconfidence': f"{round(overconfidence * 100, 0)}%",
        'Underconfidence': f"{round(underconfidence * 100, 0)}%"
    }

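# Illustrative note (not used by the app): a wrong answer given with
# Confidence 0.8 has Expected Score 0.8 > Actual Score 0, contributing 0.8 to
# Overconfidence; a correct answer given with Confidence 0.6 has Actual Score
# 1 > Expected Score 0.6, contributing 0.4 to Underconfidence. Both columns
# are then averaged over the number of questions.
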
# Function to analyze results using GPT-4o-mini
def analyze_results(df, overall_scores, subcategory_scores):
    # Prepare the data for analysis
    questions = df['Question'].tolist()
    correct_answers = df['Correct Answer'].tolist()
    user_answers = df['User Answer'].tolist()
    explanations = df['Explanation'].tolist()
    confidence = df['Confidence'].tolist()
    subcategories = df['Subcategory'].tolist()

    # Generate a summary of the results
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": f"""
                Analyze the following quiz results:
                - Overall MetaCog Score: {overall_scores['MetaCog Score']}
                - Overall Accuracy: {overall_scores['Accuracy']}
                - Overall Overconfidence: {overall_scores['Overconfidence']}
                - Overall Underconfidence: {overall_scores['Underconfidence']}

                Subcategory scores:
                {subcategory_scores}

                The following is a list of my answers and confidence levels for each question, with the correct answers and subcategory:
                {list(zip(questions, user_answers, correct_answers, explanations, confidence, subcategories))}

                Provide an analysis of what I got wrong in terms of overall sections and specific questions, as well as what I was overconfident and underconfident in. Don't use numbers, as they're already displayed elsewhere.
                The analysis should be only about 2 paragraphs. Write the subcategory names in bold when you use them.
                """
            }
        ],
        # response_format={ "type": "json_object" },
        temperature=0.7,
        max_tokens=1024,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    analysis = response.choices[0].message.content

    # Start the table with larger column titles using <b> for bold and <span> for custom styling
    question_details = (
        "<table><thead><tr>"
        "<th><b><span style='font-size:16px'>Question</span></b></th>"
        "<th><b><span style='font-size:16px'>User Answer</span></b></th>"
        "<th><b><span style='font-size:16px'>Correct Answer</span></b></th>"
        "<th><b><span style='font-size:16px'>Explanation</span></b></th>"
        "</tr></thead><tbody>"
    )

    for q, ua, ca, subcategory, e in zip(questions, user_answers, correct_answers, subcategories, explanations):
        user_answer_str = 'True' if ua else 'False'
        correct_answer_str = 'True' if ca else 'False'

        # Bold the row if the answer is incorrect
        if ua != ca:
            question_details += (
                f"<tr><td><b>{q}</b></td><td><b>{user_answer_str}</b></td>"
                f"<td><b>{correct_answer_str}</b></td><td><b>{e}</b></td></tr>"
            )
        else:
            question_details += (
                f"<tr><td>{q}</td><td>{user_answer_str}</td>"
                f"<td>{correct_answer_str}</td><td>{e}</td></tr>"
            )

    question_details += "</tbody></table>"

    return f"## Analysis of Results\n\n{analysis}\n\n## Detailed Questions and Answers\n\n{question_details}"


# Record an answer, and either show the next question or compute the final results
def submit_answer(category, questions, index, user_answer, confidence, user_answers):
    question_data = questions[index]
    subcategory = question_data["subcategory"]

    user_answers.append({
        "Question": question_data["question"],
        "Explanation": question_data["explanation"],
        "User Answer": user_answer == "True",
        "Correct Answer": question_data["answer"],
        "Confidence": confidence,
        "Subcategory": subcategory
    })
    index += 1

    if index >= len(questions):
        df = pd.DataFrame(user_answers)
        overall_scores = calculate_scores(df)
        subcategory_scores = df.groupby('Subcategory').apply(calculate_scores).to_dict()
        analysis = analyze_results(df, overall_scores, subcategory_scores)

        # Column order matches the order of the dict returned by calculate_scores
        overall_score_df = pd.DataFrame([["Overall", *overall_scores.values()]], columns=['Subcategory', 'MetaCog Score', 'Accuracy', 'Overconfidence', 'Underconfidence'])
        subcategory_scores_df = pd.DataFrame([(subcategory, *score.values()) for subcategory, score in subcategory_scores.items()], columns=['Subcategory', 'MetaCog Score', 'Accuracy', 'Overconfidence', 'Underconfidence'])
        results_df = pd.concat([overall_score_df, subcategory_scores_df], ignore_index=True)
        results_df = gr.DataFrame(label="Results", value=results_df, visible=True)
        return "", index, gr.update(visible=False), user_answers, results_df, gr.update(visible=False), gr.update(visible=False), gr.update(value=analysis, visible=True)
    else:
        question_text, _, _, visible = display_current_question(questions, index)
        return question_text, index, gr.update(visible=True), user_answers, gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)

# Gradio UI setup
with gr.Blocks(theme="soft") as app:
    gr.Markdown("""## Metacognition Test
See how well you know what you know. Enter a category and a 30-question quiz will be generated. Answer the True/False questions about it, and scores will then be calculated on your knowledge of the category, and on your ***knowledge of your knowledge*** of the category.""")
    category_input = gr.Textbox(label="Category", placeholder="general knowledge")
    submit_category = gr.Button("Generate Quiz")
    question_area = gr.Markdown(visible=False)
    answer_area = gr.Radio(["True", "False"], label="Your Answer", visible=False)
    confidence_slider = gr.Slider(0, 1, label="Confidence Level", value=0.5, visible=False)
    submit_answer_btn = gr.Button("Submit Answer", visible=False)
    result_area = gr.DataFrame(label="Results", visible=False)
    loading_text = gr.Textbox(label="Generating Test...", visible=False)
    analysis_area = gr.Markdown(visible=False)  # Markdown area for the written analysis
    questions_state = gr.State()
    index_state = gr.State(0)
    user_answers_state = gr.State([])

    def on_generate_quiz(category):
        questions_data = generate_questions(category)

        # Flatten the {subcategory: [questions]} dict into a single list,
        # tagging each question with its subcategory
        questions = []
        for subcategory, qs in questions_data.items():
            for q in qs:
                q["subcategory"] = subcategory
                questions.append(q)

        random.shuffle(questions)

        index = 0
        question_text, _, _, visible = display_current_question(questions, index)
        return (
            gr.update(value=question_text, visible=visible),
            questions,
            index,
            [],
            gr.update(visible=visible),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False)
        )

    def remove_button():
        return gr.update(visible=False)

    def display_loading():
        return gr.update(visible=True)

    def display_results(index, questions):
        if index >= len(questions):
            return gr.update(visible=True)
        return gr.update()  # no change while the quiz is still in progress

    submit_category.click(remove_button, inputs=[], outputs=[submit_category])
    submit_category.click(display_loading, inputs=[], outputs=[loading_text])

    submit_category.click(
        on_generate_quiz,
        inputs=[category_input],
        outputs=[
            question_area,
            questions_state,
            index_state,
            user_answers_state,
            question_area,
            answer_area,
            confidence_slider,
            submit_answer_btn,
            result_area,
            submit_category,
            loading_text
        ]
    )


    submit_answer_btn.click(
        submit_answer,
        inputs=[category_input, questions_state, index_state, answer_area, confidence_slider, user_answers_state],
        outputs=[question_area, index_state, submit_answer_btn, user_answers_state, result_area, confidence_slider, answer_area, analysis_area]
    )

    submit_answer_btn.click(display_results, inputs=[index_state, questions_state], outputs=[result_area])

# Launch the app
app.launch(share=False)
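
A quick, minimal sanity check of the scoring logic (not part of app.py; the sample answers and confidence values below are invented for illustration, and it assumes pandas is installed and that calculate_scores from the file above is available in the same session):

import pandas as pd

sample = pd.DataFrame({
    "User Answer":    [True, False, True],
    "Correct Answer": [True, True,  False],
    "Confidence":     [0.9,  0.4,   0.8],
})
# One of three answers is correct, so Accuracy is about 33%.
# C = [1, 0, 0], sum(C * Confidence) = 0.9, n = 3,
# so the MetaCog Score is 0.5 + 0.9 / (2 * 3) = 0.65, shown as roughly 65%.
print(calculate_scores(sample))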