Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,62 +12,82 @@ wolfram_client = wolframalpha.Client(os.environ.get('WOLFRAM_APPID'))
|
|
12 |
|
13 |
def parse_questions(content):
|
14 |
"""Parse questions and their solutions from Claude's output"""
|
15 |
-
# Split content into questions
|
16 |
questions = []
|
17 |
-
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
parts = parts[1:]
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
else:
|
33 |
-
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
except Exception as e:
|
50 |
-
print(f"Error parsing question {question_num}: {e}")
|
51 |
-
continue
|
52 |
|
53 |
return questions
|
54 |
|
55 |
def verify_solution(problem, answer):
|
56 |
"""Verify a mathematical solution using Wolfram Alpha"""
|
57 |
try:
|
58 |
-
#
|
59 |
-
query = problem.replace('$$', '').replace('$', '')
|
|
|
|
|
|
|
|
|
60 |
result = wolfram_client.query(query)
|
61 |
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
for pod in result.pods:
|
64 |
-
if pod.title in ['Result', 'Solution', 'Numerical result']:
|
65 |
wolfram_answer = pod.text
|
66 |
# Extract numerical value
|
67 |
wolfram_nums = re.findall(r'[-+]?(?:\d*\.)?\d+', wolfram_answer)
|
68 |
if wolfram_nums:
|
|
|
|
|
|
|
|
|
69 |
return {
|
70 |
-
'verified':
|
71 |
'wolfram_solution': wolfram_answer,
|
72 |
'error': None
|
73 |
}
|
@@ -75,13 +95,13 @@ def verify_solution(problem, answer):
|
|
75 |
return {
|
76 |
'verified': False,
|
77 |
'wolfram_solution': None,
|
78 |
-
'error': "
|
79 |
}
|
80 |
except Exception as e:
|
81 |
return {
|
82 |
'verified': False,
|
83 |
'wolfram_solution': None,
|
84 |
-
'error': str(e)
|
85 |
}
|
86 |
|
87 |
def generate_test(subject):
|
@@ -94,7 +114,9 @@ def generate_test(subject):
|
|
94 |
3. Include "Solution:" before the solution
|
95 |
4. Show step-by-step work
|
96 |
5. End each solution with "Final answer = [number]"
|
97 |
-
6. Keep problems relatively simple (basic calculus, algebra, etc.)
|
|
|
|
|
98 |
|
99 |
message = anthropic.messages.create(
|
100 |
model="claude-3-opus-20240229",
|
@@ -115,17 +137,20 @@ def generate_test(subject):
|
|
115 |
verification_results = []
|
116 |
|
117 |
for q in questions:
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
else:
|
124 |
-
verification_note += "⚠️
|
125 |
-
if result['wolfram_solution']:
|
126 |
-
verification_note += f"Wolfram Alpha result: {result['wolfram_solution']}\n"
|
127 |
-
if result['error']:
|
128 |
-
verification_note += f"Note: {result['error']}\n"
|
129 |
|
130 |
# Add usage statistics
|
131 |
usage_stats = f"""
|
|
|
12 |
|
13 |
def parse_questions(content):
    """Parse questions and their solutions from Claude's output.

    A question starts on a line of the form ``<digits>)``. Everything up to
    a line containing ``Solution:`` is the problem statement; everything
    after it (until the next question header) is the solution. Lines that
    appear before the first question header are ignored.

    Args:
        content: Raw text to parse. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of dicts, one per question, with the keys ``'number'``
        (the digits before ``)`` as a string), ``'problem'``,
        ``'solution'`` (both stripped strings) and ``'final_answer'``
        (the last number, as a string, found on the most recent line
        containing "final answer" case-insensitively, or ``None``).
    """
    if not content:
        return []

    questions = []
    current_question = {}
    # Track the section explicitly. A "Solution:" marker on its own line
    # leaves the solution text empty, so the text's truthiness cannot be
    # used to decide whether following lines belong to the solution.
    in_solution = False

    for line in content.split('\n'):
        header = re.match(r'^\s*(\d+)\)', line)
        if header:
            # Start of a new question: flush the previous one first.
            if current_question:
                questions.append(current_question)
            current_question = {
                'number': header.group(1),
                'problem': line.split(')', 1)[1].strip(),
                'solution': '',
                'final_answer': None,
            }
            in_solution = False
        elif 'Solution:' in line and current_question:
            # Solution marker: text after it (possibly empty) opens the solution.
            current_question['problem'] = current_question['problem'].strip()
            current_question['solution'] = line.split('Solution:', 1)[1].strip()
            in_solution = True
        elif current_question:
            # Continuation line of whichever section is currently open.
            target = 'solution' if in_solution else 'problem'
            current_question[target] += '\n' + line

        # Extract final answer: the last number on a "final answer" line wins.
        if current_question and 'final answer' in line.lower():
            matches = re.findall(r'[-+]?(?:\d*\.)?\d+', line)
            if matches:
                current_question['final_answer'] = matches[-1]

    # Add last question
    if current_question:
        questions.append(current_question)

    # Clean up surrounding whitespace left by line-by-line accumulation.
    for q in questions:
        q['problem'] = q['problem'].strip()
        q['solution'] = q['solution'].strip()

    return questions
|
59 |
|
60 |
def verify_solution(problem, answer):
    """Verify a mathematical solution using Wolfram Alpha.

    The problem text is stripped of ``$`` delimiters and leading instruction
    words, sent to Wolfram Alpha via the module-level ``wolfram_client``,
    and the first number found in a result-like pod is compared with
    ``answer``.

    Args:
        problem: Problem statement text, possibly containing ``$``/``$$``.
        answer: The claimed final answer; must be convertible by ``float``.

    Returns:
        A dict with the keys ``'verified'`` (bool), ``'wolfram_solution'``
        (the pod text used for the comparison, or ``None``) and ``'error'``
        (a message string, or ``None`` on a completed comparison). Any
        exception raised along the way is reported through ``'error'``
        rather than propagated.
    """
    try:
        # Clean up the problem for Wolfram Alpha
        query = problem.replace('$$', '').replace('$', '')
        # Remove instruction words, keep only the mathematical expression.
        # Word boundaries keep the pattern from eating parts of longer
        # words (e.g. "solved" must not become "d").
        query = re.sub(
            r'(?i)\b(?:find|calculate|solve|evaluate|determine)\b', '', query
        )
        query = query.strip()

        result = wolfram_client.query(query)

        if not result.success:
            return {
                'verified': False,
                'wolfram_solution': None,
                'error': "Wolfram Alpha could not process the query"
            }

        # Look for numerical results in multiple pods
        for pod in result.pods:
            if pod.title in ['Result', 'Solution', 'Numerical result', 'Decimal approximation']:
                wolfram_answer = pod.text
                # NOTE(review): pod.text appears to be None for pods without
                # plaintext; skip those instead of letting re.findall raise
                # and abort the whole verification.
                if not wolfram_answer:
                    continue
                # Extract numerical value
                wolfram_nums = re.findall(r'[-+]?(?:\d*\.)?\d+', wolfram_answer)
                if wolfram_nums:
                    wolfram_value = float(wolfram_nums[0])
                    user_value = float(answer)
                    # Allow for small numerical differences
                    is_verified = abs(wolfram_value - user_value) < 0.01
                    return {
                        'verified': is_verified,
                        'wolfram_solution': wolfram_answer,
                        'error': None
                    }

        return {
            'verified': False,
            'wolfram_solution': None,
            'error': "No numerical solution found in Wolfram Alpha response"
        }
    except Exception as e:
        # Best-effort boundary: verification failures are reported to the
        # caller as data, never raised.
        return {
            'verified': False,
            'wolfram_solution': None,
            'error': f"Error during verification: {str(e)}"
        }
|
106 |
|
107 |
def generate_test(subject):
|
|
|
114 |
3. Include "Solution:" before the solution
|
115 |
4. Show step-by-step work
|
116 |
5. End each solution with "Final answer = [number]"
|
117 |
+
6. Keep problems relatively simple (basic calculus, algebra, etc.)
|
118 |
+
7. Make sure problems have clear numerical answers
|
119 |
+
8. Avoid word problems - focus on pure mathematical expressions"""
|
120 |
|
121 |
message = anthropic.messages.create(
|
122 |
model="claude-3-opus-20240229",
|
|
|
137 |
verification_results = []
|
138 |
|
139 |
for q in questions:
|
140 |
+
if q['final_answer'] is not None:
|
141 |
+
result = verify_solution(q['problem'], q['final_answer'])
|
142 |
+
verification_results.append(result)
|
143 |
+
verification_note += f"\nQuestion {q['number']}:\n"
|
144 |
+
if result['verified']:
|
145 |
+
verification_note += "✅ Solution verified by Wolfram Alpha\n"
|
146 |
+
else:
|
147 |
+
verification_note += "⚠️ Solution needs verification\n"
|
148 |
+
if result['wolfram_solution']:
|
149 |
+
verification_note += f"Wolfram Alpha result: {result['wolfram_solution']}\n"
|
150 |
+
if result['error']:
|
151 |
+
verification_note += f"Note: {result['error']}\n"
|
152 |
else:
|
153 |
+
verification_note += f"\nQuestion {q['number']}:\n⚠️ Could not extract final answer\n"
|
|
|
|
|
|
|
|
|
154 |
|
155 |
# Add usage statistics
|
156 |
usage_stats = f"""
|