Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -14,83 +14,87 @@ def parse_questions(content):
|
|
14 |
"""Parse questions and their solutions from Claude's output"""
|
15 |
# Split content into questions
|
16 |
questions = []
|
17 |
-
|
18 |
-
question_pattern = re.compile(r'\d+\)')
|
19 |
|
20 |
-
#
|
21 |
-
parts
|
22 |
-
if len(parts) > 1: # Skip the first empty part if it exists
|
23 |
parts = parts[1:]
|
24 |
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
27 |
try:
|
28 |
-
# Split into problem and solution
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
# Extract the final numerical answer if possible
|
35 |
-
# This is a simple example - you'll need to adjust based on your output format
|
36 |
-
final_answer = re.search(r'=\s*([-+]?\d*\.?\d+)', solution)
|
37 |
-
if final_answer:
|
38 |
-
final_answer = final_answer.group(1)
|
39 |
-
else:
|
40 |
-
final_answer = "Not found"
|
41 |
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
except Exception as e:
|
44 |
-
print(f"Error parsing question: {e}")
|
45 |
continue
|
46 |
|
47 |
return questions
|
48 |
|
49 |
-
def verify_solution(problem,
|
50 |
"""Verify a mathematical solution using Wolfram Alpha"""
|
51 |
try:
|
52 |
-
#
|
53 |
-
query =
|
54 |
result = wolfram_client.query(query)
|
55 |
|
56 |
-
#
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
return {
|
63 |
-
'verified':
|
64 |
-
'wolfram_solution':
|
65 |
-
'
|
66 |
}
|
67 |
except Exception as e:
|
68 |
return {
|
69 |
'verified': False,
|
70 |
-
'
|
71 |
-
'
|
72 |
}
|
73 |
|
74 |
-
def compare_solutions(wolfram_sol, claude_sol, tolerance=0.001):
    """Return True when two solutions are numerically equal within a tolerance.

    Args:
        wolfram_sol: Value from Wolfram Alpha; anything ``float()`` accepts.
        claude_sol: Value from Claude's solution; anything ``float()`` accepts.
        tolerance: Maximum absolute difference (exclusive) still treated as
            equal. Defaults to 0.001.

    Returns:
        True if both values convert to floats whose absolute difference is
        strictly below ``tolerance``; False otherwise, including when either
        value cannot be converted to a float.
    """
    try:
        w_val = float(wolfram_sol)
        c_val = float(claude_sol)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: float() of an int too large for a double.
        return False
    return abs(w_val - c_val) < tolerance
|
83 |
-
|
84 |
def generate_test(subject):
|
85 |
"""Generate and verify a math test"""
|
86 |
try:
|
87 |
-
|
88 |
-
system_prompt = """Generate 3 university-level math questions with numerical solutions that can be verified.
|
89 |
For each question:
|
90 |
-
1.
|
91 |
-
2.
|
92 |
-
3.
|
93 |
-
|
|
|
|
|
94 |
|
95 |
message = anthropic.messages.create(
|
96 |
model="claude-3-opus-20240229",
|
@@ -98,28 +102,30 @@ def generate_test(subject):
|
|
98 |
temperature=0.7,
|
99 |
messages=[{
|
100 |
"role": "user",
|
101 |
-
"content": f"{system_prompt}\n\nWrite an exam for {subject}."
|
102 |
}]
|
103 |
)
|
104 |
|
105 |
-
#
|
106 |
content = message.content[0].text
|
|
|
107 |
|
108 |
# Add verification results
|
|
|
109 |
verification_results = []
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
verification_note += f"\nQuestion {i+1}:\n"
|
116 |
if result['verified']:
|
117 |
verification_note += "✅ Solution verified by Wolfram Alpha\n"
|
118 |
else:
|
119 |
verification_note += "⚠️ Solution needs verification\n"
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
123 |
|
124 |
# Add usage statistics
|
125 |
usage_stats = f"""
|
@@ -133,47 +139,11 @@ def generate_test(subject):
|
|
133 |
• Wolfram API calls: {len(verification_results)}
|
134 |
"""
|
135 |
|
136 |
-
|
|
|
|
|
137 |
|
138 |
except Exception as e:
|
139 |
return f"Error: {str(e)}"
|
140 |
|
141 |
-
#
|
142 |
-
# Mathematics subjects offered in the dropdown.
subjects = [
    "Single Variable Calculus",
    "Multivariable Calculus",
    "Linear Algebra",
    "Differential Equations",
    "Real Analysis",
    "Complex Analysis",
    "Abstract Algebra",
    "Probability Theory",
    "Numerical Analysis",
    "Topology",
]

# Delimiter pairs handed to the Markdown output: "$$...$$" is flagged as
# display math, "$...$" as non-display.
_latex_delimiters = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
]

# Input component: a single subject chosen from the list above.
_subject_input = gr.Dropdown(
    choices=subjects,
    label="Select Mathematics Subject",
    info="Choose a subject for the exam questions",
)

# Output component: the generated test, rendered as Markdown.
_test_output = gr.Markdown(
    label="Generated Test",
    latex_delimiters=_latex_delimiters,
)

# Gradio interface wiring the dropdown to generate_test.
interface = gr.Interface(
    fn=generate_test,
    inputs=_subject_input,
    outputs=_test_output,
    title="Advanced Mathematics Test Generator",
    description=(
        "Generates university-level mathematics exam questions with solutions using Claude 3 Opus.\n"
        "Limited to 25 requests per day. Please use responsibly."
    ),
    theme="default",
    allow_flagging="never",
)

# Launch the interface only when run as a script.
if __name__ == "__main__":
    interface.launch()
|
|
|
# Number token: optional sign, optional "digits." prefix, then digits.
_ANSWER_NUMBER = r'[-+]?(?:\d*\.)?\d+'

# A question header is "<digits>)" at the start of a line (optional indent).
_QUESTION_HEADER_RE = re.compile(r'^[ \t]*(\d+)\)', re.MULTILINE)

# "Final answer = 5", "Final answer: 5", "**Final answer** is $5$", ...
_FINAL_ANSWER_RE = re.compile(
    r'final answer[\s*:$]*(?:=|is)?[\s*$]*(' + _ANSWER_NUMBER + r')',
    re.IGNORECASE,
)

# Fallback: any "= <number>" in the solution text.
_EQUALS_NUMBER_RE = re.compile(r'=[\s$]*(' + _ANSWER_NUMBER + r')')


def parse_questions(content):
    """Parse numbered questions and their solutions from Claude's output.

    The text is expected to contain questions introduced by ``1)``, ``2)``,
    ... at the start of a line, each followed by a ``Solution:`` marker.
    Any text before the first question header (e.g. an introductory
    sentence) is ignored.

    Args:
        content: Raw response text. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of dicts, one per successfully parsed question, with keys:
            'number': the question number as a string (e.g. ``'1'``),
            'problem': text between the header and ``Solution:``, stripped,
            'solution': text after ``Solution:``, stripped,
            'final_answer': the answer as a numeric string.
        Questions without a ``Solution:`` marker, or whose solution contains
        no recognisable numeric answer, are skipped.
    """
    if not content:
        return []

    # Splitting on a pattern with one capture group yields
    # [preamble, num1, body1, num2, body2, ...]. The preamble is always
    # present (possibly empty) and must always be dropped, otherwise the
    # number/body pairs below are shifted by one.
    _preamble, *fields = _QUESTION_HEADER_RE.split(content)

    questions = []
    for question_num, question_content in zip(fields[::2], fields[1::2]):
        problem, marker, solution = question_content.partition("Solution:")
        if not marker:
            continue  # No solution section for this question.

        problem = problem.strip()
        solution = solution.strip()

        # Prefer an explicitly stated "Final answer"; otherwise fall back to
        # "= <number>". In both cases take the LAST occurrence, since earlier
        # matches are intermediate steps of the worked solution.
        answers = (
            _FINAL_ANSWER_RE.findall(solution)
            or _EQUALS_NUMBER_RE.findall(solution)
        )
        if not answers:
            continue

        questions.append({
            'number': question_num,
            'problem': problem,
            'solution': solution,
            'final_answer': answers[-1],
        })

    return questions
|
54 |
|
55 |
+
# Number token: optional sign, optional "digits." prefix, then digits.
_WOLFRAM_NUMBER_RE = re.compile(r'[-+]?(?:\d*\.)?\d+')

# Pod titles treated as carrying the answer.
_WOLFRAM_ANSWER_POD_TITLES = frozenset({'Result', 'Solution', 'Numerical result'})


def verify_solution(problem, answer, tolerance=0.001):
    """Verify a numerical answer to a problem using Wolfram Alpha.

    The problem text (with LaTeX ``$`` delimiters removed) is sent to the
    module-level ``wolfram_client``. The first number found in the first
    usable answer pod is compared against ``answer``.

    Args:
        problem: Problem statement; ``$`` characters are stripped before
            querying.
        answer: Expected answer; anything ``float()`` accepts.
        tolerance: Maximum absolute difference (exclusive) still treated as
            a match. Defaults to 0.001.

    Returns:
        A dict with keys:
            'verified': True if Wolfram's number is within ``tolerance`` of
                ``answer``, else False,
            'wolfram_solution': the pod text used for the comparison, or None,
            'error': None on a completed comparison, otherwise a message.
        This function does not raise; every failure is reported via 'error'.
    """
    def _failure(message):
        return {'verified': False, 'wolfram_solution': None, 'error': message}

    # Validate the answer BEFORE querying, so an unparseable answer does not
    # consume a Wolfram Alpha API call.
    try:
        expected = float(answer)
    except (TypeError, ValueError, OverflowError):
        return _failure(f"Answer is not numeric: {answer!r}")

    try:
        # Removing every '$' also removes '$$' display delimiters.
        query = problem.replace('$', '')
        result = wolfram_client.query(query)

        for pod in result.pods:
            if pod.title not in _WOLFRAM_ANSWER_POD_TITLES:
                continue
            wolfram_answer = pod.text
            # NOTE(review): pod.text is presumably None for pods without
            # plaintext - confirm against the client library. Skip such pods
            # rather than aborting the whole verification.
            if not wolfram_answer:
                continue
            number = _WOLFRAM_NUMBER_RE.search(wolfram_answer)
            if number is None:
                continue
            return {
                'verified': abs(float(number.group()) - expected) < tolerance,
                'wolfram_solution': wolfram_answer,
                'error': None,
            }
    except Exception as e:
        # Best-effort boundary: client/network/parsing failures are reported
        # to the caller in the result dict instead of propagating.
        return _failure(str(e))

    return _failure("Could not extract numerical solution from Wolfram Alpha")
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
def generate_test(subject):
|
88 |
"""Generate and verify a math test"""
|
89 |
try:
|
90 |
+
system_prompt = """Generate 3 university-level math questions that can be verified numerically.
|
|
|
91 |
For each question:
|
92 |
+
1. Number the question as 1), 2), 3)
|
93 |
+
2. State the problem clearly using simple $$ for displayed math
|
94 |
+
3. Include "Solution:" before the solution
|
95 |
+
4. Show step-by-step work
|
96 |
+
5. End each solution with "Final answer = [number]"
|
97 |
+
6. Keep problems relatively simple (basic calculus, algebra, etc.)"""
|
98 |
|
99 |
message = anthropic.messages.create(
|
100 |
model="claude-3-opus-20240229",
|
|
|
102 |
temperature=0.7,
|
103 |
messages=[{
|
104 |
"role": "user",
|
105 |
+
"content": f"{system_prompt}\n\nWrite an exam for {subject} with simple numerical answers."
|
106 |
}]
|
107 |
)
|
108 |
|
109 |
+
# Get the content and parse questions
|
110 |
content = message.content[0].text
|
111 |
+
questions = parse_questions(content)
|
112 |
|
113 |
# Add verification results
|
114 |
+
verification_note = "\n\n---\n## Solution Verification:\n"
|
115 |
verification_results = []
|
116 |
|
117 |
+
for q in questions:
|
118 |
+
result = verify_solution(q['problem'], q['final_answer'])
|
119 |
+
verification_results.append(result)
|
120 |
+
verification_note += f"\nQuestion {q['number']}:\n"
|
|
|
121 |
if result['verified']:
|
122 |
verification_note += "✅ Solution verified by Wolfram Alpha\n"
|
123 |
else:
|
124 |
verification_note += "⚠️ Solution needs verification\n"
|
125 |
+
if result['wolfram_solution']:
|
126 |
+
verification_note += f"Wolfram Alpha result: {result['wolfram_solution']}\n"
|
127 |
+
if result['error']:
|
128 |
+
verification_note += f"Note: {result['error']}\n"
|
129 |
|
130 |
# Add usage statistics
|
131 |
usage_stats = f"""
|
|
|
139 |
• Wolfram API calls: {len(verification_results)}
|
140 |
"""
|
141 |
|
142 |
+
# Combine everything with proper spacing
|
143 |
+
final_output = content + "\n\n" + verification_note + usage_stats
|
144 |
+
return final_output
|
145 |
|
146 |
except Exception as e:
|
147 |
return f"Error: {str(e)}"
|
148 |
|
149 |
+
# Rest of your code (subjects and interface) remains the same...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|