Spaces:
Running
Running
Update app.py
Browse files — revert to version before solution verification
app.py
CHANGED
@@ -1,215 +1,60 @@
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
from anthropic import Anthropic
|
4 |
-
import wolframalpha
|
5 |
from datetime import datetime, timedelta
|
6 |
from collections import deque
|
7 |
-
import re
|
8 |
|
9 |
-
# Initialize
|
10 |
-
anthropic = Anthropic(
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
def
|
14 |
-
"""
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
for line in lines:
|
22 |
-
# Start of new question
|
23 |
-
if re.match(r'^\s*\d+\)', line):
|
24 |
-
if current_question:
|
25 |
-
questions.append(current_question)
|
26 |
-
current_question = {
|
27 |
-
'number': re.match(r'^\s*(\d+)\)', line).group(1),
|
28 |
-
'problem': line.split(')', 1)[1].strip(),
|
29 |
-
'solution': '',
|
30 |
-
'final_answer': None
|
31 |
-
}
|
32 |
-
# Solution marker
|
33 |
-
elif 'Solution:' in line and current_question:
|
34 |
-
current_question['problem'] = current_question['problem'].strip()
|
35 |
-
current_question['solution'] = line.split('Solution:', 1)[1].strip()
|
36 |
-
# Add to current problem or solution
|
37 |
-
elif current_question:
|
38 |
-
if current_question['solution']:
|
39 |
-
current_question['solution'] += '\n' + line
|
40 |
-
else:
|
41 |
-
current_question['problem'] += '\n' + line
|
42 |
-
|
43 |
-
# Extract final answer
|
44 |
-
if current_question and 'final answer' in line.lower():
|
45 |
-
matches = re.findall(r'[-+]?(?:\d*\.)?\d+', line)
|
46 |
-
if matches:
|
47 |
-
current_question['final_answer'] = matches[-1]
|
48 |
-
|
49 |
-
# Add last question
|
50 |
-
if current_question:
|
51 |
-
questions.append(current_question)
|
52 |
-
|
53 |
-
# Clean up questions
|
54 |
-
for q in questions:
|
55 |
-
q['problem'] = q['problem'].strip()
|
56 |
-
q['solution'] = q['solution'].strip()
|
57 |
-
|
58 |
-
return questions
|
59 |
|
60 |
-
def
|
61 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
62 |
try:
|
63 |
-
#
|
64 |
-
|
65 |
-
|
66 |
-
# Clean the problem text first
|
67 |
-
clean_problem = problem.replace('$$', '').replace('$', '').strip()
|
68 |
-
|
69 |
-
# Case 1: Definite Integral
|
70 |
-
if 'integral' in clean_problem.lower() or '∫' in clean_problem or '\int' in clean_problem:
|
71 |
-
# Use raw string for regex to avoid escape issues
|
72 |
-
integrand_match = re.search(r'(?:\int|∫)_(\d+)\^(\d+)\s*\(?([\dx+\s]+)\)?\s*dx', clean_problem, re.UNICODE)
|
73 |
-
if integrand_match:
|
74 |
-
lower, upper, integrand = integrand_match.groups()
|
75 |
-
# Clean up the integrand
|
76 |
-
integrand = integrand.replace(' ', '')
|
77 |
-
query = f"integrate {integrand} from {lower} to {upper}"
|
78 |
-
print(f"Integral query: {query}")
|
79 |
-
else:
|
80 |
-
# Fallback for simpler pattern
|
81 |
-
integrand_match = re.search(r'(?:\int|∫).*?\(([\dx+\s]+)\)\s*dx', clean_problem, re.UNICODE)
|
82 |
-
if integrand_match:
|
83 |
-
integrand = integrand_match.group(1).replace(' ', '')
|
84 |
-
query = f"integrate {integrand} from 0 to 1" # Common default bounds
|
85 |
-
print(f"Fallback integral query: {query}")
|
86 |
-
|
87 |
-
# Case 2: Simple Differentiation
|
88 |
-
elif 'derivative' in clean_problem.lower() or 'd/dx' in clean_problem:
|
89 |
-
# Look for function after equals sign or f(x) =
|
90 |
-
func_match = re.search(r'[f\(x\)\s*=\s*](.*?)$', clean_problem)
|
91 |
-
if func_match:
|
92 |
-
func = func_match.group(1).strip()
|
93 |
-
query = f"derivative of {func}"
|
94 |
-
print(f"Derivative query: {query}")
|
95 |
-
|
96 |
-
# Case 3: Mean Value Theorem
|
97 |
-
elif 'Mean Value Theorem' in clean_problem:
|
98 |
-
func_match = re.search(r'f\(x\)\s*=\s*(.*?)\s+on', clean_problem)
|
99 |
-
interval_match = re.search(r'\[(\d+),\s*(\d+)\]', clean_problem)
|
100 |
-
if func_match and interval_match:
|
101 |
-
func = func_match.group(1).strip()
|
102 |
-
a, b = interval_match.groups()
|
103 |
-
# Calculate f'(x) first
|
104 |
-
derivative_query = f"derivative of {func}"
|
105 |
-
print(f"MVT derivative query: {derivative_query}")
|
106 |
-
derivative_result = wolfram_client.query(derivative_query)
|
107 |
-
|
108 |
-
if derivative_result.success:
|
109 |
-
for pod in derivative_result.pods:
|
110 |
-
if pod.title in ['Derivative']:
|
111 |
-
derivative = pod.text
|
112 |
-
# Now calculate [f(b) - f(a)]/(b-a)
|
113 |
-
query = f"solve {derivative} = ({func.replace('x', b)} - {func.replace('x', a)})/({b} - {a})"
|
114 |
-
print(f"MVT final query: {query}")
|
115 |
-
break
|
116 |
-
|
117 |
-
# Ensure query is not empty
|
118 |
-
if not query.strip():
|
119 |
-
return {
|
120 |
-
'verified': False,
|
121 |
-
'wolfram_solution': None,
|
122 |
-
'error': "Could not generate valid query from problem"
|
123 |
-
}
|
124 |
-
|
125 |
-
print(f"Final query to Wolfram Alpha: {query}")
|
126 |
-
result = wolfram_client.query(query)
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
'wolfram_solution': None,
|
132 |
-
'error': f"Wolfram Alpha could not process query: {query}"
|
133 |
-
}
|
134 |
|
135 |
-
#
|
136 |
-
|
137 |
-
if pod.title in ['Result', 'Solution', 'Numerical result', 'Decimal approximation', 'Definite integral', 'Solutions']:
|
138 |
-
wolfram_answer = pod.text
|
139 |
-
print(f"Wolfram pod {pod.title}: {wolfram_answer}")
|
140 |
-
|
141 |
-
# For MVT problems, handle sqrt expressions
|
142 |
-
if 'Mean Value Theorem' in clean_problem:
|
143 |
-
# Convert both answers to decimal for comparison
|
144 |
-
if 'sqrt' in str(answer).lower():
|
145 |
-
# Convert sqrt expression to decimal
|
146 |
-
sqrt_match = re.search(r'sqrt\((\d+)/(\d+)\)', str(answer))
|
147 |
-
if sqrt_match:
|
148 |
-
num, denom = map(float, sqrt_match.groups())
|
149 |
-
user_value = (num/denom)**0.5
|
150 |
-
# Look for decimal in Wolfram result
|
151 |
-
wolfram_nums = re.findall(r'[-+]?(?:\d*\.)?\d+', wolfram_answer)
|
152 |
-
if wolfram_nums:
|
153 |
-
wolfram_value = float(wolfram_nums[0])
|
154 |
-
is_verified = abs(wolfram_value - user_value) < 0.01
|
155 |
-
return {
|
156 |
-
'verified': is_verified,
|
157 |
-
'wolfram_solution': wolfram_answer,
|
158 |
-
'error': None
|
159 |
-
}
|
160 |
-
|
161 |
-
# Handle numerical answers
|
162 |
-
if str(answer).replace('.', '').isdigit():
|
163 |
-
wolfram_nums = re.findall(r'[-+]?(?:\d*\.)?\d+', wolfram_answer)
|
164 |
-
if wolfram_nums:
|
165 |
-
wolfram_value = float(wolfram_nums[0])
|
166 |
-
user_value = float(answer)
|
167 |
-
is_verified = abs(wolfram_value - user_value) < 0.01
|
168 |
-
return {
|
169 |
-
'verified': is_verified,
|
170 |
-
'wolfram_solution': wolfram_answer,
|
171 |
-
'error': None
|
172 |
-
}
|
173 |
-
# Handle symbolic answers
|
174 |
-
else:
|
175 |
-
clean_wolfram = re.sub(r'\s+', '', wolfram_answer.lower())
|
176 |
-
clean_answer = re.sub(r'\s+', '', str(answer).lower())
|
177 |
-
is_verified = clean_wolfram == clean_answer
|
178 |
-
return {
|
179 |
-
'verified': is_verified,
|
180 |
-
'wolfram_solution': wolfram_answer,
|
181 |
-
'error': None
|
182 |
-
}
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
return {
|
195 |
-
'verified': False,
|
196 |
-
'wolfram_solution': None,
|
197 |
-
'error': error_msg
|
198 |
-
}
|
199 |
-
|
200 |
-
def generate_test(subject):
|
201 |
-
"""Generate and verify a math test"""
|
202 |
-
try:
|
203 |
-
system_prompt = """Generate 3 university-level math questions that can be verified numerically.
|
204 |
-
For each question:
|
205 |
-
1. Number the question as 1), 2), 3)
|
206 |
-
2. State the problem clearly using simple $$ for displayed math
|
207 |
-
3. Include "Solution:" before the solution
|
208 |
-
4. Show step-by-step work
|
209 |
-
5. End each solution with "Final answer = [number]"
|
210 |
-
6. Keep problems relatively simple (basic calculus, algebra, etc.)
|
211 |
-
7. Make sure problems have clear numerical answers
|
212 |
-
8. Avoid word problems - focus on pure mathematical expressions"""
|
213 |
|
214 |
message = anthropic.messages.create(
|
215 |
model="claude-3-opus-20240229",
|
@@ -217,53 +62,42 @@ def generate_test(subject):
|
|
217 |
temperature=0.7,
|
218 |
messages=[{
|
219 |
"role": "user",
|
220 |
-
"content": f"{system_prompt}\n\nWrite an exam for {subject}
|
221 |
}]
|
222 |
)
|
223 |
|
224 |
-
#
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
verification_results = []
|
231 |
-
|
232 |
-
for q in questions:
|
233 |
-
if q['final_answer'] is not None:
|
234 |
-
result = verify_solution(q['problem'], q['final_answer'])
|
235 |
-
verification_results.append(result)
|
236 |
-
verification_note += f"\nQuestion {q['number']}:\n"
|
237 |
-
if result['verified']:
|
238 |
-
verification_note += "✅ Solution verified by Wolfram Alpha\n"
|
239 |
-
else:
|
240 |
-
verification_note += "⚠️ Solution needs verification\n"
|
241 |
-
if result['wolfram_solution']:
|
242 |
-
verification_note += f"Wolfram Alpha result: {result['wolfram_solution']}\n"
|
243 |
-
if result['error']:
|
244 |
-
verification_note += f"Note: {result['error']}\n"
|
245 |
-
else:
|
246 |
-
verification_note += f"\nQuestion {q['number']}:\n⚠️ Could not extract final answer\n"
|
247 |
|
248 |
-
# Add usage statistics
|
249 |
usage_stats = f"""
|
250 |
\n---\nUsage Statistics:
|
251 |
-
• Input Tokens: {
|
252 |
-
• Output Tokens: {
|
253 |
-
•
|
254 |
|
255 |
Cost Breakdown:
|
256 |
-
•
|
257 |
-
•
|
|
|
258 |
"""
|
259 |
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
263 |
|
|
|
|
|
264 |
except Exception as e:
|
265 |
return f"Error: {str(e)}"
|
266 |
|
|
|
267 |
subjects = [
|
268 |
"Single Variable Calculus",
|
269 |
"Multivariable Calculus",
|
@@ -301,4 +135,4 @@ interface = gr.Interface(
|
|
301 |
|
302 |
# Launch the interface
|
303 |
if __name__ == "__main__":
|
304 |
-
interface.launch()
|
|
|
1 |
+
# app.py
|
2 |
import os
|
3 |
import gradio as gr
|
4 |
from anthropic import Anthropic
|
|
|
5 |
from datetime import datetime, timedelta
|
6 |
from collections import deque
|
|
|
7 |
|
8 |
+
# Initialize Anthropic client - will use the secret key from HuggingFace
|
9 |
+
anthropic = Anthropic(
|
10 |
+
api_key=os.environ.get('ANTHROPIC_API_KEY')
|
11 |
+
)
|
12 |
+
|
13 |
+
# Request tracking
|
14 |
+
MAX_REQUESTS_PER_DAY = 25 # Conservative limit to start
|
15 |
+
request_history = deque(maxlen=1000)
|
16 |
+
|
17 |
+
def check_api_key():
    """Verify that the Anthropic API key is configured.

    Reads the ``ANTHROPIC_API_KEY`` environment variable; a missing or empty
    value is treated as "not configured".

    Raises:
        ValueError: If ``ANTHROPIC_API_KEY`` is unset or empty.
    """
    if not os.environ.get('ANTHROPIC_API_KEY'):
        raise ValueError("Anthropic API key not found. Please configure it in HuggingFace Spaces settings.")
|
21 |
|
22 |
+
def check_rate_limit():
    """Report whether another request is allowed under the daily limit.

    Drops timestamps older than 24 hours from the front of the module-level
    ``request_history`` deque (mutating it in place), then compares the
    number of remaining entries against ``MAX_REQUESTS_PER_DAY``.

    Returns:
        bool: True if fewer than ``MAX_REQUESTS_PER_DAY`` timestamps remain
        in ``request_history``, False otherwise.
    """
    now = datetime.now()
    # Remove requests older than 24 hours; only the front of the deque is
    # inspected, so pruning stops at the first entry still inside the window.
    while request_history and (now - request_history[0]) > timedelta(days=1):
        request_history.popleft()
    return len(request_history) < MAX_REQUESTS_PER_DAY
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
def clean_latex(text):
    """Simple LaTeX cleaning.

    Replaces every newline in *text* with two newlines and returns the
    result. No other LaTeX-specific processing is performed.

    Args:
        text: The string to process.

    Returns:
        The input with each newline character doubled.
    """
    text = text.replace('\n', '\n\n')
    return text
|
34 |
+
|
35 |
+
def generate_test(subject):
|
36 |
+
"""Generate a math test with error handling and rate limiting"""
|
37 |
try:
|
38 |
+
# Check API key
|
39 |
+
check_api_key()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
+
# Check rate limit
|
42 |
+
if not check_rate_limit():
|
43 |
+
return "Daily request limit reached. Please try again tomorrow."
|
|
|
|
|
|
|
44 |
|
45 |
+
# Record request
|
46 |
+
request_history.append(datetime.now())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
+
system_prompt = """You will write math exam questions. Follow these requirements EXACTLY:
|
49 |
+
1. Write exactly 3 challenging university-level questions
|
50 |
+
2. For LaTeX math formatting:
|
51 |
+
- Use $ for simple inline math
|
52 |
+
- For equations and solution steps, use $$ on separate lines
|
53 |
+
- For multi-step solutions, put each step on its own line in $$ $$
|
54 |
+
- DO NOT use \\begin{aligned} or any other environments
|
55 |
+
3. Number each question as 1), 2), 3)
|
56 |
+
4. Include solutions after each question
|
57 |
+
5. Keep formatting simple and clear"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
message = anthropic.messages.create(
|
60 |
model="claude-3-opus-20240229",
|
|
|
62 |
temperature=0.7,
|
63 |
messages=[{
|
64 |
"role": "user",
|
65 |
+
"content": f"{system_prompt}\n\nWrite an exam for {subject}."
|
66 |
}]
|
67 |
)
|
68 |
|
69 |
+
# Extract usage information
|
70 |
+
input_tokens = message.usage.input_tokens
|
71 |
+
output_tokens = message.usage.output_tokens
|
72 |
+
input_cost = (input_tokens / 1000) * 0.015
|
73 |
+
output_cost = (output_tokens / 1000) * 0.075
|
74 |
+
total_cost = input_cost + output_cost
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
|
|
76 |
usage_stats = f"""
|
77 |
\n---\nUsage Statistics:
|
78 |
+
• Input Tokens: {input_tokens:,}
|
79 |
+
• Output Tokens: {output_tokens:,}
|
80 |
+
• Total Tokens: {input_tokens + output_tokens:,}
|
81 |
|
82 |
Cost Breakdown:
|
83 |
+
• Input Cost: ${input_cost:.4f}
|
84 |
+
• Output Cost: ${output_cost:.4f}
|
85 |
+
• Total Cost: ${total_cost:.4f}
|
86 |
"""
|
87 |
|
88 |
+
if hasattr(message, 'content') and len(message.content) > 0:
|
89 |
+
response_text = message.content[0].text
|
90 |
+
formatted_response = clean_latex(response_text) + usage_stats
|
91 |
+
return formatted_response
|
92 |
+
else:
|
93 |
+
return "Error: No content in response"
|
94 |
|
95 |
+
except ValueError as e:
|
96 |
+
return f"Configuration Error: {str(e)}"
|
97 |
except Exception as e:
|
98 |
return f"Error: {str(e)}"
|
99 |
|
100 |
+
# Subject choices
|
101 |
subjects = [
|
102 |
"Single Variable Calculus",
|
103 |
"Multivariable Calculus",
|
|
|
135 |
|
136 |
# Launch the interface
|
137 |
if __name__ == "__main__":
|
138 |
+
interface.launch()
|