# Hugging Face Space (status at capture time: Sleeping)
import os | |
import gradio as gr | |
from anthropic import Anthropic | |
import wolframalpha | |
from datetime import datetime, timedelta | |
from collections import deque | |
import re | |
# Shared API clients, created once at import time. Both credentials are read
# from the environment; an unset variable is passed through as None.
anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
wolfram_client = wolframalpha.Client(os.getenv('WOLFRAM_APPID'))
def parse_questions(content):
    """Parse numbered questions and their solutions from Claude's output.

    A question starts on a line of the form ``N) ...``. Lines up to the next
    line containing ``Solution:`` belong to the problem statement; everything
    after it (until the next question header) belongs to the solution. Any
    line of a question that mentions "final answer" (case-insensitive) has
    its last number recorded as the final answer.

    Args:
        content: Raw text returned by the model.

    Returns:
        A list of dicts, in order of appearance, each with the keys
        ``number`` (str), ``problem`` (str), ``solution`` (str) and
        ``final_answer`` (numeric string, or None when none was found).
        Text before the first question header is ignored.
    """
    header_pattern = re.compile(r'^\s*(\d+)\)')
    number_pattern = re.compile(r'[-+]?(?:\d*\.)?\d+')

    questions = []
    current_question = None
    # Tracked explicitly: a bare "Solution:" line leaves the solution text
    # empty, so "is the solution non-empty?" cannot tell us which section we
    # are in (it used to push the whole solution into the problem text).
    in_solution = False

    for line in content.split('\n'):
        header = header_pattern.match(line)
        if header:
            # Start of a new question: flush the previous one first.
            if current_question is not None:
                questions.append(current_question)
            current_question = {
                'number': header.group(1),
                'problem': line.split(')', 1)[1].strip(),
                'solution': '',
                'final_answer': None,
            }
            in_solution = False
        elif current_question is None:
            # Preamble before the first numbered question.
            continue
        elif 'Solution:' in line:
            current_question['problem'] = current_question['problem'].strip()
            current_question['solution'] = line.split('Solution:', 1)[1].strip()
            in_solution = True
        elif in_solution:
            current_question['solution'] += '\n' + line
        else:
            current_question['problem'] += '\n' + line

        # The final answer is the last number on a "final answer" line.
        if 'final answer' in line.lower():
            numbers = number_pattern.findall(line)
            if numbers:
                current_question['final_answer'] = numbers[-1]

    if current_question is not None:
        questions.append(current_question)

    # Drop the leading/trailing whitespace accumulated while appending lines.
    for question in questions:
        question['problem'] = question['problem'].strip()
        question['solution'] = question['solution'].strip()

    return questions
# Pod titles whose text is treated as Wolfram Alpha's answer.
_ANSWER_POD_TITLES = (
    'Result', 'Solution', 'Numerical result', 'Decimal approximation',
    'Definite integral', 'Solutions',
)
# Same number shape that the rest of the file uses to pull numbers out of text.
_NUMBER = r'[-+]?(?:\d*\.)?\d+'
# A numeric bound (integration limit / interval endpoint), captured.
_BOUND = r'([-+]?\d+(?:\.\d+)?)'


def _outcome(verified, wolfram_solution, error):
    """Build the result dict returned by verify_solution."""
    return {
        'verified': verified,
        'wolfram_solution': wolfram_solution,
        'error': error,
    }


def _unwrap_parens(expr):
    """Remove one pair of parentheses that wraps the whole expression."""
    inner = expr[1:-1]
    wrapped = expr.startswith('(') and expr.endswith(')')
    if wrapped and inner and '(' not in inner and ')' not in inner:
        return inner
    return expr


def _substitute_x(expr, value):
    """Replace the variable x in expr by (value), leaving names like exp alone."""
    return re.sub(r'(?<![A-Za-z])x(?![A-Za-z])', lambda _: f'({value})', expr)


def _ratio(numerator, denominator):
    """Return numerator/denominator as a float (denominator optional), or None."""
    bottom = float(denominator) if denominator else 1.0
    return float(numerator) / bottom if bottom else None


def _answer_as_float(answer):
    """Return answer as a float when it is a plain (optionally signed) decimal."""
    text = str(answer).strip()
    if re.fullmatch(r'[-+]?(?:\d+\.?\d*|\.\d+)', text):
        return float(text)
    return None


def _wolfram_numeric_value(text):
    """Extract the numeric value from a Wolfram Alpha pod text, or None.

    Only the part after the last '=' or '≈' is considered first.
    NOTE(review): this assumes pod text may repeat the query before the value
    (e.g. "integral_0^3 (2 x + 1) dx = 12" or "4/3 ≈ 1.3333"), in which case
    the first number in the whole text would be a bound, not the answer -
    confirm against real responses.
    """
    tail = re.split(r'[=≈]', text)[-1].strip()
    compact = tail.replace(' ', '')

    root = re.fullmatch(r'sqrt\((\d+(?:\.\d+)?)(?:/(\d+(?:\.\d+)?))?\)', compact)
    if root:
        radicand = _ratio(*root.groups())
        return None if radicand is None else radicand ** 0.5

    fraction = re.fullmatch(r'([-+]?\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)', compact)
    if fraction:
        return _ratio(*fraction.groups())

    numbers = re.findall(_NUMBER, tail) or re.findall(_NUMBER, text)
    return float(numbers[0]) if numbers else None


def _integral_query(clean_problem):
    """Build an "integrate ... from a to b" query for an integral in x, or ''."""
    # "\\int" matches a literal backslash-int; a bare "\i" is an invalid
    # regex escape and makes re.search raise. Bounds may be written as
    # _0^3 or _{0}^{3}; an optional LaTeX thin space "\," may precede dx.
    bounded = re.search(
        r'(?:\\int|∫)\s*_\s*\{?\s*' + _BOUND + r'\s*\}?\s*\^\s*\{?\s*' + _BOUND
        + r'\s*\}?\s*(.+?)\s*(?:\\,\s*)?dx',
        clean_problem,
    )
    if bounded:
        lower, upper, integrand = bounded.groups()
        integrand = _unwrap_parens(integrand.replace(' ', ''))
        query = f"integrate {integrand} from {lower} to {upper}"
        print(f"Integral query: {query}")
        return query

    # Fallback for a parenthesised integrand without recognisable bounds.
    # NOTE(review): the 0..1 bounds are a guess carried over from the original
    # code; a result obtained this way does not verify the stated problem.
    unbounded = re.search(r'(?:\\int|∫).*?\(([\dx+\s]+)\)\s*dx', clean_problem)
    if unbounded:
        integrand = unbounded.group(1).replace(' ', '')
        query = f"integrate {integrand} from 0 to 1"  # Common default bounds
        print(f"Fallback integral query: {query}")
        return query
    return ""


def _derivative_query(clean_problem):
    """Build a "derivative of ..." query from the problem text, or ''."""
    # Prefer an explicit definition "f(x) = ..." / "y = ..."; otherwise take
    # whatever follows "derivative of" or "d/dx".
    found = re.search(r'(?:f\s*\(\s*x\s*\)|\by)\s*=\s*([^\n]+)', clean_problem)
    if not found:
        found = re.search(r'(?:derivative\s+of|d/dx)\s*([^\n]+)',
                          clean_problem, re.IGNORECASE)
    if not found:
        return ""
    func = found.group(1).strip().rstrip('.?;,').strip()
    if not func:
        return ""
    query = f"derivative of {func}"
    print(f"Derivative query: {query}")
    return query


def _mvt_query(clean_problem):
    """Build the "solve f'(x) = (f(b) - f(a))/(b - a)" query, or ''.

    Asks Wolfram Alpha for f'(x) first, so this performs one API call.
    """
    func_match = re.search(r'f\(x\)\s*=\s*(.*?)\s+on', clean_problem)
    interval_match = re.search(
        r'\[\s*' + _BOUND + r'\s*,\s*' + _BOUND + r'\s*\]', clean_problem)
    if not (func_match and interval_match):
        return ""

    func = func_match.group(1).strip()
    a, b = interval_match.groups()

    derivative_query = f"derivative of {func}"
    print(f"MVT derivative query: {derivative_query}")
    derivative_result = wolfram_client.query(derivative_query)
    if not derivative_result.success:
        return ""

    for pod in derivative_result.pods:
        if pod.title == 'Derivative' and pod.text:
            # NOTE(review): assumes the pod text may look like
            # "d/dx(...) = <derivative>"; keep only the right-hand side so the
            # final query contains a single equation - confirm.
            derivative = pod.text.split('=')[-1].strip()
            # Parenthesise both substituted values so "2x" does not become
            # "23" and f(a) is subtracted as a whole.
            f_b = _substitute_x(func, b)
            f_a = _substitute_x(func, a)
            query = f"solve {derivative} = (({f_b}) - ({f_a}))/(({b}) - ({a}))"
            print(f"MVT final query: {query}")
            return query
    return ""


def verify_solution(problem, answer):
    """Verify a mathematical solution using Wolfram Alpha.

    The problem text is classified as an integral, a derivative or a Mean
    Value Theorem problem (checked in that order), turned into a Wolfram Alpha
    query, and the first answer pod with text is compared against ``answer``:
    numerically (absolute tolerance 0.01) when ``answer`` is a plain number,
    otherwise as a whitespace-insensitive, case-insensitive string match.

    Args:
        problem: Problem statement; ``$`` / ``$$`` math delimiters are removed.
        answer: The claimed final answer (number or string).

    Returns:
        A dict with ``verified`` (bool), ``wolfram_solution`` (the pod text
        used, or None) and ``error`` (message, or None). This function does
        not raise: any exception is reported through ``error``, together with
        the query that was attempted when one had been built.
    """
    query = ""
    try:
        # Clean the problem text first
        clean_problem = problem.replace('$$', '').replace('$', '').strip()
        lowered = clean_problem.lower()

        if 'integral' in lowered or '∫' in clean_problem or r'\int' in clean_problem:
            query = _integral_query(clean_problem)
        elif 'derivative' in lowered or 'd/dx' in clean_problem:
            query = _derivative_query(clean_problem)
        elif 'Mean Value Theorem' in clean_problem:
            query = _mvt_query(clean_problem)

        # Ensure query is not empty
        if not query.strip():
            return _outcome(False, None, "Could not generate valid query from problem")

        print(f"Final query to Wolfram Alpha: {query}")
        result = wolfram_client.query(query)
        if not result.success:
            return _outcome(False, None, f"Wolfram Alpha could not process query: {query}")

        answer_text = str(answer)
        user_value = _answer_as_float(answer)
        is_mvt = 'Mean Value Theorem' in clean_problem

        for pod in result.pods:
            if pod.title not in _ANSWER_POD_TITLES:
                continue
            wolfram_answer = pod.text
            if not wolfram_answer:
                # A pod without text cannot be compared; try the next one.
                continue
            print(f"Wolfram pod {pod.title}: {wolfram_answer}")
            wolfram_value = _wolfram_numeric_value(wolfram_answer)

            # For MVT problems, compare a sqrt(a/b) answer as a decimal.
            if is_mvt and 'sqrt' in answer_text.lower():
                sqrt_match = re.search(r'sqrt\((\d+)/(\d+)\)', answer_text)
                radicand = _ratio(*sqrt_match.groups()) if sqrt_match else None
                if radicand is not None and wolfram_value is not None:
                    is_verified = abs(wolfram_value - radicand ** 0.5) < 0.01
                    return _outcome(is_verified, wolfram_answer, None)

            if user_value is not None:
                # Numerical answer: needs a number on Wolfram's side too,
                # otherwise move on to the next pod.
                if wolfram_value is not None:
                    is_verified = abs(wolfram_value - user_value) < 0.01
                    return _outcome(is_verified, wolfram_answer, None)
            else:
                # Symbolic answer: exact match ignoring case and whitespace.
                clean_wolfram = re.sub(r'\s+', '', wolfram_answer.lower())
                clean_answer = re.sub(r'\s+', '', answer_text.lower())
                return _outcome(clean_wolfram == clean_answer, wolfram_answer, None)

        return _outcome(False, None, "No suitable solution found in Wolfram Alpha response")

    except Exception as e:
        # Verification is best-effort: report the failure instead of raising.
        error_msg = f"Error during verification: {str(e)}"
        if query:
            error_msg += f"\nQuery attempted: {query}"
        return _outcome(False, None, error_msg)
def generate_test(subject):
    """Generate a math test for ``subject`` and verify its answers.

    Asks Claude for three numbered questions with worked solutions, parses
    them with ``parse_questions``, checks every question that has an
    extractable final answer with ``verify_solution``, and appends a
    verification section plus token/cost statistics to the model output.

    Args:
        subject: Name of the mathematics subject. None or a blank value is
            rejected before any API call is made.

    Returns:
        A Markdown string: the generated exam followed by the verification
        notes and usage statistics. Failures are not raised; they are
        returned as a string starting with ``"Error: "``.
    """
    # A dropdown without a selection hands over None; do not spend an API
    # call on "Write an exam for None".
    if subject is None or not str(subject).strip():
        return "Error: Please select a mathematics subject."

    try:
        system_prompt = "\n".join((
            "Generate 3 university-level math questions that can be verified numerically.",
            "For each question:",
            "1. Number the question as 1), 2), 3)",
            "2. State the problem clearly using simple $$ for displayed math",
            '3. Include "Solution:" before the solution',
            "4. Show step-by-step work",
            '5. End each solution with "Final answer = [number]"',
            "6. Keep problems relatively simple (basic calculus, algebra, etc.)",
            "7. Make sure problems have clear numerical answers",
            "8. Avoid word problems - focus on pure mathematical expressions",
        ))
        message = anthropic.messages.create(
            model="claude-3-opus-20240229",
            max_tokens=1500,
            temperature=0.7,
            messages=[{
                "role": "user",
                "content": f"{system_prompt}\n\nWrite an exam for {subject} with simple numerical answers."
            }]
        )

        # Get the content and parse questions
        if not message.content:
            return "Error: Claude returned an empty response"
        content = message.content[0].text
        questions = parse_questions(content)

        # Add verification results
        note_parts = ["\n\n---\n## Solution Verification:\n"]
        verification_results = []
        for q in questions:
            note_parts.append(f"\nQuestion {q['number']}:\n")
            if q['final_answer'] is None:
                note_parts.append("⚠️ Could not extract final answer\n")
                continue

            result = verify_solution(q['problem'], q['final_answer'])
            verification_results.append(result)
            if result['verified']:
                note_parts.append("✅ Solution verified by Wolfram Alpha\n")
            else:
                note_parts.append("⚠️ Solution needs verification\n")
            if result['wolfram_solution']:
                note_parts.append(f"Wolfram Alpha result: {result['wolfram_solution']}\n")
            if result['error']:
                note_parts.append(f"Note: {result['error']}\n")
        verification_note = "".join(note_parts)

        # Add usage statistics. The rates are USD per 1,000 input / output
        # tokens as hard-coded in the original implementation.
        input_tokens = message.usage.input_tokens
        output_tokens = message.usage.output_tokens
        claude_cost = (input_tokens / 1000) * 0.015 + (output_tokens / 1000) * 0.075
        usage_stats = "\n".join((
            "",
            "\n---\nUsage Statistics:",
            f"• Input Tokens: {input_tokens:,}",
            f"• Output Tokens: {output_tokens:,}",
            f"• Wolfram Alpha calls: {len(verification_results)}",
            "Cost Breakdown:",
            f"• Claude Cost: ${claude_cost:.4f}",
            f"• Wolfram API calls: {len(verification_results)}",
            "",
        ))

        # Combine everything with proper spacing
        return content + "\n\n" + verification_note + usage_stats

    except Exception as e:
        # UI boundary: show the failure in the output pane instead of raising.
        return f"Error: {str(e)}"
# Subjects offered in the dropdown, in display order.
subjects = [
    'Single Variable Calculus',
    'Multivariable Calculus',
    'Linear Algebra',
    'Differential Equations',
    'Real Analysis',
    'Complex Analysis',
    'Abstract Algebra',
    'Probability Theory',
    'Numerical Analysis',
    'Topology',
]
# Build the Gradio UI: one subject dropdown in, one Markdown pane out.
_subject_picker = gr.Dropdown(
    label="Select Mathematics Subject",
    info="Choose a subject for the exam questions",
    choices=subjects,
)

# Render $$...$$ as display math and $...$ as inline math in the output.
_latex_delimiters = [
    dict(left="$$", right="$$", display=True),
    dict(left="$", right="$", display=False),
]
_exam_view = gr.Markdown(
    label="Generated Test",
    latex_delimiters=_latex_delimiters,
)

interface = gr.Interface(
    fn=generate_test,
    inputs=_subject_picker,
    outputs=_exam_view,
    title="Advanced Mathematics Test Generator",
    # NOTE(review): no request limiting is visible in this section of the
    # file - confirm the advertised daily limit is enforced somewhere.
    description=(
        "Generates university-level mathematics exam questions with solutions using Claude 3 Opus.\n"
        "Limited to 25 requests per day. Please use responsibly."
    ),
    theme="default",
    allow_flagging="never",
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()