math-exams-symvp-duo

Sleeping

App Files Files Community

math-exams-symvp-duo / app.py

joshuarauh

Update app.py

ac3d1e8 verified 7 months ago

raw

history blame

12.7 kB

	import os
	import gradio as gr
	from anthropic import Anthropic
	import wolframalpha
	from datetime import datetime, timedelta
	from collections import deque
	import re

	# Shared API clients used by the functions below; both credentials are read
	# from environment variables when the module is imported.
	anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
	wolfram_client = wolframalpha.Client(os.getenv('WOLFRAM_APPID'))

	def parse_questions(content):
	    """Parse numbered questions and their solutions from Claude's output.

	    A question starts on a line beginning with ``N)``. Everything up to the
	    first line containing ``Solution:`` is the problem statement; everything
	    after it (until the next question) is the solution. The last number, or
	    simple fraction ``a/b``, on a line mentioning "final answer" is recorded
	    as the final answer.

	    Args:
	        content: Raw exam text. ``None`` or an empty string yields ``[]``.

	    Returns:
	        A list of dicts with the keys ``'number'`` (str), ``'problem'`` (str),
	        ``'solution'`` (str) and ``'final_answer'`` (str, or ``None`` when no
	        final answer line was found).
	    """
	    if not content:
	        return []

	    question_start = re.compile(r'^\s*(\d+)\)')
	    # Same number shape as before, plus an optional "/denominator" so that
	    # "Final answer = 1/2" is not truncated to "2".
	    answer_number = re.compile(r'[-+]?(?:\d*\.)?\d+(?:\s*/\s*\d+)?')

	    questions = []
	    current = None
	    # Tracked explicitly: a bare "Solution:" line leaves the solution text
	    # empty, so emptiness cannot tell us which section we are in.
	    in_solution = False

	    for line in content.split('\n'):
	        start = question_start.match(line)
	        if start:
	            # NOTE: a solution step written as "2) ..." is indistinguishable
	            # from the start of question 2 and will be treated as one.
	            if current:
	                questions.append(current)
	            current = {
	                'number': start.group(1),
	                'problem': line.split(')', 1)[1].strip(),
	                'solution': '',
	                'final_answer': None
	            }
	            in_solution = False
	        elif current is None:
	            # Preamble before the first question.
	            continue
	        elif 'Solution:' in line and not in_solution:
	            current['problem'] = current['problem'].strip()
	            current['solution'] = line.split('Solution:', 1)[1].strip()
	            in_solution = True
	        elif in_solution:
	            current['solution'] += '\n' + line
	        else:
	            current['problem'] += '\n' + line

	        if 'final answer' in line.lower():
	            matches = answer_number.findall(line)
	            if matches:
	                current['final_answer'] = re.sub(r'\s+', '', matches[-1])

	    if current:
	        questions.append(current)

	    for question in questions:
	        question['problem'] = question['problem'].strip()
	        question['solution'] = question['solution'].strip()

	    return questions

	# Pod titles whose plaintext is read as Wolfram Alpha's answer.
	_ANSWER_POD_TITLES = (
	    'Result', 'Solution', 'Numerical result', 'Decimal approximation',
	    'Definite integral', 'Solutions',
	)
	# Optionally signed decimal number, shared by the parsing helpers below.
	_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)'
	# Absolute tolerance used when comparing numeric answers.
	_TOLERANCE = 0.01


	def _failure(message):
	    """Build the 'not verified' result dict carrying *message* as the error."""
	    return {'verified': False, 'wolfram_solution': None, 'error': message}


	def _tidy_expression(expr):
	    """Drop LaTeX spacing/sizing commands from *expr* and collapse whitespace."""
	    expr = re.sub(r'\\(?:[,;!:]|\s|(?:left|right)\b)', ' ', expr)
	    return re.sub(r'\s+', ' ', expr).strip()


	def _integral_query(text):
	    """Return an 'integrate ... from a to b' query for *text*, or '' if none fits."""
	    # A bound is either braced ({...}) or a bare number.
	    bound = r'(?:\{\s*([^{}]+?)\s*\}|(-?\d+(?:\.\d+)?))'
	    bounded = re.search(
	        r'(?:\\int|∫)\s*_\s*' + bound + r'\s*\^\s*' + bound + r'\s*(.+?)\s*dx\b',
	        text,
	    )
	    if bounded:
	        lower = bounded.group(1) or bounded.group(2)
	        upper = bounded.group(3) or bounded.group(4)
	        integrand = _tidy_expression(bounded.group(5))
	        if integrand:
	            query = f"integrate {integrand} from {lower} to {upper}"
	            print(f"Integral query: {query}")
	            return query

	    # Fallback: no bounds found, integrand given in parentheses.
	    unbounded = re.search(
	        r'(?:\\int|∫).*?\(([^()]+)\)\s*(?:\\[,;!:]\s*)?dx\b', text
	    )
	    if unbounded:
	        integrand = _tidy_expression(unbounded.group(1))
	        if integrand:
	            # NOTE(review): the bounds 0..1 are a guess carried over from the
	            # original code; a mismatch here does not mean the answer is wrong.
	            query = f"integrate {integrand} from 0 to 1"
	            print(f"Fallback integral query: {query}")
	            return query
	    return ""


	def _derivative_query(text):
	    """Return a 'derivative of ...' query for *text*, or '' if no function is found."""
	    # Prefer what follows "f(x) =" (any single-letter function name); otherwise
	    # fall back to whatever follows the first "=" on a line.
	    match = (
	        re.search(r'[A-Za-z]\s*\(\s*x\s*\)\s*=\s*(.+?)\s*$', text, re.MULTILINE)
	        or re.search(r'=\s*(.+?)\s*$', text, re.MULTILINE)
	    )
	    if not match:
	        return ""
	    func = _tidy_expression(match.group(1)).rstrip('.').strip()
	    if not func:
	        return ""
	    query = f"derivative of {func}"
	    print(f"Derivative query: {query}")
	    return query


	def _substitute_x(func, value):
	    """Replace the variable x in *func* by ``(value)`` without touching names like exp."""
	    return re.sub(r'(?<![A-Za-z])x(?![A-Za-z])', lambda _m: f'({value})', func)


	def _mvt_query(text):
	    """Return a 'solve f'(x) = average slope' query for an MVT problem, or ''.

	    Makes one Wolfram Alpha call (via the module-level ``wolfram_client``)
	    to obtain the derivative of f.
	    """
	    func_match = re.search(r'f\s*\(\s*x\s*\)\s*=\s*(.+?)\s+on\b', text)
	    interval_match = re.search(
	        r'\[\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\]', text
	    )
	    if not (func_match and interval_match):
	        return ""

	    func = _tidy_expression(func_match.group(1))
	    a, b = interval_match.groups()

	    derivative_query = f"derivative of {func}"
	    print(f"MVT derivative query: {derivative_query}")
	    derivative_result = wolfram_client.query(derivative_query)
	    if not derivative_result.success:
	        return ""

	    for pod in derivative_result.pods:
	        if pod.title == 'Derivative' and pod.text:
	            # The pod text may be a whole equation; keep only what follows
	            # the last '=' so the solve query has a single equals sign.
	            derivative = pod.text.rsplit('=', 1)[-1].strip()
	            f_b = _substitute_x(func, b)
	            f_a = _substitute_x(func, a)
	            query = f"solve {derivative} = (({f_b}) - ({f_a}))/(({b}) - ({a}))"
	            print(f"MVT final query: {query}")
	            return query
	    return ""


	def _numeric_answer(answer):
	    """Return *answer* as a float, or None when it is not a plain number.

	    Accepts signed decimals, simple fractions ``a/b`` and ``sqrt(a/b)``,
	    optionally preceded by ``name =``.
	    """
	    text = str(answer).replace('\u2212', '-').rsplit('=', 1)[-1].strip()

	    root = re.fullmatch(
	        rf'sqrt\(\s*({_NUMBER})\s*/\s*({_NUMBER})\s*\)', text, re.IGNORECASE
	    )
	    if root:
	        num, denom = map(float, root.groups())
	        if denom == 0 or num / denom < 0:
	            return None
	        return (num / denom) ** 0.5

	    fraction = re.fullmatch(rf'({_NUMBER})\s*/\s*({_NUMBER})', text)
	    if fraction:
	        num, denom = map(float, fraction.groups())
	        return num / denom if denom else None

	    if re.fullmatch(_NUMBER, text):
	        return float(text)
	    return None


	def _wolfram_values(text):
	    """Return the candidate numeric values in a pod's plaintext, one per line.

	    On each line the number is taken after the last '≈', else after the last
	    '=', so an echoed input such as integration bounds is not mistaken for
	    the result. Simple fractions ``a/b`` are evaluated.
	    """
	    values = []
	    for line in text.replace('\u2212', '-').splitlines():
	        for separator in ('≈', '='):
	            if separator in line:
	                line = line.rsplit(separator, 1)[1]
	                break
	        match = re.search(rf'({_NUMBER})(?:\s*/\s*({_NUMBER}))?', line)
	        if not match:
	            continue
	        value = float(match.group(1))
	        if match.group(2) and float(match.group(2)) != 0:
	            value /= float(match.group(2))
	        values.append(value)
	    return values


	def verify_solution(problem, answer):
	    """Verify a mathematical solution using Wolfram Alpha.

	    The problem text is turned into a Wolfram Alpha query for one of three
	    recognised kinds (definite integral, derivative, Mean Value Theorem).
	    The first answer pod with usable text is compared against *answer*:
	    numerically (absolute tolerance 0.01, matching any value the pod lists)
	    when *answer* is a number, otherwise as whitespace- and case-insensitive
	    text.

	    Args:
	        problem: Problem statement; ``$``/``$$`` math delimiters are removed.
	        answer: Proposed final answer (number or string).

	    Returns:
	        A dict with ``'verified'`` (bool), ``'wolfram_solution'`` (the pod
	        text used, or ``None``) and ``'error'`` (message, or ``None``).
	        Failures of any kind are reported through ``'error'``; this function
	        does not raise.
	    """
	    query = ""
	    try:
	        clean_problem = problem.replace('$$', '').replace('$', '').strip()
	        lowered = clean_problem.lower()

	        if 'integral' in lowered or '∫' in clean_problem or '\\int' in clean_problem:
	            query = _integral_query(clean_problem)
	        elif 'derivative' in lowered or 'd/dx' in clean_problem:
	            query = _derivative_query(clean_problem)
	        elif 'Mean Value Theorem' in clean_problem:
	            query = _mvt_query(clean_problem)

	        if not query.strip():
	            return _failure("Could not generate valid query from problem")

	        print(f"Final query to Wolfram Alpha: {query}")
	        result = wolfram_client.query(query)

	        if not result.success:
	            return _failure(f"Wolfram Alpha could not process query: {query}")

	        user_value = _numeric_answer(answer)

	        for pod in result.pods:
	            if pod.title not in _ANSWER_POD_TITLES or not pod.text:
	                continue
	            wolfram_answer = pod.text
	            print(f"Wolfram pod {pod.title}: {wolfram_answer}")

	            if user_value is not None:
	                candidates = _wolfram_values(wolfram_answer)
	                if not candidates:
	                    # Nothing numeric in this pod; try the next one.
	                    continue
	                is_verified = any(
	                    abs(value - user_value) < _TOLERANCE for value in candidates
	                )
	            else:
	                clean_wolfram = re.sub(r'\s+', '', wolfram_answer.lower())
	                clean_answer = re.sub(r'\s+', '', str(answer).lower())
	                is_verified = clean_wolfram == clean_answer

	            return {
	                'verified': is_verified,
	                'wolfram_solution': wolfram_answer,
	                'error': None
	            }

	        return _failure("No suitable solution found in Wolfram Alpha response")

	    except Exception as e:
	        # Boundary handler: callers expect a result dict, never an exception.
	        error_msg = f"Error during verification: {str(e)}"
	        if query:
	            error_msg += f"\nQuery attempted: {query}"
	        return _failure(error_msg)

	def generate_test(subject):
	    """Generate a three-question math exam and check each answer.

	    Asks Claude for an exam on *subject*, parses the questions with
	    ``parse_questions``, runs ``verify_solution`` on every question that has
	    an extracted final answer, and appends a verification report and usage
	    statistics to the generated text.

	    Args:
	        subject: Name of the mathematics subject. ``None`` or a blank string
	            is rejected before any API call is made.

	    Returns:
	        The exam text followed by the verification report and usage
	        statistics, or a string starting with ``"Error: "`` on failure.
	    """
	    # The dropdown has no default value, so a submit without a selection
	    # would otherwise request "an exam for None".
	    if not subject or not str(subject).strip():
	        return "Error: Please select a mathematics subject."

	    try:
	        system_prompt = """Generate 3 university-level math questions that can be verified numerically.
	For each question:
	1. Number the question as 1), 2), 3)
	2. State the problem clearly using simple $$ for displayed math
	3. Include "Solution:" before the solution
	4. Show step-by-step work
	5. End each solution with "Final answer = [number]"
	6. Keep problems relatively simple (basic calculus, algebra, etc.)
	7. Make sure problems have clear numerical answers
	8. Avoid word problems - focus on pure mathematical expressions"""

	        message = anthropic.messages.create(
	            model="claude-3-opus-20240229",
	            max_tokens=1500,
	            temperature=0.7,
	            messages=[{
	                "role": "user",
	                "content": f"{system_prompt}\n\nWrite an exam for {subject} with simple numerical answers."
	            }]
	        )

	        if not message.content:
	            return "Error: The model returned an empty response."

	        # Get the content and parse questions
	        content = message.content[0].text
	        questions = parse_questions(content)

	        # Build the verification report as parts and join once at the end.
	        note_parts = ["\n\n---\n## Solution Verification:\n"]
	        verification_results = []

	        for q in questions:
	            if q['final_answer'] is None:
	                note_parts.append(
	                    f"\nQuestion {q['number']}:\n⚠️ Could not extract final answer\n"
	                )
	                continue

	            result = verify_solution(q['problem'], q['final_answer'])
	            verification_results.append(result)
	            note_parts.append(f"\nQuestion {q['number']}:\n")
	            if result['verified']:
	                note_parts.append("✅ Solution verified by Wolfram Alpha\n")
	            else:
	                note_parts.append("⚠️ Solution needs verification\n")
	                if result['wolfram_solution']:
	                    note_parts.append(
	                        f"Wolfram Alpha result: {result['wolfram_solution']}\n"
	                    )
	                if result['error']:
	                    note_parts.append(f"Note: {result['error']}\n")

	        verification_note = "".join(note_parts)

	        # Usage statistics; the cost uses per-1000-token rates of 0.015
	        # (input) and 0.075 (output). The Wolfram figure counts verified
	        # questions, not individual HTTP requests.
	        usage_stats = f"""
	\n---\nUsage Statistics:
	• Input Tokens: {message.usage.input_tokens:,}
	• Output Tokens: {message.usage.output_tokens:,}
	• Wolfram Alpha calls: {len(verification_results)}

	Cost Breakdown:
	• Claude Cost: ${((message.usage.input_tokens / 1000) * 0.015) + ((message.usage.output_tokens / 1000) * 0.075):.4f}
	• Wolfram API calls: {len(verification_results)}
	"""

	        # Combine everything with proper spacing
	        return content + "\n\n" + verification_note + usage_stats

	    except Exception as e:
	        # UI boundary: show the failure in the output panel instead of raising.
	        return f"Error: {str(e)}"

	# Subjects offered in the dropdown, in display order.
	subjects = [
	    "Single Variable Calculus", "Multivariable Calculus",
	    "Linear Algebra", "Differential Equations",
	    "Real Analysis", "Complex Analysis",
	    "Abstract Algebra", "Probability Theory",
	    "Numerical Analysis", "Topology",
	]

	# Gradio UI: one subject dropdown in, one Markdown panel (with LaTeX
	# rendering for $...$ and $$...$$) out.
	_subject_picker = gr.Dropdown(
	    label="Select Mathematics Subject",
	    info="Choose a subject for the exam questions",
	    choices=subjects,
	)
	_exam_view = gr.Markdown(
	    label="Generated Test",
	    latex_delimiters=[
	        dict(left="$$", right="$$", display=True),
	        dict(left="$", right="$", display=False),
	    ],
	)

	# NOTE(review): the description advertises a 25-requests-per-day limit, but
	# no limiter is visible in this file - confirm where it is enforced.
	interface = gr.Interface(
	    title="Advanced Mathematics Test Generator",
	    description="""Generates university-level mathematics exam questions with solutions using Claude 3 Opus.
	Limited to 25 requests per day. Please use responsibly.""",
	    fn=generate_test,
	    inputs=_subject_picker,
	    outputs=_exam_view,
	    theme="default",
	    allow_flagging="never",
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    interface.launch()