math-exams-symvp-duo

Running

App Files Files Community

math-exams-symvp-duo / app.py

joshuarauh

Update app.py

91aaf8c verified 10 months ago

raw

history blame

6.73 kB

	import os
	import gradio as gr
	from anthropic import Anthropic
	import wolframalpha
	from datetime import datetime, timedelta
	from collections import deque
	import re

	# Build the two API clients from credentials supplied via the environment.
	# os.environ.get returns None when a variable is unset; that value is passed
	# straight through to the client constructors.
	_anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
	anthropic = Anthropic(api_key=_anthropic_api_key)

	_wolfram_app_id = os.environ.get('WOLFRAM_APPID')
	wolfram_client = wolframalpha.Client(_wolfram_app_id)

	def parse_questions(content):
	    """Parse numbered questions and their solutions from Claude's output.

	    Questions are expected to be introduced by ``N)`` at the start of a line
	    and to contain a ``Solution:`` marker separating problem from solution.

	    Args:
	        content: The raw exam text. May start with introductory prose.

	    Returns:
	        A list of dicts with the keys ``'number'``, ``'problem'``,
	        ``'solution'`` and ``'final_answer'`` (all strings). Questions
	        without a ``Solution:`` marker or without a numeric answer are
	        skipped. Empty or missing content yields an empty list.
	    """
	    if not content:
	        return []

	    # With one capture group, re.split returns
	    # [preamble, number, body, number, body, ...]. The preamble is always
	    # present (possibly empty), so it is always dropped; dropping it only
	    # when blank would misalign every pair whenever intro text is present.
	    # The ^ alternative lets a reply that starts directly with "1)" split.
	    parts = re.split(r'(?:^|\n)\s*(\d+)\)', content)
	    numbered = parts[1:]

	    questions = []
	    for question_num, question_content in zip(numbered[0::2], numbered[1::2]):
	        problem, marker, solution = question_content.partition("Solution:")
	        if not marker:
	            continue

	        solution = solution.strip()
	        final_answer = _extract_final_answer(solution)
	        if final_answer is None:
	            continue

	        questions.append({
	            'number': question_num,
	            'problem': problem.strip(),
	            'solution': solution,
	            'final_answer': final_answer,
	        })

	    return questions


	def _extract_final_answer(solution):
	    """Return the numeric final answer in *solution* as a string, or None.

	    Prefers the last explicit "final answer ..." statement; otherwise falls
	    back to the number following the last ``=`` in the text.
	    """
	    number = r'[-+]?(?:\d*\.)?\d+'
	    text = solution.lower()

	    # Tolerate "=", ":" or "is" after the phrase, plus $ / * markup around
	    # the number (e.g. "Final answer = $5$" or "**Final answer:** 5").
	    stated = re.findall(
	        r'final answer\s*(?:is|[=:])?[\s$*]*(' + number + r')', text
	    )
	    if stated:
	        return stated[-1]

	    equations = re.findall(r'=[\s$*]*(' + number + r')', text)
	    return equations[-1] if equations else None

	def verify_solution(problem, answer):
	    """Verify a numeric answer to *problem* using Wolfram Alpha.

	    Args:
	        problem: Problem text; LaTeX ``$``/``$$`` delimiters are stripped
	            before it is sent as the query.
	        answer: The expected numeric answer (string or number).

	    Returns:
	        A dict with the keys ``'verified'`` (bool), ``'wolfram_solution'``
	        (the matching pod's text, or None) and ``'error'`` (message or
	        None). This function never raises; failures are reported through
	        ``'error'``.
	    """
	    # Parse the expected value first so a malformed answer is rejected
	    # without spending a Wolfram Alpha request.
	    try:
	        expected = float(answer)
	    except (TypeError, ValueError):
	        return {
	            'verified': False,
	            'wolfram_solution': None,
	            'error': f"Answer is not numeric: {answer!r}"
	        }

	    try:
	        # Format the query for Wolfram Alpha
	        query = problem.replace('$$', '').replace('$', '')  # Remove LaTeX delimiters
	        result = wolfram_client.query(query)

	        # Try to get the numerical result
	        for pod in result.pods:
	            if pod.title not in ('Result', 'Solution', 'Numerical result'):
	                continue

	            wolfram_answer = pod.text
	            if not wolfram_answer:
	                # A pod without plain text must not abort the scan of the
	                # remaining pods.
	                continue

	            # Compare against the first number appearing in the pod text
	            wolfram_nums = re.findall(r'[-+]?(?:\d*\.)?\d+', wolfram_answer)
	            if wolfram_nums:
	                return {
	                    'verified': abs(float(wolfram_nums[0]) - expected) < 0.001,
	                    'wolfram_solution': wolfram_answer,
	                    'error': None
	                }

	        return {
	            'verified': False,
	            'wolfram_solution': None,
	            'error': "Could not extract numerical solution from Wolfram Alpha"
	        }
	    except Exception as e:
	        # Deliberately broad: verification is best-effort, and any client or
	        # network failure is reported in the result instead of propagating.
	        return {
	            'verified': False,
	            'wolfram_solution': None,
	            'error': str(e)
	        }

	def generate_test(subject):
	    """Generate a three-question math exam for *subject* and annotate it.

	    Asks Claude for the exam, parses the numbered questions out of the reply
	    with ``parse_questions``, checks each parsed final answer with
	    ``verify_solution``, and returns the exam text followed by a
	    verification section and usage/cost statistics.

	    Args:
	        subject: Name of the mathematics subject; interpolated into the
	            prompt.

	    Returns:
	        A Markdown string. Failures are never raised; they are returned as
	        ``"Error: <message>"`` so the UI can display them.
	    """
	    # An unselected dropdown delivers None; reject it before paying for a
	    # request that would ask for an exam on "None".
	    if not subject:
	        return "Error: Please select a subject."

	    try:
	        # Built from explicit lines so source indentation never leaks into
	        # the text sent to the model.
	        system_prompt = (
	            "Generate 3 university-level math questions that can be verified numerically.\n"
	            "For each question:\n"
	            "1. Number the question as 1), 2), 3)\n"
	            "2. State the problem clearly using simple $$ for displayed math\n"
	            "3. Include \"Solution:\" before the solution\n"
	            "4. Show step-by-step work\n"
	            "5. End each solution with \"Final answer = [number]\"\n"
	            "6. Keep problems relatively simple (basic calculus, algebra, etc.)"
	        )

	        message = anthropic.messages.create(
	            model="claude-3-opus-20240229",
	            max_tokens=1500,
	            temperature=0.7,
	            messages=[{
	                "role": "user",
	                "content": f"{system_prompt}\n\nWrite an exam for {subject} with simple numerical answers."
	            }]
	        )

	        # Get the content and parse questions
	        content = message.content[0].text
	        questions = parse_questions(content)

	        # Add verification results
	        note_parts = ["\n\n---\n## Solution Verification:\n"]
	        verification_results = []

	        for q in questions:
	            result = verify_solution(q['problem'], q['final_answer'])
	            verification_results.append(result)
	            note_parts.append(f"\nQuestion {q['number']}:\n")
	            if result['verified']:
	                note_parts.append("✅ Solution verified by Wolfram Alpha\n")
	            else:
	                note_parts.append("⚠️ Solution needs verification\n")
	                if result['wolfram_solution']:
	                    note_parts.append(f"Wolfram Alpha result: {result['wolfram_solution']}\n")
	                if result['error']:
	                    note_parts.append(f"Note: {result['error']}\n")

	        if not questions:
	            # Say so explicitly instead of leaving an empty section.
	            note_parts.append("\nNo questions could be parsed for verification.\n")

	        verification_note = "".join(note_parts)

	        # USD per 1,000 tokens, as hard-coded by this app.
	        # NOTE(review): confirm these rates against current model pricing.
	        input_rate_per_1k = 0.015
	        output_rate_per_1k = 0.075
	        input_tokens = message.usage.input_tokens
	        output_tokens = message.usage.output_tokens
	        claude_cost = (
	            (input_tokens / 1000) * input_rate_per_1k
	            + (output_tokens / 1000) * output_rate_per_1k
	        )
	        wolfram_calls = len(verification_results)

	        # Add usage statistics
	        usage_stats = (
	            "\n\n---\nUsage Statistics:\n"
	            f"• Input Tokens: {input_tokens:,}\n"
	            f"• Output Tokens: {output_tokens:,}\n"
	            f"• Wolfram Alpha calls: {wolfram_calls}\n"
	            "\n"
	            "Cost Breakdown:\n"
	            f"• Claude Cost: ${claude_cost:.4f}\n"
	            f"• Wolfram API calls: {wolfram_calls}\n"
	        )

	        # Combine everything with proper spacing
	        return content + "\n\n" + verification_note + usage_stats

	    except Exception as e:
	        # Top-level UI boundary: show the failure to the user instead of
	        # letting it propagate into Gradio.
	        return f"Error: {str(e)}"

	# Subjects offered in the dropdown. The selected string is passed to
	# generate_test, which interpolates it into the prompt.
	subjects = [
	    "Single Variable Calculus", "Multivariable Calculus",
	    "Linear Algebra", "Differential Equations",
	    "Real Analysis", "Complex Analysis",
	    "Abstract Algebra", "Probability Theory",
	    "Numerical Analysis", "Topology",
	]

	# Create Gradio interface: one subject dropdown in, one Markdown document out.
	# NOTE(review): the description below advertises a limit of 25 requests per
	# day, but nothing visible in this file enforces one - confirm that it is
	# applied elsewhere.
	subject_input = gr.Dropdown(
	    choices=subjects,
	    label="Select Mathematics Subject",
	    info="Choose a subject for the exam questions"
	)

	# Treat $$...$$ as display math and $...$ as inline math in the output.
	exam_output = gr.Markdown(
	    label="Generated Test",
	    latex_delimiters=[
	        {"left": "$$", "right": "$$", "display": True},
	        {"left": "$", "right": "$", "display": False}
	    ]
	)

	interface = gr.Interface(
	    fn=generate_test,
	    inputs=subject_input,
	    outputs=exam_output,
	    title="Advanced Mathematics Test Generator",
	    description="""Generates university-level mathematics exam questions with solutions using Claude 3 Opus.
	Limited to 25 requests per day. Please use responsibly.""",
	    theme="default",
	    allow_flagging="never"
	)

	# Launch the interface only when this file is run as a script, so importing
	# the module does not start a server.
	if __name__ == "__main__":
	    interface.launch()