math-exams-symvp-duo

Sleeping

App Files Files Community

math-exams-symvp-duo / app.py

joshuarauh

Update app.py

109daa0 verified 7 months ago

raw

history blame

6.4 kB

	import os
	import gradio as gr
	from anthropic import Anthropic
	import wolframalpha
	from datetime import datetime, timedelta
	from collections import deque
	import re

	# Build both API clients once at import time; each credential is read from
	# the process environment (a missing variable is passed through as None).
	anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
	wolfram_client = wolframalpha.Client(os.getenv('WOLFRAM_APPID'))

	# A question marker such as "1)" only counts at the start of a line, so that
	# ordinary math like "f(2)" or "(x+1)" inside a problem is not split apart.
	_QUESTION_MARKER = re.compile(r'^[ \t]*\d+\)', re.MULTILINE)
	# Signed integer or decimal ("5", "-3.5", ".25").
	_NUMBER = r'[-+]?(?:\d+(?:\.\d+)?|\.\d+)'
	# Preferred source of the answer: an explicit "Final answer = <number>" line,
	# tolerating "$", "[" or "(" wrappers directly before the number.
	_FINAL_ANSWER = re.compile(
	    r'final\s+answer\s*[=:]\s*[$\[(\s]*(' + _NUMBER + r')',
	    re.IGNORECASE,
	)
	# Fallback: any "= <number>" occurrence in the solution text.
	_NUMERIC_EQUALITY = re.compile(r'=\s*(' + _NUMBER + r')')


	def parse_questions(content):
	    """Parse questions and their final answers from Claude's output.

	    The text is split on line-leading question markers ("1)", "2)", ...).
	    Any text before the first marker is dropped. Each remaining part is split
	    at the first "Solution:" into a problem statement and a solution; parts
	    without a "Solution:" marker are skipped.

	    Args:
	        content: Raw text of the generated exam. ``None`` or an empty string
	            yields an empty list.

	    Returns:
	        A list of ``(problem, final_answer)`` tuples of strings.
	        ``final_answer`` is the number following "Final answer =" when that
	        phrase is present, otherwise the last "= <number>" found in the
	        solution, otherwise the literal string "Not found".
	    """
	    if not content:
	        return []

	    parts = _QUESTION_MARKER.split(content)
	    if len(parts) > 1:
	        # Everything before the first marker is preamble, not a question.
	        parts = parts[1:]

	    questions = []
	    for part in parts:
	        problem, marker, solution = part.partition("Solution:")
	        if not marker:
	            continue

	        explicit = _FINAL_ANSWER.search(solution)
	        if explicit:
	            final_answer = explicit.group(1)
	        else:
	            # The concluding value of a worked solution is normally the last
	            # equality, not the first intermediate step.
	            candidates = _NUMERIC_EQUALITY.findall(solution)
	            final_answer = candidates[-1] if candidates else "Not found"

	        questions.append((problem.strip(), final_answer))

	    return questions

	def verify_solution(problem, claimed_solution):
	    """Verify a claimed answer by asking Wolfram Alpha to solve the problem.

	    Args:
	        problem: Problem statement; it is sent as the query "Solve <problem>".
	        claimed_solution: The answer to check, passed to ``compare_solutions``
	            together with the text of Wolfram Alpha's first result.

	    Returns:
	        A dict that always contains ``'verified'``, ``'match'`` and
	        ``'wolfram_solution'``. On success ``'verified'`` and ``'match'`` both
	        hold the comparison outcome and ``'wolfram_solution'`` the result
	        text. When the query fails or yields no usable result, both flags are
	        False, ``'wolfram_solution'`` is None and ``'error'`` describes why.
	    """
	    try:
	        query = f"Solve {problem}"
	        result = wolfram_client.query(query)

	        # An exhausted iterator would raise StopIteration, whose str() is
	        # empty; use a default so the "no result" case gets a real message.
	        first_result = next(result.results, None)
	        wolfram_solution = getattr(first_result, 'text', None)
	        if wolfram_solution is None:
	            return {
	                'verified': False,
	                'match': False,
	                'error': 'Wolfram Alpha returned no result',
	                'wolfram_solution': None
	            }

	        solutions_match = compare_solutions(wolfram_solution, claimed_solution)

	        return {
	            'verified': solutions_match,
	            'wolfram_solution': wolfram_solution,
	            'match': solutions_match
	        }
	    except Exception as e:
	        # Best-effort check: any client or network failure is reported as
	        # "not verified" instead of aborting test generation.
	        return {
	            'verified': False,
	            'match': False,
	            'error': str(e),
	            'wolfram_solution': None
	        }

	# Text consisting of a single number, optionally introduced by "=", "≈",
	# "x =" or "x ≈" and optionally followed by a truncation ellipsis. Anything
	# with extra content (several roots, fractions, words) is rejected.
	_NUMERIC_RESULT = re.compile(
	    r'\s*(?:(?:[A-Za-z_]\w*\s*)?[=\u2248]\s*)?'
	    r'([-+\u2212]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)'
	    r'\s*(?:\.\.\.|\u2026)?\s*'
	)


	def _to_float(text):
	    """Return the number in *text* as a float, or None if it has none."""
	    try:
	        # Anything float() already understands keeps its existing meaning.
	        return float(text)
	    except (ValueError, TypeError):
	        pass
	    if not isinstance(text, str):
	        return None
	    match = _NUMERIC_RESULT.fullmatch(text)
	    if match is None:
	        return None
	    # float() rejects the Unicode minus sign (U+2212); normalise it.
	    return float(match.group(1).replace('\u2212', '-'))


	def compare_solutions(wolfram_sol, claude_sol):
	    """Compare two solutions for numerical equivalence.

	    Each argument is first converted with ``float()``. If that fails, a
	    string holding exactly one number is still accepted when it is written
	    like "x = 5", "x ≈ 1.41421", "3.14159..." or with a Unicode minus sign.
	    These are forms a plain-text solver result may take (assumption about
	    Wolfram Alpha's output; confirm against real responses).

	    Args:
	        wolfram_sol: Reference solution text.
	        claude_sol: Claimed solution text.

	    Returns:
	        True when both values are numeric and differ by less than 0.001;
	        False otherwise, including when either value cannot be read as a
	        single number.
	    """
	    w_val = _to_float(wolfram_sol)
	    c_val = _to_float(claude_sol)
	    if w_val is None or c_val is None:
	        return False
	    return abs(w_val - c_val) < 0.001

	def generate_test(subject):
	    """Generate a math test with Claude and verify it with Wolfram Alpha.

	    Args:
	        subject: Name of the mathematics subject to write the exam for.

	    Returns:
	        A Markdown string: the generated exam, then a "Solution Verification"
	        section with one entry per parsed question, then usage statistics.
	        If no subject is given, or any step raises, a string starting with
	        "Error: " is returned instead of raising.
	    """
	    if not subject:
	        # Nothing selected in the UI; do not spend an API call on it.
	        return "Error: Please select a subject."

	    try:
	        # The layout instructions ("1)" numbering, "Solution:" marker) are
	        # what parse_questions relies on to split the generated text.
	        system_prompt = """Generate 3 university-level math questions with numerical solutions that can be verified.
	For each question:
	1. State the problem clearly
	2. Provide your step-by-step solution
	3. End each solution with a clear final numerical answer in the format: "Final answer = [number]"
	Number the questions "1)", "2)", "3)" at the start of a line and begin each solution with "Solution:".
	Use simple $$ for all math expressions."""

	        message = anthropic.messages.create(
	            model="claude-3-opus-20240229",
	            max_tokens=1500,
	            temperature=0.7,
	            messages=[{
	                "role": "user",
	                "content": f"{system_prompt}\n\nWrite an exam for {subject}."
	            }]
	        )

	        content = message.content[0].text

	        # Verify every parsed question; one Wolfram Alpha call per question.
	        verification_results = []
	        note_parts = ["\n\n## Solution Verification:\n"]
	        for number, (problem, solution) in enumerate(parse_questions(content), start=1):
	            result = verify_solution(problem, solution)
	            verification_results.append(result)
	            note_parts.append(f"\nQuestion {number}:\n")
	            if result['verified']:
	                note_parts.append("✅ Solution verified by Wolfram Alpha\n")
	            else:
	                note_parts.append("⚠️ Solution needs verification\n")
	                if result.get('wolfram_solution'):
	                    note_parts.append(f"Wolfram Alpha got: {result['wolfram_solution']}\n")

	        if not verification_results:
	            # Keep the section from being an unexplained empty heading.
	            note_parts.append("\nNo questions could be parsed for verification.\n")
	        verification_note = "".join(note_parts)

	        # Per-1000-token rates used by the original cost estimate.
	        claude_cost = ((message.usage.input_tokens / 1000) * 0.015) + ((message.usage.output_tokens / 1000) * 0.075)
	        usage_stats = f"""
	\n---\nUsage Statistics:
	• Input Tokens: {message.usage.input_tokens:,}
	• Output Tokens: {message.usage.output_tokens:,}
	• Wolfram Alpha calls: {len(verification_results)}

	Cost Breakdown:
	• Claude Cost: ${claude_cost:.4f}
	• Wolfram API calls: {len(verification_results)}
	"""

	        return content + verification_note + usage_stats

	    except Exception as e:
	        # UI boundary: show the failure in the output pane instead of raising.
	        return f"Error: {str(e)}"

	# Mathematics subjects offered in the subject dropdown, in display order.
	subjects = [
	    "Single Variable Calculus",
	    "Multivariable Calculus",
	    "Linear Algebra",
	    "Differential Equations",
	    "Real Analysis",
	    "Complex Analysis",
	    "Abstract Algebra",
	    "Probability Theory",
	    "Numerical Analysis",
	    "Topology",
	]

	# Gradio UI: a subject dropdown feeding generate_test, whose Markdown output
	# is rendered with both "$$...$$" (display) and "$...$" (inline) LaTeX.
	# NOTE(review): the description advertises a limit of 25 requests per day,
	# but no code visible in this file enforces one; confirm where it is applied.
	interface = gr.Interface(
	    fn=generate_test,
	    inputs=gr.Dropdown(
	        choices=subjects,
	        label="Select Mathematics Subject",
	        info="Choose a subject for the exam questions",
	    ),
	    outputs=gr.Markdown(
	        label="Generated Test",
	        latex_delimiters=[
	            {"left": "$$", "right": "$$", "display": True},
	            {"left": "$", "right": "$", "display": False},
	        ],
	    ),
	    title="Advanced Mathematics Test Generator",
	    description="""Generates university-level mathematics exam questions with solutions using Claude 3 Opus.
	Limited to 25 requests per day. Please use responsibly.""",
	    theme="default",
	    allow_flagging="never",
	)

	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported.
	if __name__ == "__main__":
	    interface.launch()