import os
import re
from collections import deque
from datetime import datetime, timedelta

import gradio as gr
import wolframalpha
from anthropic import Anthropic

# Initialize clients
anthropic = Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
wolfram_client = wolframalpha.Client(os.environ.get('WOLFRAM_APPID'))

# Patterns are compiled once at import time instead of on every parsed line.
_QUESTION_START_RE = re.compile(r'^\s*(\d+)\)')
_NUMBER_RE = re.compile(r'[-+]?(?:\d*\.)?\d+')
_INSTRUCTION_RE = re.compile(r'(?i)find|calculate|solve|evaluate|determine')

# Wolfram Alpha pod titles that are inspected for a numerical answer.
_RESULT_POD_TITLES = (
    'Result',
    'Solution',
    'Numerical result',
    'Decimal approximation',
)

# Maximum absolute difference for two answers to be considered equal.
_ANSWER_TOLERANCE = 0.01

# Per-1K-token rates (USD) used for the cost estimate shown to the user.
_INPUT_COST_PER_1K = 0.015
_OUTPUT_COST_PER_1K = 0.075


def parse_questions(content):
    """Parse questions and their solutions from Claude's output.

    A question starts on a line of the form ``N) ...``. Everything up to the
    first line containing ``Solution:`` belongs to the problem statement;
    everything after it belongs to the solution. The last number found on any
    line containing "final answer" (case-insensitive) is stored as the
    question's final answer.

    Args:
        content: The raw text to parse.

    Returns:
        A list of dicts with the keys ``number`` (str), ``problem`` (str),
        ``solution`` (str) and ``final_answer`` (str or None), in the order
        the questions appear. Text before the first question is ignored.
    """
    questions = []
    current_question = None
    # Explicit state flag: a "Solution:" marker on a line of its own leaves
    # the solution text empty, so the solution string itself cannot be used
    # to tell whether the marker has already been seen.
    in_solution = False

    for line in content.split('\n'):
        header = _QUESTION_START_RE.match(line)
        if header:
            # Start of a new question: flush the previous one.
            if current_question is not None:
                questions.append(current_question)
            current_question = {
                'number': header.group(1),
                # The match ends right after the first ')'.
                'problem': line[header.end():].strip(),
                'solution': '',
                'final_answer': None,
            }
            in_solution = False
        elif current_question is None:
            # Preamble before the first numbered question.
            continue
        elif not in_solution and 'Solution:' in line:
            # Only the first marker switches sections; a later line that
            # happens to contain "Solution:" is ordinary solution text.
            current_question['problem'] = current_question['problem'].strip()
            current_question['solution'] = line.split('Solution:', 1)[1].strip()
            in_solution = True
        elif in_solution:
            current_question['solution'] += '\n' + line
        else:
            current_question['problem'] += '\n' + line

        # Extract final answer (checked on every line of a question).
        if current_question is not None and 'final answer' in line.lower():
            numbers = _NUMBER_RE.findall(line)
            if numbers:
                current_question['final_answer'] = numbers[-1]

    # Add last question
    if current_question is not None:
        questions.append(current_question)

    # Clean up surrounding whitespace left over from line-wise accumulation.
    for question in questions:
        question['problem'] = question['problem'].strip()
        question['solution'] = question['solution'].strip()

    return questions


def _verification_result(verified, wolfram_solution, error):
    """Build the result dict returned by ``verify_solution``."""
    return {
        'verified': verified,
        'wolfram_solution': wolfram_solution,
        'error': error,
    }


def verify_solution(problem, answer):
    """Verify a mathematical solution using Wolfram Alpha.

    The problem text is stripped of ``$`` delimiters and of common
    instruction verbs (find, calculate, solve, evaluate, determine) and sent
    to Wolfram Alpha. The first number in the first suitable result pod is
    compared with ``answer`` using an absolute tolerance of 0.01.

    Args:
        problem: The problem statement to send to Wolfram Alpha.
        answer: The expected numerical answer, as a string or number
            accepted by ``float``.

    Returns:
        A dict with the keys ``verified`` (bool), ``wolfram_solution``
        (the pod text that was compared, or None) and ``error`` (a
        message, or None). This function never raises; any failure is
        reported through ``error``.
    """
    try:
        # Clean up the problem for Wolfram Alpha: drop LaTeX delimiters and
        # instruction words, keeping only the mathematical expression.
        query = problem.replace('$$', '').replace('$', '')
        query = _INSTRUCTION_RE.sub('', query).strip()

        # Parse the expected value once, before spending an API call.
        user_value = float(answer)

        if not query:
            return _verification_result(
                False, None, "Wolfram Alpha could not process the query"
            )

        result = wolfram_client.query(query)

        if not result.success:
            return _verification_result(
                False, None, "Wolfram Alpha could not process the query"
            )

        # Look for numerical results in multiple pods
        for pod in result.pods:
            if pod.title not in _RESULT_POD_TITLES:
                continue

            wolfram_answer = pod.text
            if not wolfram_answer:
                # A pod without plaintext must not abort the search;
                # keep looking at the remaining pods.
                continue

            wolfram_nums = _NUMBER_RE.findall(wolfram_answer)
            if not wolfram_nums:
                continue

            wolfram_value = float(wolfram_nums[0])
            # Allow for small numerical differences
            is_verified = abs(wolfram_value - user_value) < _ANSWER_TOLERANCE
            return _verification_result(is_verified, wolfram_answer, None)

        return _verification_result(
            False, None, "No numerical solution found in Wolfram Alpha response"
        )

    except Exception as e:
        # Verification is best-effort: report the failure instead of raising
        # so a single bad question does not break the whole test output.
        return _verification_result(
            False, None, f"Error during verification: {str(e)}"
        )


def generate_test(subject):
    """Generate and verify a math test.

    Asks Claude for three questions on ``subject``, parses them, checks each
    extracted final answer against Wolfram Alpha and appends a verification
    report plus token-usage and cost statistics.

    Args:
        subject: The mathematics subject chosen in the UI.

    Returns:
        A Markdown string with the generated test, the verification report
        and the usage statistics, or a string starting with ``"Error: "``
        if no subject was given or anything went wrong.
    """
    if not subject:
        # The dropdown may be submitted without a selection; without this
        # guard the prompt would ask for an exam on "None".
        return "Error: No subject selected"

    try:
        # NOTE: despite its name, this text is sent as part of the user turn.
        system_prompt = (
            "Generate 3 university-level math questions that can be verified numerically.\n"
            "For each question:\n"
            "1. Number the question as 1), 2), 3)\n"
            "2. State the problem clearly using simple $$ for displayed math\n"
            "3. Include \"Solution:\" before the solution\n"
            "4. Show step-by-step work\n"
            "5. End each solution with \"Final answer = [number]\"\n"
            "6. Keep problems relatively simple (basic calculus, algebra, etc.)\n"
            "7. Make sure problems have clear numerical answers\n"
            "8. Avoid word problems - focus on pure mathematical expressions"
        )

        message = anthropic.messages.create(
            model="claude-3-opus-20240229",
            max_tokens=1500,
            temperature=0.7,
            messages=[{
                "role": "user",
                "content": f"{system_prompt}\n\nWrite an exam for {subject} with simple numerical answers."
            }]
        )

        # Get the content and parse questions
        content = message.content[0].text
        questions = parse_questions(content)

        # Build the verification report as a list of fragments, joined once.
        note_parts = ["\n\n---\n## Solution Verification:\n"]
        verification_results = []

        for q in questions:
            if q['final_answer'] is None:
                note_parts.append(
                    f"\nQuestion {q['number']}:\n⚠️ Could not extract final answer\n"
                )
                continue

            result = verify_solution(q['problem'], q['final_answer'])
            verification_results.append(result)

            note_parts.append(f"\nQuestion {q['number']}:\n")
            if result['verified']:
                note_parts.append("✅ Solution verified by Wolfram Alpha\n")
            else:
                note_parts.append("⚠️ Solution needs verification\n")
                if result['wolfram_solution']:
                    note_parts.append(
                        f"Wolfram Alpha result: {result['wolfram_solution']}\n"
                    )
                if result['error']:
                    note_parts.append(f"Note: {result['error']}\n")

        verification_note = "".join(note_parts)

        # Add usage statistics
        input_tokens = message.usage.input_tokens
        output_tokens = message.usage.output_tokens
        wolfram_calls = len(verification_results)
        claude_cost = (
            (input_tokens / 1000) * _INPUT_COST_PER_1K
            + (output_tokens / 1000) * _OUTPUT_COST_PER_1K
        )

        usage_stats = (
            "\n\n---\nUsage Statistics:\n"
            f"• Input Tokens: {input_tokens:,}\n"
            f"• Output Tokens: {output_tokens:,}\n"
            f"• Wolfram Alpha calls: {wolfram_calls}\n"
            "\n"
            "Cost Breakdown:\n"
            f"• Claude Cost: ${claude_cost:.4f}\n"
            f"• Wolfram API calls: {wolfram_calls}\n"
        )

        # Combine everything with proper spacing
        final_output = content + "\n\n" + verification_note + usage_stats

        return final_output

    except Exception as e:
        # UI boundary: show the failure to the user rather than crashing.
        return f"Error: {str(e)}"


subjects = [
    "Single Variable Calculus",
    "Multivariable Calculus",
    "Linear Algebra",
    "Differential Equations",
    "Real Analysis",
    "Complex Analysis",
    "Abstract Algebra",
    "Probability Theory",
    "Numerical Analysis",
    "Topology",
]

# Create Gradio interface
# NOTE(review): the description advertises a limit of 25 requests per day,
# but nothing in the visible code enforces it — confirm whether rate limiting
# lives elsewhere or still needs to be implemented.
interface = gr.Interface(
    fn=generate_test,
    inputs=gr.Dropdown(
        choices=subjects,
        label="Select Mathematics Subject",
        info="Choose a subject for the exam questions"
    ),
    outputs=gr.Markdown(
        label="Generated Test",
        latex_delimiters=[
            {"left": "$$", "right": "$$", "display": True},
            {"left": "$", "right": "$", "display": False}
        ]
    ),
    title="Advanced Mathematics Test Generator",
    description="""Generates university-level mathematics exam questions with solutions using Claude 3 Opus.
Limited to 25 requests per day. Please use responsibly.""",
    theme="default",
    allow_flagging="never"
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()