Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,15 +1,51 @@
|
|
| 1 |
-
# app.py
|
| 2 |
import os
|
| 3 |
import gradio as gr
|
| 4 |
from anthropic import Anthropic
|
| 5 |
import wolframalpha
|
| 6 |
from datetime import datetime, timedelta
|
| 7 |
from collections import deque
|
|
|
|
| 8 |
|
| 9 |
# Initialize clients
|
| 10 |
anthropic = Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
|
| 11 |
wolfram_client = wolframalpha.Client(os.environ.get('WOLFRAM_APPID'))
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def verify_solution(problem, claimed_solution):
|
| 14 |
"""Verify a mathematical solution using Wolfram Alpha"""
|
| 15 |
try:
|
|
@@ -37,9 +73,13 @@ def verify_solution(problem, claimed_solution):
|
|
| 37 |
|
| 38 |
def compare_solutions(wolfram_sol, claude_sol):
|
| 39 |
"""Compare two solutions for mathematical equivalence"""
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
def generate_test(subject):
|
| 45 |
"""Generate and verify a math test"""
|
|
@@ -49,7 +89,7 @@ def generate_test(subject):
|
|
| 49 |
For each question:
|
| 50 |
1. State the problem clearly
|
| 51 |
2. Provide your step-by-step solution
|
| 52 |
-
3.
|
| 53 |
Use simple $$ for all math expressions."""
|
| 54 |
|
| 55 |
message = anthropic.messages.create(
|
|
@@ -68,8 +108,7 @@ def generate_test(subject):
|
|
| 68 |
# Add verification results
|
| 69 |
verification_results = []
|
| 70 |
|
| 71 |
-
#
|
| 72 |
-
# Example structure:
|
| 73 |
verification_note = "\n\n## Solution Verification:\n"
|
| 74 |
for i, (problem, solution) in enumerate(parse_questions(content)):
|
| 75 |
result = verify_solution(problem, solution)
|
|
@@ -80,6 +119,7 @@ def generate_test(subject):
|
|
| 80 |
verification_note += "⚠️ Solution needs verification\n"
|
| 81 |
if result['wolfram_solution']:
|
| 82 |
verification_note += f"Wolfram Alpha got: {result['wolfram_solution']}\n"
|
|
|
|
| 83 |
|
| 84 |
# Add usage statistics
|
| 85 |
usage_stats = f"""
|
|
@@ -98,13 +138,7 @@ def generate_test(subject):
|
|
| 98 |
except Exception as e:
|
| 99 |
return f"Error: {str(e)}"
|
| 100 |
|
| 101 |
-
#
|
| 102 |
-
# wolframalpha==5.0.0
|
| 103 |
-
# Update environment variables to include WOLFRAM_APPID
|
| 104 |
-
|
| 105 |
-
# Rest of your Gradio interface code remains the same...
|
| 106 |
-
|
| 107 |
-
# Subject choices
|
| 108 |
subjects = [
|
| 109 |
"Single Variable Calculus",
|
| 110 |
"Multivariable Calculus",
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
from anthropic import Anthropic
|
| 4 |
import wolframalpha
|
| 5 |
from datetime import datetime, timedelta
|
| 6 |
from collections import deque
|
| 7 |
+
import re
|
| 8 |
|
| 9 |
# Initialize clients
|
| 10 |
anthropic = Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
|
| 11 |
wolfram_client = wolframalpha.Client(os.environ.get('WOLFRAM_APPID'))
|
| 12 |
|
| 13 |
+
def parse_questions(content):
    """Parse questions and their final numeric answers from Claude's output.

    The text is split on question markers such as ``1)`` that start a line
    (optionally preceded by markdown ``#``/``*`` or the word "Question" /
    "Problem"). Anchoring to the line start keeps math like ``f(2)`` inside
    a problem from being mistaken for a question number. Each chunk is then
    divided at the first ``Solution:``; chunks without it are skipped.

    Args:
        content: Raw text of the generated test. Empty or ``None`` yields
            an empty list.

    Returns:
        A list of ``(problem, final_answer)`` string tuples, in order of
        appearance. ``final_answer`` is the number following the last
        "Final answer =" in the solution; if that phrase is absent, the
        number after the last ``=`` is used; if no number is found at all,
        it is the string ``"Not found"``.
    """
    if not content:
        return []

    number = r'[-+]?\d*\.?\d+'
    question_marker = re.compile(
        r'^[ \t]*(?:[#*]+[ \t]*)?(?:(?:question|problem)[ \t]*)?\d+\)',
        re.IGNORECASE | re.MULTILINE,
    )
    # Tolerate "$$" or "[" between the "=" and the number, since the prompt
    # asks for $$-delimited math and shows the answer as "[number]".
    explicit_answer = re.compile(
        r'final\s+answer\s*=\s*[\[$]*\s*(' + number + r')',
        re.IGNORECASE,
    )
    any_equation = re.compile(r'=\s*(' + number + r')')

    parts = question_marker.split(content)
    if len(parts) > 1:
        # Everything before the first question marker is preamble.
        parts = parts[1:]

    questions = []
    for part in parts:
        problem, marker, solution = part.partition("Solution:")
        if not marker:
            continue
        solution = solution.strip()

        # Prefer the explicitly labelled answer; otherwise fall back to the
        # last equation, because earlier "=" signs are intermediate steps.
        candidates = explicit_answer.findall(solution)
        if not candidates:
            candidates = any_equation.findall(solution)
        final_answer = candidates[-1] if candidates else "Not found"

        questions.append((problem.strip(), final_answer))

    return questions
|
| 48 |
+
|
| 49 |
def verify_solution(problem, claimed_solution):
|
| 50 |
"""Verify a mathematical solution using Wolfram Alpha"""
|
| 51 |
try:
|
|
|
|
| 73 |
|
| 74 |
def compare_solutions(wolfram_sol, claude_sol, abs_tol=0.001, rel_tol=1e-6):
    """Compare two solutions for numerical equivalence.

    Both values are converted with ``float()`` and compared using
    ``math.isclose``. The absolute tolerance keeps the original 0.001
    window for small values; the relative tolerance additionally accepts
    large-magnitude answers that differ only by rounding, which a purely
    absolute threshold would reject.

    Args:
        wolfram_sol: Reference value (number or numeric string).
        claude_sol: Claimed value (number or numeric string).
        abs_tol: Maximum absolute difference treated as equal.
        rel_tol: Maximum difference relative to the larger magnitude
            treated as equal.

    Returns:
        True if both values parse as floats and are close; False if they
        differ or if either value cannot be converted (for example ``None``
        or the "Not found" placeholder).
    """
    import math  # local import keeps this block self-contained

    try:
        w_val = float(wolfram_sol)
        c_val = float(claude_sol)
    except (ValueError, TypeError):
        return False

    return math.isclose(w_val, c_val, rel_tol=rel_tol, abs_tol=abs_tol)
|
| 83 |
|
| 84 |
def generate_test(subject):
|
| 85 |
"""Generate and verify a math test"""
|
|
|
|
| 89 |
For each question:
|
| 90 |
1. State the problem clearly
|
| 91 |
2. Provide your step-by-step solution
|
| 92 |
+
3. End each solution with a clear final numerical answer in the format: "Final answer = [number]"
|
| 93 |
Use simple $$ for all math expressions."""
|
| 94 |
|
| 95 |
message = anthropic.messages.create(
|
|
|
|
| 108 |
# Add verification results
|
| 109 |
verification_results = []
|
| 110 |
|
| 111 |
+
# Parse and verify each question
|
|
|
|
| 112 |
verification_note = "\n\n## Solution Verification:\n"
|
| 113 |
for i, (problem, solution) in enumerate(parse_questions(content)):
|
| 114 |
result = verify_solution(problem, solution)
|
|
|
|
| 119 |
verification_note += "⚠️ Solution needs verification\n"
|
| 120 |
if result['wolfram_solution']:
|
| 121 |
verification_note += f"Wolfram Alpha got: {result['wolfram_solution']}\n"
|
| 122 |
+
verification_results.append(result)
|
| 123 |
|
| 124 |
# Add usage statistics
|
| 125 |
usage_stats = f"""
|
|
|
|
| 138 |
except Exception as e:
|
| 139 |
return f"Error: {str(e)}"
|
| 140 |
|
| 141 |
+
# Subject choices and interface configuration remain the same...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
subjects = [
|
| 143 |
"Single Variable Calculus",
|
| 144 |
"Multivariable Calculus",
|