Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -520,6 +520,7 @@ def get_solution_for_verification(response_text, sympy_correct, final_verificati
|
|
520 |
"""
|
521 |
Extract the relevant parts of the solution for verification based on whether
|
522 |
the original solution was correct or not. Always preserves the original question.
|
|
|
523 |
"""
|
524 |
# Extract the question using the specific markers
|
525 |
question_start = "Here is a test question"
|
@@ -531,7 +532,13 @@ def get_solution_for_verification(response_text, sympy_correct, final_verificati
|
|
531 |
|
532 |
if q_start == -1 or q_end == -1:
|
533 |
logger.error("Could not find question markers")
|
534 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
535 |
|
536 |
# Extract question and solution sections
|
537 |
question = response_text[q_start:q_end].strip()
|
@@ -562,19 +569,28 @@ def get_solution_for_verification(response_text, sympy_correct, final_verificati
|
|
562 |
# Fallback to original if no final verification
|
563 |
solution = original_solution.strip()
|
564 |
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
def verify_with_chatgpt(question_and_solution):
|
570 |
"""
|
571 |
Send the solution to ChatGPT for verification and grading.
|
572 |
Returns the verification response.
|
573 |
"""
|
574 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
575 |
# Construct the prompt for ChatGPT
|
576 |
verification_prompt = f"""As an expert mathematician, please verify and grade this mathematics solution.
|
577 |
-
|
578 |
Analyze the following aspects:
|
579 |
1. Mathematical Correctness (50 points):
|
580 |
- Are all calculations correct?
|
@@ -586,7 +602,7 @@ Analyze the following aspects:
|
|
586 |
- Are edge cases addressed?
|
587 |
- Are all required steps shown?
|
588 |
|
589 |
-
3. Clarity and Presentation (
|
590 |
- Is the solution well-organized?
|
591 |
- Are steps clearly explained?
|
592 |
- Is mathematical notation used correctly?
|
@@ -596,15 +612,20 @@ Analyze the following aspects:
|
|
596 |
- Are efficient methods used?
|
597 |
- Is mathematical insight demonstrated?
|
598 |
|
599 |
-
Question
|
600 |
-
{
|
|
|
|
|
|
|
|
|
|
|
601 |
|
602 |
Please provide:
|
603 |
1. A brief point-by-point analysis of the solution
|
604 |
2. Specific comments on any errors or oversights
|
605 |
3. Suggestions for improvement (if any)
|
606 |
4. A numerical score out of 100 based on the criteria above
|
607 |
-
5. Finally, present a complete revised solution to which you would award a score of 100
|
608 |
|
609 |
Important: When writing mathematical expressions, use these formatting rules re LaTeX
|
610 |
- Use $ for inline math
|
@@ -613,6 +634,10 @@ Important: When writing mathematical expressions, use these formatting rules re
|
|
613 |
- DO NOT use \\begin{{aligned}} or similar environments
|
614 |
- When writing questions involving currency expressed in dollars NEVER use the `$` symbol as it will be interpreted as math mode. ALWAYS write out the word dollars.
|
615 |
* Example: 1000 dollars
|
|
|
|
|
|
|
|
|
616 |
|
617 |
Format your response with clear headers and bullet points."""
|
618 |
|
@@ -642,10 +667,10 @@ def append_chatgpt_verification(initial_response, sympy_correct, final_verificat
|
|
642 |
"""
|
643 |
try:
|
644 |
# Get the appropriate solution text for verification
|
645 |
-
solution_text = get_solution_for_verification(initial_response, sympy_correct, final_verification)
|
646 |
|
647 |
# Get ChatGPT's verification
|
648 |
-
chatgpt_verification = verify_with_chatgpt(solution_text)
|
649 |
|
650 |
# Append verification to the response
|
651 |
full_response = f"{initial_response}\n\nChatGPT Verification and Grading:\n{chatgpt_verification}"
|
|
|
520 |
"""
|
521 |
Extract the relevant parts of the solution for verification based on whether
|
522 |
the original solution was correct or not. Always preserves the original question.
|
523 |
+
Returns tuple of (question, solution).
|
524 |
"""
|
525 |
# Extract the question using the specific markers
|
526 |
question_start = "Here is a test question"
|
|
|
532 |
|
533 |
if q_start == -1 or q_end == -1:
|
534 |
logger.error("Could not find question markers")
|
535 |
+
# Return best effort split - assume first paragraph is question
|
536 |
+
paragraphs = response_text.split('\n\n')
|
537 |
+
if len(paragraphs) > 1:
|
538 |
+
return paragraphs[0].strip(), '\n\n'.join(paragraphs[1:]).strip()
|
539 |
+
else:
|
540 |
+
# If we can't even split paragraphs, just return the whole text as both question and solution
|
541 |
+
return response_text.strip(), response_text.strip()
|
542 |
|
543 |
# Extract question and solution sections
|
544 |
question = response_text[q_start:q_end].strip()
|
|
|
569 |
# Fallback to original if no final verification
|
570 |
solution = original_solution.strip()
|
571 |
|
572 |
+
return question, solution
|
573 |
+
|
574 |
+
def verify_with_chatgpt(question, solution):
|
|
|
|
|
575 |
"""
|
576 |
Send the solution to ChatGPT for verification and grading.
|
577 |
Returns the verification response.
|
578 |
"""
|
579 |
try:
|
580 |
+
# Extract SymPy verification if it exists
|
581 |
+
sympy_start = solution.find("Here's the SymPy code")
|
582 |
+
sympy_end = solution.find("Verification Analysis:")
|
583 |
+
|
584 |
+
sympy_section = ""
|
585 |
+
main_solution = solution
|
586 |
+
|
587 |
+
if sympy_start != -1 and sympy_end != -1:
|
588 |
+
sympy_section = solution[sympy_start:sympy_end].strip()
|
589 |
+
main_solution = solution[:sympy_start].strip()
|
590 |
+
|
591 |
# Construct the prompt for ChatGPT
|
592 |
verification_prompt = f"""As an expert mathematician, please verify and grade this mathematics solution.
|
593 |
+
|
594 |
Analyze the following aspects:
|
595 |
1. Mathematical Correctness (50 points):
|
596 |
- Are all calculations correct?
|
|
|
602 |
- Are edge cases addressed?
|
603 |
- Are all required steps shown?
|
604 |
|
605 |
+
3. Clarity and Presentation (20 points):
|
606 |
- Is the solution well-organized?
|
607 |
- Are steps clearly explained?
|
608 |
- Is mathematical notation used correctly?
|
|
|
612 |
- Are efficient methods used?
|
613 |
- Is mathematical insight demonstrated?
|
614 |
|
615 |
+
Question:
|
616 |
+
{question}
|
617 |
+
|
618 |
+
Student's Solution:
|
619 |
+
{main_solution}
|
620 |
+
|
621 |
+
{f"The student also provided the following SymPy verification of their solution:\\n{sympy_section}" if sympy_section else ""}
|
622 |
|
623 |
Please provide:
|
624 |
1. A brief point-by-point analysis of the solution
|
625 |
2. Specific comments on any errors or oversights
|
626 |
3. Suggestions for improvement (if any)
|
627 |
4. A numerical score out of 100 based on the criteria above
|
628 |
+
5. Finally, if the numerical score is less than 90 out of 100, present a complete revised solution to which you would award a score of 100
|
629 |
|
630 |
Important: When writing mathematical expressions, use these formatting rules re LaTeX
|
631 |
- Use $ for inline math
|
|
|
634 |
- DO NOT use \\begin{{aligned}} or similar environments
|
635 |
- When writing questions involving currency expressed in dollars NEVER use the `$` symbol as it will be interpreted as math mode. ALWAYS write out the word dollars.
|
636 |
* Example: 1000 dollars
|
637 |
+
- Never use parentheses or brackets around expressions, just use the dollar signs. For example:
|
638 |
+
* Write: $\\sin(x) \\approx x - \\frac{{x^3}}{{6}}$
|
639 |
+
* Not: (\\sin(x) \\approx x - \\frac{{x^3}}{{6}})
|
640 |
+
- Never use \\[ or \\] - always use $$ instead
|
641 |
|
642 |
Format your response with clear headers and bullet points."""
|
643 |
|
|
|
667 |
"""
|
668 |
try:
|
669 |
# Get the appropriate solution text for verification
|
670 |
+
question, solution_text = get_solution_for_verification(initial_response, sympy_correct, final_verification)
|
671 |
|
672 |
# Get ChatGPT's verification
|
673 |
+
chatgpt_verification = verify_with_chatgpt(question, solution_text)
|
674 |
|
675 |
# Append verification to the response
|
676 |
full_response = f"{initial_response}\n\nChatGPT Verification and Grading:\n{chatgpt_verification}"
|