Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Files Community

Dannyar608 committed on Apr 25

Commit

e34ea28

verified ·

1 Parent(s): 794a977

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -35

app.py CHANGED Viewed

@@ -4,9 +4,44 @@ import json
 import os
 import re
 from PyPDF2 import PdfReader
 # ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
 def parse_transcript(file):
     if file.name.endswith('.csv'):
         df = pd.read_csv(file)
@@ -17,59 +52,40 @@ def parse_transcript(file):
         reader = PdfReader(file)
         for page in reader.pages:
             text += page.extract_text() + '\n'
-        return extract_info_from_pdf(text)
     else:
         return "Unsupported file format", None
-    # Extract GPA (try multiple possible column names)
     gpa = "N/A"
     for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
         if col in df.columns:
             gpa = df[col].iloc[0] if isinstance(df[col].iloc[0], (float, int)) else "N/A"
             break
-    # Extract grade level (try multiple possible column names)
     grade_level = "N/A"
     for col in ['Grade Level', 'Grade', 'Class', 'Year']:
         if col in df.columns:
             grade_level = df[col].iloc[0]
             break
-    # Extract courses (current and past)
     courses = []
     for col in ['Course', 'Subject', 'Course Name', 'Class']:
         if col in df.columns:
             courses = df[col].tolist()
             break
-    # Create output display
-    output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
-    output_text += "\n".join(f"- {course}" for course in courses)
-    return output_text, {
-        "gpa": gpa,
-        "grade_level": grade_level,
-        "courses": courses
-    }
-def extract_info_from_pdf(text):
-    # Extract GPA
-    gpa_match = re.search(r"(GPA|Grade Point Average)[:\s]+(\d\.\d+)", text, re.IGNORECASE)
-    gpa = float(gpa_match.group(2)) if gpa_match else "N/A"
-    # Extract grade level
-    grade_match = re.search(r"(Grade|Year)[:\s]+(\d+|Freshman|Sophomore|Junior|Senior)", text, re.IGNORECASE)
-    grade_level = grade_match.group(2) if grade_match else "N/A"
-    # Extract courses - improved pattern to catch more course formats
-    course_pattern = r"""
-        (?:[A-Z]{2,}\s?\d{3})          # Course codes like 'MATH 101' or 'ENG101'
-        |[A-Z][a-z]+(?:\s[A-Z][a-z]+)*  # Or full course names
-    """
-    courses = re.findall(course_pattern, text, re.VERBOSE)
-    courses = list(set(courses))  # Remove duplicates
-    # Create output display
     output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
     output_text += "\n".join(f"- {course}" for course in courses)
@@ -131,7 +147,17 @@ def save_profile(name, age, interests, transcript, learning_style, favorites, bl
 def transcript_display(transcript_dict):
     if not transcript_dict:
         return "No transcript uploaded."
-    return "\n".join([f"- {course}" for course in transcript_dict["courses"]] +
                      [f"Grade Level: {transcript_dict['grade_level']}", f"GPA: {transcript_dict['gpa']}"])
 # ========== GRADIO INTERFACE ==========
@@ -190,10 +216,10 @@ with gr.Blocks() as app:
             }
             return save_profile(name, age, interests, transcript, learning_style, favorites, blog)
-        save_btn.click(fn=gather_and_save,
                        inputs=[name, age, interests, movie, movie_reason, show, show_reason,
                                book, book_reason, character, character_reason, blog_text,
                                transcript_data, learning_output],
                        outputs=output_summary)
-app.launch()

 import os
 import re
 from PyPDF2 import PdfReader
+from collections import defaultdict
 # ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
# Finds the first "Grade 10" / "Year: Junior" style header anywhere in the text.
_GRADE_LEVEL_RE = re.compile(
    r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)",
    re.IGNORECASE,
)

# One course entry per match, optionally preceded by a grade-level header.
# Whitespace *inside* a course entry is limited to spaces/tabs so that a course
# name or its letter grade can never be assembled from two different lines.
_COURSE_RE = re.compile(
    r"""
    (?:^|\n)
    # Optional header; it may be separated from the course by newlines/dashes.
    (?: ((?i:Grade|Year))\s*:?\s*(\d+|(?i:Freshman|Sophomore|Junior|Senior))\s*[\n-]* )?
    (
        (?:[A-Z]{2,}[ \t]?\d{3})                # course code: 'MATH 101', 'ENG101'
        |
        [A-Z][a-z]+(?:[ \t][A-Z][a-z]+)*        # capitalised course name
    )
    [ \t]*
    # Optional grade: a letter grade that is not the start of a word, or a percentage.
    (?: [:\-]?[ \t]* ([A-F][+-]?(?![A-Za-z])|\d{2,3}%) )?
    """,
    re.VERBOSE | re.MULTILINE,
)


def extract_courses_with_grade_levels(text):
    """Group the courses found in transcript text by grade level.

    The text is scanned line by line for course codes (``MATH 101``) or
    capitalised course names, each optionally followed by a letter grade
    (``A``-``F`` with optional ``+``/``-``) or a percentage. A ``Grade``/``Year``
    header directly preceding a course switches the grade level that the
    following courses are filed under.

    Args:
        text: Raw transcript text (for example, extracted from a PDF).

    Returns:
        A dict mapping grade level (``str``) to a list of
        ``{"course": ..., "grade": ...}`` dicts; ``"grade"`` is present only
        when a grade was found. Courses seen before any header are filed
        under the first header found anywhere in the text, or ``"Unknown"``
        when the text has no header. Empty input yields ``{}``.
    """
    if not text:
        return {}

    header = _GRADE_LEVEL_RE.search(text)
    current_grade = header.group(2) if header else "Unknown"

    courses_by_grade = defaultdict(list)
    for match in _COURSE_RE.finditer(text):
        grade_context, grade_level, course, grade = match.groups()
        if grade_context:
            current_grade = grade_level
        if course:
            course_info = {"course": course.strip()}
            if grade:
                course_info["grade"] = grade.strip()
            courses_by_grade[current_grade].append(course_info)
    return dict(courses_by_grade)
 def parse_transcript(file):
     if file.name.endswith('.csv'):
         df = pd.read_csv(file)
         reader = PdfReader(file)
         for page in reader.pages:
             text += page.extract_text() + '\n'
+        courses_by_grade = extract_courses_with_grade_levels(text)
+        output_text = "Courses by Grade Level:\n\n"
+        for grade_level, courses in courses_by_grade.items():
+            output_text += f"Grade {grade_level}:\n"
+            for course in courses:
+                output_text += f"- {course['course']}"
+                if 'grade' in course:
+                    output_text += f" (Grade: {course['grade']})"
+                output_text += "\n"
+            output_text += "\n"
+        return output_text, courses_by_grade
     else:
         return "Unsupported file format", None
     gpa = "N/A"
     for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
         if col in df.columns:
             gpa = df[col].iloc[0] if isinstance(df[col].iloc[0], (float, int)) else "N/A"
             break
     grade_level = "N/A"
     for col in ['Grade Level', 'Grade', 'Class', 'Year']:
         if col in df.columns:
             grade_level = df[col].iloc[0]
             break
     courses = []
     for col in ['Course', 'Subject', 'Course Name', 'Class']:
         if col in df.columns:
             courses = df[col].tolist()
             break
     output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
     output_text += "\n".join(f"- {course}" for course in courses)
 def transcript_display(transcript_dict):
     """Render parsed transcript data as display text.

     Two input shapes are handled:

     * grouped: every value is a list of ``{"course": ..., "grade": ...}``
       dicts keyed by grade level; rendered as one ``**Grade X**`` section per
       key with one bullet per course.
     * legacy: a dict with ``"courses"``, ``"grade_level"`` and ``"gpa"``
       keys; rendered as a bullet list followed by the grade level and GPA.
       Missing keys fall back to an empty course list / ``"N/A"`` instead of
       raising ``KeyError``.

     Args:
         transcript_dict: Parsed transcript data, or a falsy value when no
             transcript is available.

     Returns:
         The formatted text, or ``"No transcript uploaded."`` for falsy input.
     """
     if not transcript_dict:
         return "No transcript uploaded."

     if isinstance(transcript_dict, dict) and all(
         isinstance(v, list) for v in transcript_dict.values()
     ):
         parts = []
         for grade_level, courses in transcript_dict.items():
             parts.append(f"\n**Grade {grade_level}**\n")
             for course in courses:
                 suffix = f" (Grade: {course['grade']})" if 'grade' in course else ""
                 parts.append(f"- {course['course']}{suffix}\n")
         return "".join(parts)

     # Legacy shape: tolerate partially populated dicts.
     lines = [f"- {course}" for course in transcript_dict.get("courses", [])]
     lines.append(f"Grade Level: {transcript_dict.get('grade_level', 'N/A')}")
     lines.append(f"GPA: {transcript_dict.get('gpa', 'N/A')}")
     return "\n".join(lines)
 # ========== GRADIO INTERFACE ==========
             }
             return save_profile(name, age, interests, transcript, learning_style, favorites, blog)
+        save_btn.click(fn=gather_and_save,
                        inputs=[name, age, interests, movie, movie_reason, show, show_reason,
                                book, book_reason, character, character_reason, blog_text,
                                transcript_data, learning_output],
                        outputs=output_summary)
+app.launch()