Dannyar608 committed on
Commit
e34ea28
·
verified ·
1 Parent(s): 794a977

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -35
app.py CHANGED
@@ -4,9 +4,44 @@ import json
4
  import os
5
  import re
6
  from PyPDF2 import PdfReader
 
7
 
8
  # ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def parse_transcript(file):
11
  if file.name.endswith('.csv'):
12
  df = pd.read_csv(file)
@@ -17,59 +52,40 @@ def parse_transcript(file):
17
  reader = PdfReader(file)
18
  for page in reader.pages:
19
  text += page.extract_text() + '\n'
20
- return extract_info_from_pdf(text)
 
 
 
 
 
 
 
 
 
 
 
 
21
  else:
22
  return "Unsupported file format", None
23
 
24
- # Extract GPA (try multiple possible column names)
25
  gpa = "N/A"
26
  for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
27
  if col in df.columns:
28
  gpa = df[col].iloc[0] if isinstance(df[col].iloc[0], (float, int)) else "N/A"
29
  break
30
 
31
- # Extract grade level (try multiple possible column names)
32
  grade_level = "N/A"
33
  for col in ['Grade Level', 'Grade', 'Class', 'Year']:
34
  if col in df.columns:
35
  grade_level = df[col].iloc[0]
36
  break
37
 
38
- # Extract courses (current and past)
39
  courses = []
40
  for col in ['Course', 'Subject', 'Course Name', 'Class']:
41
  if col in df.columns:
42
  courses = df[col].tolist()
43
  break
44
 
45
- # Create output display
46
- output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
47
- output_text += "\n".join(f"- {course}" for course in courses)
48
-
49
- return output_text, {
50
- "gpa": gpa,
51
- "grade_level": grade_level,
52
- "courses": courses
53
- }
54
-
55
- def extract_info_from_pdf(text):
56
- # Extract GPA
57
- gpa_match = re.search(r"(GPA|Grade Point Average)[:\s]+(\d\.\d+)", text, re.IGNORECASE)
58
- gpa = float(gpa_match.group(2)) if gpa_match else "N/A"
59
-
60
- # Extract grade level
61
- grade_match = re.search(r"(Grade|Year)[:\s]+(\d+|Freshman|Sophomore|Junior|Senior)", text, re.IGNORECASE)
62
- grade_level = grade_match.group(2) if grade_match else "N/A"
63
-
64
- # Extract courses - improved pattern to catch more course formats
65
- course_pattern = r"""
66
- (?:[A-Z]{2,}\s?\d{3}) # Course codes like 'MATH 101' or 'ENG101'
67
- |[A-Z][a-z]+(?:\s[A-Z][a-z]+)* # Or full course names
68
- """
69
- courses = re.findall(course_pattern, text, re.VERBOSE)
70
- courses = list(set(courses)) # Remove duplicates
71
-
72
- # Create output display
73
  output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
74
  output_text += "\n".join(f"- {course}" for course in courses)
75
 
@@ -131,7 +147,17 @@ def save_profile(name, age, interests, transcript, learning_style, favorites, bl
131
  def transcript_display(transcript_dict):
132
  if not transcript_dict:
133
  return "No transcript uploaded."
134
- return "\n".join([f"- {course}" for course in transcript_dict["courses"]] +
 
 
 
 
 
 
 
 
 
 
135
  [f"Grade Level: {transcript_dict['grade_level']}", f"GPA: {transcript_dict['gpa']}"])
136
 
137
  # ========== GRADIO INTERFACE ==========
@@ -190,10 +216,10 @@ with gr.Blocks() as app:
190
  }
191
  return save_profile(name, age, interests, transcript, learning_style, favorites, blog)
192
 
193
- save_btn.click(fn=gather_and_save,
194
  inputs=[name, age, interests, movie, movie_reason, show, show_reason,
195
  book, book_reason, character, character_reason, blog_text,
196
  transcript_data, learning_output],
197
  outputs=output_summary)
198
 
199
- app.launch()
 
4
  import os
5
  import re
6
  from PyPDF2 import PdfReader
7
+ from collections import defaultdict
8
 
9
  # ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
10
 
# Matches a grade-level header such as "Grade 9", "Grade: 10" or "Year: Junior".
# Only horizontal whitespace is allowed so a header never spans two lines.
_LEVEL_RE = re.compile(
    r"(?:Grade|Year)[ \t]*:?[ \t]*(\d+|Freshman|Sophomore|Junior|Senior)",
    re.IGNORECASE,
)

# Matches a course at the start of a line, optionally followed by a mark.
# The course is either a code ("MATH 101", "ENG101") or a capitalised name
# ("World History"). The mark is a letter grade or a percentage.
_COURSE_RE = re.compile(
    r"""
    (?P<course>
        [A-Z]{2,}[ \t]?\d{3}
        |
        [A-Z][a-z]+(?:[ \t][A-Z][a-z]+)*
    )
    [ \t]*
    (?:[:\-][ \t]*)?
    (?P<grade>[A-F][+-]?(?![A-Za-z])|\d{2,3}%)?
    """,
    re.VERBOSE,
)


def extract_courses_with_grade_levels(text):
    """Group the courses found in transcript text by grade level.

    The text is scanned line by line. A line starting with a grade-level
    header ("Grade 9", "Year: Junior", ...) switches the current grade level;
    anything left on that line, and every following line, is checked for a
    course at its start. Courses seen before any header are filed under the
    first grade level mentioned anywhere in the text, or "Unknown" when the
    text names none.

    Args:
        text: Raw transcript text (for example, text extracted from a PDF).

    Returns:
        A dict mapping grade level (as written in the text) to a list of
        ``{"course": ...}`` dicts, each with an extra ``"grade"`` key when a
        mark was found on the same line. Empty or missing text yields ``{}``.
    """
    if not text:
        return {}

    first_level = _LEVEL_RE.search(text)
    current_grade = first_level.group(1) if first_level else "Unknown"

    courses_by_grade = {}
    # Working per line keeps a course name, or its mark, from swallowing the
    # beginning of the next line.
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        header = _LEVEL_RE.match(line)
        if header:
            current_grade = header.group(1)
            # A course may share the header line, e.g. "Grade 9 - Algebra".
            line = line[header.end():].lstrip(" \t:-")

        course_match = _COURSE_RE.match(line)
        if not course_match:
            continue

        entry = {"course": course_match.group("course")}
        mark = course_match.group("grade")
        if mark:
            entry["grade"] = mark
        courses_by_grade.setdefault(current_grade, []).append(entry)

    return courses_by_grade
44
+
45
  def parse_transcript(file):
46
  if file.name.endswith('.csv'):
47
  df = pd.read_csv(file)
 
52
  reader = PdfReader(file)
53
  for page in reader.pages:
54
  text += page.extract_text() + '\n'
55
+ courses_by_grade = extract_courses_with_grade_levels(text)
56
+
57
+ output_text = "Courses by Grade Level:\n\n"
58
+ for grade_level, courses in courses_by_grade.items():
59
+ output_text += f"Grade {grade_level}:\n"
60
+ for course in courses:
61
+ output_text += f"- {course['course']}"
62
+ if 'grade' in course:
63
+ output_text += f" (Grade: {course['grade']})"
64
+ output_text += "\n"
65
+ output_text += "\n"
66
+
67
+ return output_text, courses_by_grade
68
  else:
69
  return "Unsupported file format", None
70
 
 
71
  gpa = "N/A"
72
  for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
73
  if col in df.columns:
74
  gpa = df[col].iloc[0] if isinstance(df[col].iloc[0], (float, int)) else "N/A"
75
  break
76
 
 
77
  grade_level = "N/A"
78
  for col in ['Grade Level', 'Grade', 'Class', 'Year']:
79
  if col in df.columns:
80
  grade_level = df[col].iloc[0]
81
  break
82
 
 
83
  courses = []
84
  for col in ['Course', 'Subject', 'Course Name', 'Class']:
85
  if col in df.columns:
86
  courses = df[col].tolist()
87
  break
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
90
  output_text += "\n".join(f"- {course}" for course in courses)
91
 
 
def transcript_display(transcript_dict):
    """Render parsed transcript data as display text.

    Two shapes are handled:

    * Grouped: a dict whose values are all lists, mapping a grade level to a
      list of course entries. Each entry is normally a dict with a "course"
      key and an optional "grade" key.
    * Flat: a dict with "courses", "grade_level" and "gpa" keys.

    A dict that has a "courses" key is always treated as flat, so
    ``{"courses": [...]}`` is not mistaken for a grade level named "courses".
    Missing flat keys fall back to an empty course list / "N/A" instead of
    raising ``KeyError``.

    Args:
        transcript_dict: Parsed transcript data, or a falsy value when no
            transcript has been uploaded.

    Returns:
        The formatted text, or "No transcript uploaded." for falsy input.
    """
    if not transcript_dict:
        return "No transcript uploaded."

    is_grouped = (
        isinstance(transcript_dict, dict)
        and "courses" not in transcript_dict
        and all(isinstance(value, list) for value in transcript_dict.values())
    )
    if is_grouped:
        parts = []
        for grade_level, courses in transcript_dict.items():
            parts.append(f"\n**Grade {grade_level}**\n")
            for course in courses:
                if isinstance(course, dict):
                    line = f"- {course.get('course', 'N/A')}"
                    if "grade" in course:
                        line += f" (Grade: {course['grade']})"
                else:
                    # Tolerate plain course names inside a grouped transcript.
                    line = f"- {course}"
                parts.append(line + "\n")
        return "".join(parts)

    course_lines = [f"- {course}" for course in transcript_dict.get("courses", [])]
    summary_lines = [
        f"Grade Level: {transcript_dict.get('grade_level', 'N/A')}",
        f"GPA: {transcript_dict.get('gpa', 'N/A')}",
    ]
    return "\n".join(course_lines + summary_lines)
162
 
163
  # ========== GRADIO INTERFACE ==========
 
216
  }
217
  return save_profile(name, age, interests, transcript, learning_style, favorites, blog)
218
 
219
+ save_btn.click(fn=gather_and_save,
220
  inputs=[name, age, interests, movie, movie_reason, show, show_reason,
221
  book, book_reason, character, character_reason, blog_text,
222
  transcript_data, learning_output],
223
  outputs=output_summary)
224
 
225
+ app.launch()