Dannyar608 committed on
Commit
88b63b2
·
verified ·
1 Parent(s): 85bd875

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -3
app.py CHANGED
@@ -32,7 +32,7 @@ class UniversalTranscriptParser:
32
  '07': '7th Grade', '08': '8th Grade', 'MA': 'Middle School'
33
  }
34
 
35
- def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]:
36
  """Determine transcript type and parse accordingly"""
37
  transcript_type = self._identify_transcript_type(text)
38
 
@@ -51,7 +51,7 @@ class UniversalTranscriptParser:
51
  return 'doral_academy'
52
  return 'miami_dade'
53
 
54
- def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]:
55
  """Parse homeschool transcript format"""
56
  courses = []
57
  current_grade = None
@@ -176,7 +176,7 @@ class UniversalTranscriptParser:
176
  'grade_level': grade_level
177
  }
178
 
179
- def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]:
180
  """Parse standard Miami-Dade format"""
181
  courses = []
182
  courses_by_grade = defaultdict(list)
@@ -230,6 +230,33 @@ class UniversalTranscriptParser:
230
  'grade_level': grade_level
231
  }
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  def extract_gpa(text, gpa_type):
234
  pattern = rf'{gpa_type}\s*([\d\.]+)'
235
  match = re.search(pattern, text)
 
32
  '07': '7th Grade', '08': '8th Grade', 'MA': 'Middle School'
33
  }
34
 
35
+ def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
36
  """Determine transcript type and parse accordingly"""
37
  transcript_type = self._identify_transcript_type(text)
38
 
 
51
  return 'doral_academy'
52
  return 'miami_dade'
53
 
54
+ def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
55
  """Parse homeschool transcript format"""
56
  courses = []
57
  current_grade = None
 
176
  'grade_level': grade_level
177
  }
178
 
179
+ def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
180
  """Parse standard Miami-Dade format"""
181
  courses = []
182
  courses_by_grade = defaultdict(list)
 
230
  'grade_level': grade_level
231
  }
232
 
233
+ # Helper methods for pattern compilation
234
+ def _compile_miami_dade_patterns(self):
235
+ return {
236
+ 'student': re.compile(r'Current Grade:\s*(\d+).*YOG\s*(\d{4})'),
237
+ 'course': re.compile(
238
+ r'([A-Z]-[A-Za-z\s&]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([A-Z]?)\s*\|\s*([A-Z]?)\s*\|\s*([^\|]+)',
239
+ re.MULTILINE
240
+ )
241
+ }
242
+
243
+ def _compile_homeschool_patterns(self):
244
+ return {
245
+ 'student': re.compile(r'Student Name:\s*(.+)\s*SSN:'),
246
+ 'course': re.compile(
247
+ r'^\|?\s*([^\|]+?)\s*\|\s*([A-Z][+*]?)\s*\|\s*([^\|]+)\s*\|\s*(\d+\.?\d*)\s*\|\s*(\d+)'
248
+ )
249
+ }
250
+
251
+ def _compile_doral_academy_patterns(self):
252
+ return {
253
+ 'student': re.compile(r'LEGAL NAME:\s*([^\n]+)'),
254
+ 'course': re.compile(
255
+ r'(\d)\s+(\d{7})\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+(\d\.\d{2})\s+(\d\.\d{2})',
256
+ re.MULTILINE
257
+ )
258
+ }
259
+
260
  def extract_gpa(text, gpa_type):
261
  pattern = rf'{gpa_type}\s*([\d\.]+)'
262
  match = re.search(pattern, text)