Dannyar608 committed on
Commit
ebc14af
·
verified ·
1 Parent(s): df3101e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -36
app.py CHANGED
@@ -373,23 +373,24 @@ class TranscriptParser:
373
  'assessments': {}
374
  }
375
 
376
- # Extract student info
377
- student_info_match = re.search(r"(\d{7}) - (.*?)\n", text)
 
 
 
 
378
  if student_info_match:
379
  parsed_data['student_info']['id'] = student_info_match.group(1)
380
  parsed_data['student_info']['name'] = student_info_match.group(2).strip()
 
 
381
 
382
- # Extract grade and year info
383
- grade_match = re.search(r"Current Grade:\s*(\d+)", text)
384
- if grade_match:
385
- parsed_data['student_info']['grade'] = grade_match.group(1)
386
-
387
- yog_match = re.search(r"YOG\s*(\d{4})", text)
388
- if yog_match:
389
- parsed_data['student_info']['year_of_graduation'] = yog_match.group(1)
390
-
391
- # Extract GPA information
392
- gpa_matches = re.findall(r"(?:Un-weighted|Weighted)\s*GPA\s*([\d.]+)", text)
393
  if len(gpa_matches) >= 1:
394
  parsed_data['student_info']['unweighted_gpa'] = float(gpa_matches[0])
395
  if len(gpa_matches) >= 2:
@@ -414,7 +415,7 @@ class TranscriptParser:
414
  if virtual_grade_match:
415
  parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
416
 
417
- # Extract requirements - specific to this format
418
  req_section = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
419
  if req_section:
420
  req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
@@ -456,30 +457,44 @@ class TranscriptParser:
456
  status = parts[4]
457
  parsed_data['assessments'][name] = status
458
 
459
- # Extract course history - specific to this format
460
- course_section = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNum\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
 
 
 
 
 
461
  if course_section:
462
- course_lines = [line.strip() for line in course_section.group(1).split('\n') if line.strip()]
 
 
 
 
463
  for line in course_lines:
464
- if '|' in line:
465
- parts = [part.strip() for part in line.split('|')]
466
- if len(parts) >= 9:
467
- course = {
468
- 'requirement': parts[0],
469
- 'school_year': parts[1],
470
- 'grade_level': parts[2],
471
- 'course_code': parts[3],
472
- 'description': parts[4],
473
- 'term': parts[5],
474
- 'district_number': parts[6],
475
- 'fg': parts[7],
476
- 'included': parts[8],
477
- 'credits': parts[9] if len(parts) > 9 else "0"
478
- }
479
- # Handle inProgress credits
480
- if "inProgress" in course['credits'].lower():
481
- course['credits'] = "0"
482
- parsed_data['course_history'].append(course)
 
 
 
 
 
483
 
484
  return parsed_data
485
 
 
373
  'assessments': {}
374
  }
375
 
376
+ # Extract student info with more robust pattern
377
+ student_info_match = re.search(
378
+ r"(\d{7})\s*-\s*(.*?)\s*\n.*?Current Grade:\s*(\d+).*?YOG\s*(\d{4})",
379
+ text,
380
+ re.DOTALL
381
+ )
382
  if student_info_match:
383
  parsed_data['student_info']['id'] = student_info_match.group(1)
384
  parsed_data['student_info']['name'] = student_info_match.group(2).strip()
385
+ parsed_data['student_info']['grade'] = student_info_match.group(3)
386
+ parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4)
387
 
388
+ # More robust GPA extraction
389
+ gpa_matches = re.findall(
390
+ r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
391
+ text,
392
+ re.IGNORECASE
393
+ )
 
 
 
 
 
394
  if len(gpa_matches) >= 1:
395
  parsed_data['student_info']['unweighted_gpa'] = float(gpa_matches[0])
396
  if len(gpa_matches) >= 2:
 
415
  if virtual_grade_match:
416
  parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
417
 
418
+ # Extract requirements
419
  req_section = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
420
  if req_section:
421
  req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
 
457
  status = parts[4]
458
  parsed_data['assessments'][name] = status
459
 
460
+ # Extract course history with more fault-tolerant parsing
461
+ course_section = re.search(
462
+ r"Requirement.*?School Year.*?GradeLv1.*?CrsNum.*?Description.*?Term.*?DstNumber.*?FG.*?Incl.*?Credits(.*?)(?:Legend|\Z)",
463
+ text,
464
+ re.DOTALL | re.IGNORECASE
465
+ )
466
+
467
  if course_section:
468
+ course_lines = [
469
+ line.strip() for line in course_section.group(1).split('\n')
470
+ if line.strip() and '|' in line
471
+ ]
472
+
473
  for line in course_lines:
474
+ parts = [part.strip() for part in line.split('|')]
475
+
476
+ # Handle varying number of columns
477
+ if len(parts) >= 9:
478
+ course = {
479
+ 'requirement': parts[0] if len(parts) > 0 else "",
480
+ 'school_year': parts[1] if len(parts) > 1 else "",
481
+ 'grade_level': parts[2] if len(parts) > 2 else "",
482
+ 'course_code': parts[3] if len(parts) > 3 else "",
483
+ 'description': parts[4] if len(parts) > 4 else "",
484
+ 'term': parts[5] if len(parts) > 5 else "",
485
+ 'district_number': parts[6] if len(parts) > 6 else "",
486
+ 'fg': parts[7] if len(parts) > 7 else "",
487
+ 'included': parts[8] if len(parts) > 8 else "",
488
+ 'credits': parts[9] if len(parts) > 9 else "0"
489
+ }
490
+
491
+ # Handle "inProgress" and empty credits
492
+ if "inProgress" in course['credits'].lower() or not course['credits']:
493
+ course['credits'] = "0"
494
+ elif not course['credits'].replace('.','').isdigit():
495
+ course['credits'] = "0"
496
+
497
+ parsed_data['course_history'].append(course)
498
 
499
  return parsed_data
500