Dannyar608 committed on
Commit
ce9371b
·
verified ·
1 Parent(s): 55e2010

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -154
app.py CHANGED
@@ -173,7 +173,7 @@ def validate_age(age: Union[int, float, str]) -> int:
173
  def validate_file(file_obj) -> None:
174
  """Validate uploaded file."""
175
  if not file_obj:
176
- raise ValueError("Please upload a file first.")
177
 
178
  file_ext = os.path.splitext(file_obj.name)[1].lower()
179
  if file_ext not in ALLOWED_FILE_TYPES:
@@ -319,149 +319,122 @@ class TranscriptParser:
319
  raise ValueError(f"Couldn't parse transcript: {str(e)}")
320
 
321
  def _parse_miami_dade_format(self, text: str) -> Dict:
322
- """More flexible parser for Miami-Dade County Public Schools transcripts."""
323
- # Normalize text first
324
- text = re.sub(r'\s+', ' ', text) # Collapse multiple spaces
325
-
326
- # More flexible student info extraction
327
- student_match = re.search(
328
- r'(?:Student\s*ID[:]?\s*(\d+).*?Name[:]?\s*([A-Za-z\s,]+).*?'
329
- r'(?:Grade|Level)[:]?\s*(\d+).*?'
330
- r'(?:Grad|YOG)[:]?\s*(\d{4}).*?'
331
- r'(?:Unweighted\s*GPA)[:]?\s*([\d.]+).*?'
332
- r'(?:Weighted\s*GPA)[:]?\s*([\d.]+).*?'
333
- r'(?:Total\s*Credits)[:]?\s*([\d.]+).*?'
334
- r'(?:Comm\s*Serv|Service\s*Hours)[:]?\s*(\d+)',
335
- text, re.IGNORECASE | re.DOTALL
336
- )
337
-
338
- if student_match:
339
- self.student_data = {
340
- "id": student_match.group(1).strip(),
341
- "name": student_match.group(2).replace(",", ", ").strip().title(),
342
- "current_grade": student_match.group(3),
343
- "graduation_year": student_match.group(4),
344
- "unweighted_gpa": float(student_match.group(5)),
345
- "weighted_gpa": float(student_match.group(6)),
346
- "total_credits": float(student_match.group(7)),
347
- "community_service_hours": int(student_match.group(8))
348
- }
349
- else:
350
- # Fallback pattern if first one fails
351
- student_match = re.search(
352
- r'(\d{7})\s*(.*?)\s*(?:Grade|Grd)[:]?\s*(\d+)',
353
- text, re.IGNORECASE
354
- )
355
- if student_match:
356
- self.student_data = {
357
- "id": student_match.group(1).strip(),
358
- "name": student_match.group(2).strip().title(),
359
- "current_grade": student_match.group(3),
360
- "graduation_year": "",
361
- "unweighted_gpa": 0.0,
362
- "weighted_gpa": 0.0,
363
- "total_credits": 0.0,
364
- "community_service_hours": 0
365
- }
366
-
367
- # Extract requirements
368
- self.requirements = {}
369
- req_section = re.search(
370
- r"Code\s+Description\s+Required\s+Waived\s+Completed\s+Status(.*?)Total\s+\d+\.\d+\s+\d+\.\d+\s+\d+\.\d+\s+\d+%",
371
- text, re.DOTALL
372
- )
373
- if req_section:
374
- req_lines = req_section.group(1).strip().split('\n')
375
- for line in req_lines:
376
- line = line.strip()
377
- if not line:
378
- continue
379
-
380
- req_match = re.match(r"([A-Z]-[^\s]+)\s+(.+?)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)%", line)
381
- if req_match:
382
- code = req_match.group(1).strip()
383
- self.requirements[code] = {
384
- "description": req_match.group(2).strip(),
385
- "required": float(req_match.group(3)),
386
- "waived": float(req_match.group(4)),
387
- "completed": float(req_match.group(5)),
388
- "status": f"{req_match.group(6)}%"
389
- }
390
-
391
- # Extract course history
392
- self.course_history = []
393
- course_section = re.search(
394
- r"Requirement\s+School Year\s+GradeLv1\s+CrsNum\s+Description\s+Term\s+DstNumber\s+FG\s+Incl\s+Credits(.*?)Legend for Incl",
395
- text, re.DOTALL
396
- )
397
 
398
- if course_section:
399
- course_lines = course_section.group(1).strip().split('\n')
400
- for line in course_lines:
401
- line = line.strip()
402
- if not line or line.startswith('='):
403
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
- # Handle both regular and in-progress courses
406
- course_match = re.match(
407
- r"([A-Z]-[^\s]+)?\s*(\d{4}-\d{4}|\d{4})?\s*(\d{2})?\s*([A-Z0-9]+)?\s*(.+?)\s+([AT12]+)?\s*([A-Z0-9]+)?\s*([A-Z])?\s*([A-Z])?\s*(inProgress|\d+\.\d+)?",
408
- line
409
- )
410
-
411
- if course_match:
412
- self.course_history.append({
413
- "requirement_category": course_match.group(1) if course_match.group(1) else None,
414
- "school_year": course_match.group(2) if course_match.group(2) else None,
415
- "grade_level": course_match.group(3) if course_match.group(3) else None,
416
- "course_code": course_match.group(4) if course_match.group(4) else None,
417
- "description": course_match.group(5).strip() if course_match.group(5) else None,
418
- "term": course_match.group(6) if course_match.group(6) else None,
419
- "district_number": course_match.group(7) if course_match.group(7) else None,
420
- "grade": course_match.group(8) if course_match.group(8) else None,
421
- "inclusion_status": course_match.group(9) if course_match.group(9) else None,
422
- "credits": course_match.group(10) if course_match.group(10) else None
423
- })
424
-
425
- # Extract in-progress courses
426
- self._extract_current_courses()
427
- self._calculate_completion()
428
-
429
- return {
430
- "student_info": self.student_data,
431
- "requirements": self.requirements,
432
- "current_courses": self.current_courses,
433
- "course_history": self.course_history,
434
- "graduation_status": self.graduation_status,
435
- "format": "miami_dade"
436
  }
437
-
438
- def _extract_current_courses(self):
439
- """Identify in-progress courses."""
440
- self.current_courses = [
441
- {
442
- "course": c["description"],
443
- "code": c["course_code"],
444
- "category": c["requirement_category"],
445
- "term": c["term"],
446
- "credits": c["credits"],
447
- "grade_level": c["grade_level"]
448
- }
449
- for c in self.course_history
450
- if c.get("credits") and isinstance(c["credits"], str) and c["credits"].lower() == "inprogress"
451
- ]
452
-
453
- def _calculate_completion(self):
454
- """Compute graduation readiness."""
455
- total_required = sum(req["required"] for req in self.requirements.values())
456
- total_completed = sum(req["completed"] for req in self.requirements.values())
457
-
458
- self.graduation_status.update({
459
- "total_required_credits": total_required,
460
- "total_completed_credits": total_completed,
461
- "percent_complete": round((total_completed / total_required) * 100, 1) if total_required > 0 else 0,
462
- "remaining_credits": total_required - total_completed,
463
- "on_track": (total_completed / total_required) >= 0.75 if total_required > 0 else False
464
- })
465
 
466
  def format_transcript_output(data: Dict) -> str:
467
  """Enhanced formatting for transcript output with format awareness"""
@@ -500,24 +473,25 @@ def format_transcript_output(data: Dict) -> str:
500
 
501
  # Detailed Requirements
502
  output.append("### Detailed Requirements:")
503
- for code, req in data.get("requirements", {}).items():
504
  output.append(
505
- f"- **{code}**: {req.get('description', '')}\n"
506
  f" Required: {req['required']} | Completed: {req['completed']} | "
507
  f"Status: {req['status']}"
508
  )
509
  output.append("")
510
 
511
  # Current Courses
512
- if data.get("current_courses"):
513
  output.append("## Current Courses (In Progress)\n" + '='*50)
514
- for course in data["current_courses"]:
515
- output.append(
516
- f"- **{course['code']} {course['course']}**\n"
517
- f" Category: {course['category']} | "
518
- f"Grade Level: {course['grade_level']} | "
519
- f"Term: {course['term']} | Credits: {course['credits']}"
520
- )
 
521
  output.append("")
522
 
523
  # Course History by Year
@@ -532,9 +506,9 @@ def format_transcript_output(data: Dict) -> str:
532
  output.append(f"\n### {year}")
533
  for course in courses_by_year[year]:
534
  output.append(
535
- f"- **{course.get('course_code', '')} {course.get('description', 'Unnamed course')}**\n"
536
- f" Subject: {course.get('requirement_category', 'N/A')} | "
537
- f"Grade: {course.get('grade', 'N/A')} | "
538
  f"Credits: {course.get('credits', 'N/A')}"
539
  )
540
 
 
173
  def validate_file(file_obj) -> None:
174
  """Validate uploaded file."""
175
  if not file_obj:
176
+ raise ValueError("Please upload a file first")
177
 
178
  file_ext = os.path.splitext(file_obj.name)[1].lower()
179
  if file_ext not in ALLOWED_FILE_TYPES:
 
319
  raise ValueError(f"Couldn't parse transcript: {str(e)}")
320
 
321
  def _parse_miami_dade_format(self, text: str) -> Dict:
322
+ """Parse Miami-Dade County Public Schools transcripts."""
323
+ # Initialize PDF reader from text (simulating the PDF structure)
324
+ lines = [line.strip() for line in text.split('\n') if line.strip()]
325
+
326
+ # Initialize data structure
327
+ data = {
328
+ 'student_info': {},
329
+ 'graduation_requirements': [],
330
+ 'course_history': [],
331
+ 'summary': {}
332
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
+ # Parse student information
335
+ student_info_lines = []
336
+ for line in lines:
337
+ if "DORAL ACADEMY HIGH SCHOOL" in line:
338
+ student_info_lines = lines[lines.index(line):lines.index(line)+5]
339
+ break
340
+
341
+ if student_info_lines:
342
+ # Parse school and cohort info
343
+ school_info = student_info_lines[0].split('|')
344
+ data['student_info']['school'] = school_info[1].strip()
345
+ data['student_info']['district'] = school_info[2].strip()
346
+
347
+ # Parse student name and ID
348
+ name_id_line = student_info_lines[1].split('-')
349
+ data['student_info']['student_id'] = name_id_line[0].strip()
350
+ data['student_info']['student_name'] = name_id_line[1].split(',')[1].strip() + " " + name_id_line[1].split(',')[0].strip()
351
+
352
+ # Parse academic info
353
+ academic_info = student_info_lines[2].split('|')
354
+ data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip()
355
+ data['student_info']['graduation_year'] = academic_info[2].strip()
356
+ data['student_info']['weighted_gpa'] = academic_info[3].split(':')[1].strip()
357
+ data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip()
358
+ data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip()
359
+
360
+ # Parse graduation requirements
361
+ requirements_start = None
362
+ requirements_end = None
363
+ for i, line in enumerate(lines):
364
+ if "Code" in line and "Description" in line and "Required" in line:
365
+ requirements_start = i + 1
366
+ if requirements_start and "Total" in line:
367
+ requirements_end = i
368
+ break
369
+
370
+ if requirements_start and requirements_end:
371
+ for line in lines[requirements_start:requirements_end]:
372
+ if '|' in line:
373
+ parts = [p.strip() for p in line.split('|') if p.strip()]
374
+ if len(parts) >= 6:
375
+ req = {
376
+ 'code': parts[0],
377
+ 'description': parts[1],
378
+ 'required': parts[2],
379
+ 'waived': parts[3],
380
+ 'completed': parts[4],
381
+ 'status': parts[5]
382
+ }
383
+ data['graduation_requirements'].append(req)
384
+
385
+ # Parse total line
386
+ total_line = lines[requirements_end]
387
+ total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
388
+ if len(total_parts) >= 5:
389
+ data['summary']['total_required'] = total_parts[1]
390
+ data['summary']['total_waived'] = total_parts[2]
391
+ data['summary']['total_completed'] = total_parts[3]
392
+ data['summary']['completion_percentage'] = total_parts[4]
393
+
394
+ # Parse course history
395
+ course_history_start = None
396
+ for i, line in enumerate(lines):
397
+ if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
398
+ course_history_start = i + 1
399
+ break
400
+
401
+ if course_history_start:
402
+ current_requirement = None
403
+ for line in lines[course_history_start:]:
404
+ if '|' in line:
405
+ parts = [p.strip() for p in line.split('|') if p.strip()]
406
 
407
+ # Check if this is a new requirement line
408
+ if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
409
+ current_requirement = parts[0]
410
+ parts = parts[1:] # Remove the requirement code
411
+
412
+ if len(parts) >= 9:
413
+ course = {
414
+ 'requirement': current_requirement,
415
+ 'school_year': parts[0],
416
+ 'grade_level': parts[1],
417
+ 'course_number': parts[2],
418
+ 'description': parts[3],
419
+ 'term': parts[4],
420
+ 'district_number': parts[5],
421
+ 'fg': parts[6],
422
+ 'included': parts[7],
423
+ 'credits': parts[8]
424
+ }
425
+ data['course_history'].append(course)
426
+
427
+ # Calculate graduation status
428
+ graduation_status = {
429
+ 'total_required_credits': float(data['summary']['total_required']),
430
+ 'total_completed_credits': float(data['summary']['total_completed']),
431
+ 'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
432
+ 'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
433
+ 'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
 
 
 
 
434
  }
435
+ data['graduation_status'] = graduation_status
436
+
437
+ return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
  def format_transcript_output(data: Dict) -> str:
440
  """Enhanced formatting for transcript output with format awareness"""
 
473
 
474
  # Detailed Requirements
475
  output.append("### Detailed Requirements:")
476
+ for req in data.get("graduation_requirements", []):
477
  output.append(
478
+ f"- **{req['code']}**: {req['description']}\n"
479
  f" Required: {req['required']} | Completed: {req['completed']} | "
480
  f"Status: {req['status']}"
481
  )
482
  output.append("")
483
 
484
  # Current Courses
485
+ if any(c.get('credits', '') == 'inProgress' for c in data.get("course_history", [])):
486
  output.append("## Current Courses (In Progress)\n" + '='*50)
487
+ for course in data["course_history"]:
488
+ if course.get('credits', '') == 'inProgress':
489
+ output.append(
490
+ f"- **{course['course_number']} {course['description']}**\n"
491
+ f" Category: {course['requirement']} | "
492
+ f"Grade Level: {course['grade_level']} | "
493
+ f"Term: {course['term']} | Credits: {course['credits']}"
494
+ )
495
  output.append("")
496
 
497
  # Course History by Year
 
506
  output.append(f"\n### {year}")
507
  for course in courses_by_year[year]:
508
  output.append(
509
+ f"- **{course.get('course_number', '')} {course.get('description', 'Unnamed course')}**\n"
510
+ f" Subject: {course.get('requirement', 'N/A')} | "
511
+ f"Grade: {course.get('fg', 'N/A')} | "
512
  f"Credits: {course.get('credits', 'N/A')}"
513
  )
514