Dannyar608 committed on
Commit
e21d148
·
verified ·
1 Parent(s): ce9371b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -73
app.py CHANGED
@@ -36,9 +36,9 @@ SESSION_TIMEOUT = 3600 # 1 hour session timeout
36
 
37
  # Initialize logging
38
  logging.basicConfig(
39
- filename='transcript_parser.log',
40
  level=logging.DEBUG,
41
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 
42
  )
43
 
44
  # Model configuration - Only DeepSeek
@@ -318,7 +318,7 @@ class TranscriptParser:
318
  logging.error(f"Error parsing transcript: {str(e)}")
319
  raise ValueError(f"Couldn't parse transcript: {str(e)}")
320
 
321
- def _parse_miami_dade_format(self, text: str) -> Dict:
322
  """Parse Miami-Dade County Public Schools transcripts."""
323
  # Initialize PDF reader from text (simulating the PDF structure)
324
  lines = [line.strip() for line in text.split('\n') if line.strip()]
@@ -328,39 +328,62 @@ class TranscriptParser:
328
  'student_info': {},
329
  'graduation_requirements': [],
330
  'course_history': [],
331
- 'summary': {}
 
332
  }
333
 
334
- # Parse student information
335
  student_info_lines = []
336
- for line in lines:
 
337
  if "DORAL ACADEMY HIGH SCHOOL" in line:
338
- student_info_lines = lines[lines.index(line):lines.index(line)+5]
 
339
  break
340
 
341
  if student_info_lines:
342
- # Parse school and cohort info
343
- school_info = student_info_lines[0].split('|')
344
- data['student_info']['school'] = school_info[1].strip()
345
- data['student_info']['district'] = school_info[2].strip()
346
-
347
- # Parse student name and ID
348
- name_id_line = student_info_lines[1].split('-')
349
- data['student_info']['student_id'] = name_id_line[0].strip()
350
- data['student_info']['student_name'] = name_id_line[1].split(',')[1].strip() + " " + name_id_line[1].split(',')[0].strip()
351
-
352
- # Parse academic info
353
- academic_info = student_info_lines[2].split('|')
354
- data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip()
355
- data['student_info']['graduation_year'] = academic_info[2].strip()
356
- data['student_info']['weighted_gpa'] = academic_info[3].split(':')[1].strip()
357
- data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip()
358
- data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
  # Parse graduation requirements
361
  requirements_start = None
362
  requirements_end = None
363
  for i, line in enumerate(lines):
 
364
  if "Code" in line and "Description" in line and "Required" in line:
365
  requirements_start = i + 1
366
  if requirements_start and "Total" in line:
@@ -369,31 +392,42 @@ class TranscriptParser:
369
 
370
  if requirements_start and requirements_end:
371
  for line in lines[requirements_start:requirements_end]:
372
- if '|' in line:
373
- parts = [p.strip() for p in line.split('|') if p.strip()]
374
- if len(parts) >= 6:
375
- req = {
376
- 'code': parts[0],
377
- 'description': parts[1],
378
- 'required': parts[2],
379
- 'waived': parts[3],
380
- 'completed': parts[4],
381
- 'status': parts[5]
382
- }
383
- data['graduation_requirements'].append(req)
 
 
 
 
 
384
 
385
  # Parse total line
386
- total_line = lines[requirements_end]
387
- total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
388
- if len(total_parts) >= 5:
389
- data['summary']['total_required'] = total_parts[1]
390
- data['summary']['total_waived'] = total_parts[2]
391
- data['summary']['total_completed'] = total_parts[3]
392
- data['summary']['completion_percentage'] = total_parts[4]
 
 
 
 
 
393
 
394
  # Parse course history
395
  course_history_start = None
396
  for i, line in enumerate(lines):
 
397
  if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
398
  course_history_start = i + 1
399
  break
@@ -401,38 +435,49 @@ class TranscriptParser:
401
  if course_history_start:
402
  current_requirement = None
403
  for line in lines[course_history_start:]:
404
- if '|' in line:
405
- parts = [p.strip() for p in line.split('|') if p.strip()]
406
-
407
- # Check if this is a new requirement line
408
- if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
409
- current_requirement = parts[0]
410
- parts = parts[1:] # Remove the requirement code
411
-
412
- if len(parts) >= 9:
413
- course = {
414
- 'requirement': current_requirement,
415
- 'school_year': parts[0],
416
- 'grade_level': parts[1],
417
- 'course_number': parts[2],
418
- 'description': parts[3],
419
- 'term': parts[4],
420
- 'district_number': parts[5],
421
- 'fg': parts[6],
422
- 'included': parts[7],
423
- 'credits': parts[8]
424
- }
425
- data['course_history'].append(course)
 
 
 
 
 
426
 
427
  # Calculate graduation status
428
- graduation_status = {
429
- 'total_required_credits': float(data['summary']['total_required']),
430
- 'total_completed_credits': float(data['summary']['total_completed']),
431
- 'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
432
- 'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
433
- 'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
434
- }
435
- data['graduation_status'] = graduation_status
 
 
 
 
 
 
436
 
437
  return data
438
 
 
36
 
37
  # Initialize logging
38
  logging.basicConfig(
 
39
  level=logging.DEBUG,
40
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
41
+ filename='transcript_parser.log'
42
  )
43
 
44
  # Model configuration - Only DeepSeek
 
318
  logging.error(f"Error parsing transcript: {str(e)}")
319
  raise ValueError(f"Couldn't parse transcript: {str(e)}")
320
 
321
+ def _parse_miami_dade_format(self, text: str, strict_mode: bool = False) -> Dict:
322
  """Parse Miami-Dade County Public Schools transcripts."""
323
  # Initialize PDF reader from text (simulating the PDF structure)
324
  lines = [line.strip() for line in text.split('\n') if line.strip()]
 
328
  'student_info': {},
329
  'graduation_requirements': [],
330
  'course_history': [],
331
+ 'summary': {},
332
+ 'format': 'miami_dade' # Add format identifier
333
  }
334
 
335
+ # Parse student information with more robust checks
336
  student_info_lines = []
337
+ for i, line in enumerate(lines):
338
+ logging.debug(f"Processing line: {line}") # Added debug logging
339
  if "DORAL ACADEMY HIGH SCHOOL" in line:
340
+ # Take the matched header line plus the next 4 lines (or fewer if we're at the end)
341
+ student_info_lines = lines[i:i+5]
342
  break
343
 
344
  if student_info_lines:
345
+ try:
346
+ # Parse school and cohort info - more defensive
347
+ school_info_parts = student_info_lines[0].split('|')
348
+ if len(school_info_parts) > 2:
349
+ data['student_info']['school'] = school_info_parts[1].strip() if len(school_info_parts) > 1 else ''
350
+ data['student_info']['district'] = school_info_parts[2].strip() if len(school_info_parts) > 2 else ''
351
+
352
+ # Parse student name and ID - more defensive
353
+ if len(student_info_lines) > 1:
354
+ name_id_line = student_info_lines[1].split('-')
355
+ if len(name_id_line) > 1:
356
+ name_parts = name_id_line[1].split(',')
357
+ if len(name_parts) > 1:
358
+ data['student_info']['student_id'] = name_id_line[0].strip()
359
+ data['student_info']['student_name'] = name_parts[1].strip() + " " + name_parts[0].strip()
360
+
361
+ # Parse academic info - more defensive
362
+ if len(student_info_lines) > 2:
363
+ academic_info = student_info_lines[2].split('|')
364
+ if len(academic_info) > 5:
365
+ data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip() if ':' in academic_info[1] else ''
366
+ data['student_info']['graduation_year'] = academic_info[2].strip()
367
+ data['student_info']['weighted_gpa'] = academic_info[3].split(':')[1].strip() if ':' in academic_info[3] else ''
368
+ data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip() if ':' in academic_info[4] else ''
369
+ data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip() if ':' in academic_info[5] else ''
370
+
371
+ # Validate we got the essential student info
372
+ if not data['student_info'].get('student_name'):
373
+ logging.warning("Failed to parse student name")
374
+ if strict_mode:
375
+ raise ValueError("Could not parse student name from transcript")
376
+
377
+ except Exception as e:
378
+ logging.warning(f"Error parsing student info: {str(e)}")
379
+ if strict_mode:
380
+ raise
381
 
382
  # Parse graduation requirements
383
  requirements_start = None
384
  requirements_end = None
385
  for i, line in enumerate(lines):
386
+ logging.debug(f"Processing line: {line}") # Added debug logging
387
  if "Code" in line and "Description" in line and "Required" in line:
388
  requirements_start = i + 1
389
  if requirements_start and "Total" in line:
 
392
 
393
  if requirements_start and requirements_end:
394
  for line in lines[requirements_start:requirements_end]:
395
+ try:
396
+ if '|' in line:
397
+ parts = [p.strip() for p in line.split('|') if p.strip()]
398
+ if len(parts) >= 6:
399
+ req = {
400
+ 'code': parts[0],
401
+ 'description': parts[1],
402
+ 'required': parts[2],
403
+ 'waived': parts[3],
404
+ 'completed': parts[4],
405
+ 'status': parts[5]
406
+ }
407
+ data['graduation_requirements'].append(req)
408
+ except Exception as e:
409
+ logging.warning(f"Error parsing requirement line: {line} - {str(e)}")
410
+ if strict_mode:
411
+ raise
412
 
413
  # Parse total line
414
+ try:
415
+ total_line = lines[requirements_end]
416
+ total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
417
+ if len(total_parts) >= 5:
418
+ data['summary']['total_required'] = total_parts[1]
419
+ data['summary']['total_waived'] = total_parts[2]
420
+ data['summary']['total_completed'] = total_parts[3]
421
+ data['summary']['completion_percentage'] = total_parts[4]
422
+ except Exception as e:
423
+ logging.warning(f"Error parsing requirements summary: {str(e)}")
424
+ if strict_mode:
425
+ raise
426
 
427
  # Parse course history
428
  course_history_start = None
429
  for i, line in enumerate(lines):
430
+ logging.debug(f"Processing line: {line}") # Added debug logging
431
  if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
432
  course_history_start = i + 1
433
  break
 
435
  if course_history_start:
436
  current_requirement = None
437
  for line in lines[course_history_start:]:
438
+ try:
439
+ if '|' in line:
440
+ parts = [p.strip() for p in line.split('|') if p.strip()]
441
+
442
+ # Check if this is a new requirement line
443
+ if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
444
+ current_requirement = parts[0]
445
+ parts = parts[1:] # Remove the requirement code
446
+
447
+ if len(parts) >= 9:
448
+ course = {
449
+ 'requirement': current_requirement,
450
+ 'school_year': parts[0],
451
+ 'grade_level': parts[1],
452
+ 'course_number': parts[2],
453
+ 'description': parts[3],
454
+ 'term': parts[4],
455
+ 'district_number': parts[5],
456
+ 'fg': parts[6],
457
+ 'included': parts[7],
458
+ 'credits': parts[8]
459
+ }
460
+ data['course_history'].append(course)
461
+ except Exception as e:
462
+ logging.warning(f"Error parsing course line: {line} - {str(e)}")
463
+ if strict_mode:
464
+ raise
465
 
466
  # Calculate graduation status
467
+ try:
468
+ if data['summary'].get('total_required') and data['summary'].get('total_completed'):
469
+ graduation_status = {
470
+ 'total_required_credits': float(data['summary']['total_required']),
471
+ 'total_completed_credits': float(data['summary']['total_completed']),
472
+ 'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
473
+ 'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
474
+ 'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
475
+ }
476
+ data['graduation_status'] = graduation_status
477
+ except Exception as e:
478
+ logging.warning(f"Error calculating graduation status: {str(e)}")
479
+ if strict_mode:
480
+ raise
481
 
482
  return data
483