Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -36,9 +36,9 @@ SESSION_TIMEOUT = 3600 # 1 hour session timeout
|
|
36 |
|
37 |
# Initialize logging
|
38 |
logging.basicConfig(
|
39 |
-
filename='transcript_parser.log',
|
40 |
level=logging.DEBUG,
|
41 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
|
42 |
)
|
43 |
|
44 |
# Model configuration - Only DeepSeek
|
@@ -318,7 +318,7 @@ class TranscriptParser:
|
|
318 |
logging.error(f"Error parsing transcript: {str(e)}")
|
319 |
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
320 |
|
321 |
-
def _parse_miami_dade_format(self, text: str) -> Dict:
|
322 |
"""Parse Miami-Dade County Public Schools transcripts."""
|
323 |
# Initialize PDF reader from text (simulating the PDF structure)
|
324 |
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
@@ -328,39 +328,62 @@ class TranscriptParser:
|
|
328 |
'student_info': {},
|
329 |
'graduation_requirements': [],
|
330 |
'course_history': [],
|
331 |
-
'summary': {}
|
|
|
332 |
}
|
333 |
|
334 |
-
# Parse student information
|
335 |
student_info_lines = []
|
336 |
-
for line in lines:
|
|
|
337 |
if "DORAL ACADEMY HIGH SCHOOL" in line:
|
338 |
-
|
|
|
339 |
break
|
340 |
|
341 |
if student_info_lines:
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
|
360 |
# Parse graduation requirements
|
361 |
requirements_start = None
|
362 |
requirements_end = None
|
363 |
for i, line in enumerate(lines):
|
|
|
364 |
if "Code" in line and "Description" in line and "Required" in line:
|
365 |
requirements_start = i + 1
|
366 |
if requirements_start and "Total" in line:
|
@@ -369,31 +392,42 @@ class TranscriptParser:
|
|
369 |
|
370 |
if requirements_start and requirements_end:
|
371 |
for line in lines[requirements_start:requirements_end]:
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
|
|
|
|
|
|
|
|
|
|
384 |
|
385 |
# Parse total line
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
|
|
|
|
|
|
|
|
|
|
393 |
|
394 |
# Parse course history
|
395 |
course_history_start = None
|
396 |
for i, line in enumerate(lines):
|
|
|
397 |
if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
|
398 |
course_history_start = i + 1
|
399 |
break
|
@@ -401,38 +435,49 @@ class TranscriptParser:
|
|
401 |
if course_history_start:
|
402 |
current_requirement = None
|
403 |
for line in lines[course_history_start:]:
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
|
|
|
|
|
|
|
|
|
|
426 |
|
427 |
# Calculate graduation status
|
428 |
-
|
429 |
-
'
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
|
437 |
return data
|
438 |
|
|
|
36 |
|
37 |
# Initialize logging
|
38 |
logging.basicConfig(
|
|
|
39 |
level=logging.DEBUG,
|
40 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
41 |
+
filename='transcript_parser.log'
|
42 |
)
|
43 |
|
44 |
# Model configuration - Only DeepSeek
|
|
|
318 |
logging.error(f"Error parsing transcript: {str(e)}")
|
319 |
raise ValueError(f"Couldn't parse transcript: {str(e)}")
|
320 |
|
321 |
+
def _parse_miami_dade_format(self, text: str, strict_mode: bool = False) -> Dict:
|
322 |
"""Parse Miami-Dade County Public Schools transcripts."""
|
323 |
# Initialize PDF reader from text (simulating the PDF structure)
|
324 |
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
|
|
328 |
'student_info': {},
|
329 |
'graduation_requirements': [],
|
330 |
'course_history': [],
|
331 |
+
'summary': {},
|
332 |
+
'format': 'miami_dade' # Add format identifier
|
333 |
}
|
334 |
|
335 |
+
# Parse student information with more robust checks
|
336 |
student_info_lines = []
|
337 |
+
for i, line in enumerate(lines):
|
338 |
+
logging.debug(f"Processing line: {line}") # Added debug logging
|
339 |
if "DORAL ACADEMY HIGH SCHOOL" in line:
|
340 |
+
# Take the matching school line plus up to 4 following lines (fewer if we're near the end)
|
341 |
+
student_info_lines = lines[i:i+5]
|
342 |
break
|
343 |
|
344 |
if student_info_lines:
|
345 |
+
try:
|
346 |
+
# Parse school and cohort info - more defensive
|
347 |
+
school_info_parts = student_info_lines[0].split('|')
|
348 |
+
if len(school_info_parts) > 2:
|
349 |
+
data['student_info']['school'] = school_info_parts[1].strip() if len(school_info_parts) > 1 else ''
|
350 |
+
data['student_info']['district'] = school_info_parts[2].strip() if len(school_info_parts) > 2 else ''
|
351 |
+
|
352 |
+
# Parse student name and ID - more defensive
|
353 |
+
if len(student_info_lines) > 1:
|
354 |
+
name_id_line = student_info_lines[1].split('-')
|
355 |
+
if len(name_id_line) > 1:
|
356 |
+
name_parts = name_id_line[1].split(',')
|
357 |
+
if len(name_parts) > 1:
|
358 |
+
data['student_info']['student_id'] = name_id_line[0].strip()
|
359 |
+
data['student_info']['student_name'] = name_parts[1].strip() + " " + name_parts[0].strip()
|
360 |
+
|
361 |
+
# Parse academic info - more defensive
|
362 |
+
if len(student_info_lines) > 2:
|
363 |
+
academic_info = student_info_lines[2].split('|')
|
364 |
+
if len(academic_info) > 5:
|
365 |
+
data['student_info']['current_grade'] = academic_info[1].split(':')[1].strip() if ':' in academic_info[1] else ''
|
366 |
+
data['student_info']['graduation_year'] = academic_info[2].strip()
|
367 |
+
data['student_info']['weighted_gpa'] = academic_info[3].split(':')[1].strip() if ':' in academic_info[3] else ''
|
368 |
+
data['student_info']['community_service_date'] = academic_info[4].split(':')[1].strip() if ':' in academic_info[4] else ''
|
369 |
+
data['student_info']['total_credits_earned'] = academic_info[5].split(':')[1].strip() if ':' in academic_info[5] else ''
|
370 |
+
|
371 |
+
# Validate we got the essential student info
|
372 |
+
if not data['student_info'].get('student_name'):
|
373 |
+
logging.warning("Failed to parse student name")
|
374 |
+
if strict_mode:
|
375 |
+
raise ValueError("Could not parse student name from transcript")
|
376 |
+
|
377 |
+
except Exception as e:
|
378 |
+
logging.warning(f"Error parsing student info: {str(e)}")
|
379 |
+
if strict_mode:
|
380 |
+
raise
|
381 |
|
382 |
# Parse graduation requirements
|
383 |
requirements_start = None
|
384 |
requirements_end = None
|
385 |
for i, line in enumerate(lines):
|
386 |
+
logging.debug(f"Processing line: {line}") # Added debug logging
|
387 |
if "Code" in line and "Description" in line and "Required" in line:
|
388 |
requirements_start = i + 1
|
389 |
if requirements_start and "Total" in line:
|
|
|
392 |
|
393 |
if requirements_start and requirements_end:
|
394 |
for line in lines[requirements_start:requirements_end]:
|
395 |
+
try:
|
396 |
+
if '|' in line:
|
397 |
+
parts = [p.strip() for p in line.split('|') if p.strip()]
|
398 |
+
if len(parts) >= 6:
|
399 |
+
req = {
|
400 |
+
'code': parts[0],
|
401 |
+
'description': parts[1],
|
402 |
+
'required': parts[2],
|
403 |
+
'waived': parts[3],
|
404 |
+
'completed': parts[4],
|
405 |
+
'status': parts[5]
|
406 |
+
}
|
407 |
+
data['graduation_requirements'].append(req)
|
408 |
+
except Exception as e:
|
409 |
+
logging.warning(f"Error parsing requirement line: {line} - {str(e)}")
|
410 |
+
if strict_mode:
|
411 |
+
raise
|
412 |
|
413 |
# Parse total line
|
414 |
+
try:
|
415 |
+
total_line = lines[requirements_end]
|
416 |
+
total_parts = [p.strip() for p in total_line.split('|') if p.strip()]
|
417 |
+
if len(total_parts) >= 5:
|
418 |
+
data['summary']['total_required'] = total_parts[1]
|
419 |
+
data['summary']['total_waived'] = total_parts[2]
|
420 |
+
data['summary']['total_completed'] = total_parts[3]
|
421 |
+
data['summary']['completion_percentage'] = total_parts[4]
|
422 |
+
except Exception as e:
|
423 |
+
logging.warning(f"Error parsing requirements summary: {str(e)}")
|
424 |
+
if strict_mode:
|
425 |
+
raise
|
426 |
|
427 |
# Parse course history
|
428 |
course_history_start = None
|
429 |
for i, line in enumerate(lines):
|
430 |
+
logging.debug(f"Processing line: {line}") # Added debug logging
|
431 |
if "Requirement" in line and "School Year" in line and "GradeLv1" in line:
|
432 |
course_history_start = i + 1
|
433 |
break
|
|
|
435 |
if course_history_start:
|
436 |
current_requirement = None
|
437 |
for line in lines[course_history_start:]:
|
438 |
+
try:
|
439 |
+
if '|' in line:
|
440 |
+
parts = [p.strip() for p in line.split('|') if p.strip()]
|
441 |
+
|
442 |
+
# Check if this is a new requirement line
|
443 |
+
if len(parts) >= 2 and parts[0] and parts[0] in [req['code'] for req in data['graduation_requirements']]:
|
444 |
+
current_requirement = parts[0]
|
445 |
+
parts = parts[1:] # Remove the requirement code
|
446 |
+
|
447 |
+
if len(parts) >= 9:
|
448 |
+
course = {
|
449 |
+
'requirement': current_requirement,
|
450 |
+
'school_year': parts[0],
|
451 |
+
'grade_level': parts[1],
|
452 |
+
'course_number': parts[2],
|
453 |
+
'description': parts[3],
|
454 |
+
'term': parts[4],
|
455 |
+
'district_number': parts[5],
|
456 |
+
'fg': parts[6],
|
457 |
+
'included': parts[7],
|
458 |
+
'credits': parts[8]
|
459 |
+
}
|
460 |
+
data['course_history'].append(course)
|
461 |
+
except Exception as e:
|
462 |
+
logging.warning(f"Error parsing course line: {line} - {str(e)}")
|
463 |
+
if strict_mode:
|
464 |
+
raise
|
465 |
|
466 |
# Calculate graduation status
|
467 |
+
try:
|
468 |
+
if data['summary'].get('total_required') and data['summary'].get('total_completed'):
|
469 |
+
graduation_status = {
|
470 |
+
'total_required_credits': float(data['summary']['total_required']),
|
471 |
+
'total_completed_credits': float(data['summary']['total_completed']),
|
472 |
+
'percent_complete': float(data['summary']['completion_percentage'].replace('%', '')),
|
473 |
+
'remaining_credits': float(data['summary']['total_required']) - float(data['summary']['total_completed']),
|
474 |
+
'on_track': float(data['summary']['completion_percentage'].replace('%', '')) >= 75.0
|
475 |
+
}
|
476 |
+
data['graduation_status'] = graduation_status
|
477 |
+
except Exception as e:
|
478 |
+
logging.warning(f"Error calculating graduation status: {str(e)}")
|
479 |
+
if strict_mode:
|
480 |
+
raise
|
481 |
|
482 |
return data
|
483 |
|