Dannyar608 committed on
Commit
0d7fd90
·
verified ·
1 Parent(s): 8dff1ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +251 -160
app.py CHANGED
@@ -272,115 +272,184 @@ class TranscriptParser:
272
  self.requirements = {}
273
  self.current_courses = []
274
  self.course_history = []
 
275
 
276
  def parse_transcript(self, text: str) -> Dict:
277
- """Main method to parse transcript text"""
278
- self._extract_student_info(text)
279
- self._extract_requirements(text)
280
- self._extract_course_history(text)
281
- self._extract_current_courses(text)
282
-
283
- return {
284
- "student_info": self.student_data,
285
- "requirements": self.requirements,
286
- "current_courses": self.current_courses,
287
- "course_history": self.course_history,
288
- "completion_status": self._calculate_completion()
289
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
  def _extract_student_info(self, text: str):
292
- """Enhanced student info extraction with more robust regex"""
293
- # Unified pattern that handles variations in transcript formats
294
- header_pattern = (
295
- r"(?:Student\s*[:]?\s*|Name\s*[:]?\s*)?"
296
- r"(\d{7})\s*[-]?\s*([\w\s,]+?)\s*"
297
- r"(?:\||Cohort\s*\w+\s*\||Un-weighted\s*GPA\s*([\d.]+)\s*\||Comm\s*Serv\s*Hours\s*(\d+))?"
298
- )
299
 
300
- header_match = re.search(header_pattern, text, re.IGNORECASE)
301
- if header_match:
302
  self.student_data = {
303
- "id": header_match.group(1) if header_match.group(1) else "Unknown",
304
- "name": header_match.group(2).strip() if header_match.group(2) else "Unknown",
305
- "unweighted_gpa": float(header_match.group(3)) if header_match.group(3) else 0.0,
306
- "community_service_hours": int(header_match.group(4)) if header_match.group(4) else 0
307
  }
308
 
309
- # More flexible grade info pattern
310
- grade_pattern = (
311
- r"(?:Grade|Level)\s*[:]?\s*(\d+)\s*"
312
- r"(?:\||YOG\s*[:]?\s*(\d{4})\s*\||Weighted\s*GPA\s*([\d.]+)\s*\||Total\s*Credits\s*Earned\s*([\d.]+))?"
313
- )
314
 
315
- grade_match = re.search(grade_pattern, text, re.IGNORECASE)
316
- if grade_match:
317
  self.student_data.update({
318
- "current_grade": grade_match.group(1) if grade_match.group(1) else "Unknown",
319
- "graduation_year": grade_match.group(2) if grade_match.group(2) else "Unknown",
320
- "weighted_gpa": float(grade_match.group(3)) if grade_match.group(3) else 0.0,
321
- "total_credits": float(grade_match.group(4)) if grade_match.group(4) else 0.0
 
 
 
 
 
 
 
 
322
  })
323
 
324
  def _extract_requirements(self, text: str):
325
- """Parse the graduation requirements section"""
326
- req_table = re.findall(
327
- r"\|([A-Z]-[\w\s]+)\s*\|([^\|]+)\|([\d.]+)\s*\|([\d.]+)\s*\|([\d.]+)\s*\|([^\|]+)\|",
328
- text
 
 
 
 
 
 
 
 
 
 
 
 
329
  )
330
 
331
- for row in req_table:
332
- req_name = row[0].strip()
333
- self.requirements[req_name] = {
334
- "required": float(row[2]),
335
- "completed": float(row[4]),
336
- "status": f"{row[5].strip()}%"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  }
338
 
339
  def _extract_course_history(self, text: str):
340
- """Parse the detailed course history"""
341
- course_lines = re.findall(
342
- r"\|([A-Z]-[\w\s&\(\)]+)\s*\|(\d{4}-\d{4})\s*\|(\d{2})\s*\|([A-Z0-9]+)\s*\|([^\|]+)\|([^\|]+)\|([^\|]+)\|([A-Z])\s*\|([YRXW]?)\s*\|([^\|]+)\|",
343
- text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  )
345
 
346
- for course in course_lines:
 
 
347
  self.course_history.append({
348
- "requirement_category": course[0].strip(),
349
- "school_year": course[1],
350
- "grade_level": course[2],
351
- "course_code": course[3],
352
- "description": course[4].strip(),
353
- "term": course[5].strip(),
354
- "district_number": course[6].strip(),
355
- "grade": course[7],
356
- "inclusion_status": course[8],
357
- "credits": course[9].strip()
358
  })
359
 
360
  def _extract_current_courses(self, text: str):
361
  """Identify courses currently in progress"""
362
- in_progress = [c for c in self.course_history if "inProgress" in c["credits"]]
363
  self.current_courses = [
364
  {
365
  "course": c["description"],
 
366
  "category": c["requirement_category"],
367
  "term": c["term"],
368
- "credits": c["credits"]
 
369
  }
370
- for c in in_progress
 
371
  ]
372
 
373
- def _calculate_completion(self) -> Dict:
374
- """Calculate overall completion status"""
375
  total_required = sum(req["required"] for req in self.requirements.values())
376
  total_completed = sum(req["completed"] for req in self.requirements.values())
377
 
378
- return {
379
- "total_required": total_required,
380
- "total_completed": total_completed,
381
  "percent_complete": round((total_completed / total_required) * 100, 1),
382
- "remaining_credits": total_required - total_completed
383
- }
 
384
 
385
  def to_json(self) -> str:
386
  """Export parsed data as JSON"""
@@ -389,51 +458,92 @@ class TranscriptParser:
389
  "requirements": self.requirements,
390
  "current_courses": self.current_courses,
391
  "course_history": self.course_history,
392
- "completion_status": self._calculate_completion()
393
  }, indent=2)
394
 
395
- def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
396
- """Use AI model to parse transcript text with progress feedback"""
397
- model, tokenizer = model_loader.load_model(progress)
398
- if model is None or tokenizer is None:
399
- raise gr.Error(f"Model failed to load. {model_loader.error or 'Please try loading a model first.'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
- # First try the structured parser
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  try:
 
403
  if progress:
404
- progress(0.1, desc="Parsing transcript structure...")
 
405
  parser = TranscriptParser()
406
  parsed_data = parser.parse_transcript(text)
 
407
  if progress:
408
- progress(0.9, desc="Formatting results...")
409
-
410
- # Convert to expected format
411
- formatted_data = {
412
- "grade_level": parsed_data["student_info"].get("current_grade", "Unknown"),
413
- "gpa": {
414
- "weighted": parsed_data["student_info"].get("weighted_gpa", "N/A"),
415
- "unweighted": parsed_data["student_info"].get("unweighted_gpa", "N/A")
416
- },
417
- "courses": []
418
- }
419
 
420
- # Add courses
421
- for course in parsed_data["course_history"]:
422
- formatted_data["courses"].append({
423
- "code": course["course_code"],
424
- "name": course["description"],
425
- "grade": course["grade"],
426
- "credits": course["credits"],
427
- "year": course["school_year"],
428
- "grade_level": course["grade_level"]
429
- })
430
 
431
- if progress:
432
- progress(1.0)
433
- return formatted_data
434
-
435
  except Exception as e:
436
  logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}")
 
437
  # Fall back to AI parsing if structured parsing fails
438
  return parse_transcript_with_ai_fallback(text, progress)
439
 
@@ -447,6 +557,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
447
  - Current grade level
448
  - Weighted GPA (if available)
449
  - Unweighted GPA (if available)
 
 
450
  - List of all courses with:
451
  * Course code
452
  * Course name
@@ -454,6 +566,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
454
  * Credits earned
455
  * Year/semester taken
456
  * Grade level when taken
 
457
  Return the data in JSON format.
458
 
459
  Transcript Text:
@@ -464,6 +577,10 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
464
  if progress:
465
  progress(0.1, desc="Processing transcript with AI...")
466
 
 
 
 
 
467
  # Tokenize and generate response
468
  inputs = tokenizer(prompt, return_tensors="pt").to(model_loader.device)
469
  if progress:
@@ -471,7 +588,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
471
 
472
  outputs = model.generate(
473
  **inputs,
474
- max_new_tokens=1500,
475
  temperature=0.1,
476
  do_sample=True
477
  )
@@ -500,43 +617,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
500
  logging.error(f"AI parsing error: {str(e)}")
501
  raise gr.Error(f"Error processing transcript: {str(e)}")
502
 
503
- def format_transcript_output(data: Dict) -> str:
504
- """Format the parsed data into human-readable text."""
505
- output = []
506
- output.append(f"Student Transcript Summary\n{'='*40}")
507
- output.append(f"Current Grade Level: {data.get('grade_level', 'Unknown')}")
508
-
509
- if 'gpa' in data:
510
- output.append(f"\nGPA:")
511
- output.append(f"- Weighted: {data['gpa'].get('weighted', 'N/A')}")
512
- output.append(f"- Unweighted: {data['gpa'].get('unweighted', 'N/A')}")
513
-
514
- if 'courses' in data:
515
- output.append("\nCourse History:\n" + '='*40)
516
-
517
- # Group courses by grade level
518
- courses_by_grade = defaultdict(list)
519
- for course in data['courses']:
520
- grade_level = course.get('grade_level', 'Unknown')
521
- courses_by_grade[grade_level].append(course)
522
-
523
- # Sort grades numerically
524
- for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
525
- output.append(f"\nGrade {grade}:\n{'-'*30}")
526
- for course in courses_by_grade[grade]:
527
- course_str = f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
528
- if 'grade' in course:
529
- course_str += f" (Grade: {course['grade']})"
530
- if 'credits' in course:
531
- course_str += f" | Credits: {course['credits']}"
532
- if 'year' in course:
533
- course_str += f" | Year: {course['year']}"
534
- output.append(course_str)
535
-
536
- return '\n'.join(output)
537
-
538
  def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
539
- """Main function to parse transcript files."""
540
  try:
541
  if not file_obj:
542
  raise ValueError("Please upload a file first")
@@ -544,32 +626,40 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Di
544
  validate_file(file_obj)
545
  file_ext = os.path.splitext(file_obj.name)[1].lower()
546
 
547
- # Extract text from file
 
 
 
548
  text = extract_text_from_file(file_obj.name, file_ext)
549
 
550
- # Use AI for parsing
 
 
 
 
 
 
551
  parsed_data = parse_transcript_with_ai(text, progress)
552
 
553
  # Format output text
554
- output_text = format_transcript_output(parsed_data)
555
-
556
- # Prepare the data structure for saving
557
- transcript_data = {
558
- "grade_level": parsed_data.get('grade_level', 'Unknown'),
559
- "gpa": parsed_data.get('gpa', {}),
560
- "courses": defaultdict(list)
561
- }
562
 
563
- # Organize courses by grade level
564
- for course in parsed_data.get('courses', []):
565
- grade_level = course.get('grade_level', 'Unknown')
566
- transcript_data["courses"][grade_level].append(course)
567
 
568
- return output_text, transcript_data
569
 
570
  except Exception as e:
571
- logging.error(f"Transcript processing error: {str(e)}")
572
- return f"Error processing transcript: {str(e)}", None
 
 
 
 
 
 
 
 
573
 
574
  # ========== LEARNING STYLE QUIZ ==========
575
  class LearningStyleQuiz:
@@ -1449,7 +1539,7 @@ def create_interface():
1449
  "Your profile summary will appear here after saving.",
1450
  label="Profile Summary"
1451
  )
1452
- blog = gr.Textbox(label="Personal Blog", visible=False) # Added blog component
1453
 
1454
  def save_profile_and_update(name, age, interests, transcript_data, learning_style,
1455
  movie, movie_reason, show, show_reason,
@@ -1606,3 +1696,4 @@ app = create_interface()
1606
 
1607
  if __name__ == "__main__":
1608
  app.launch()
 
 
272
  self.requirements = {}
273
  self.current_courses = []
274
  self.course_history = []
275
+ self.graduation_status = {}
276
 
277
  def parse_transcript(self, text: str) -> Dict:
278
+ """Enhanced parsing method for Miami-Dade format"""
279
+ try:
280
+ # First normalize the text (replace multiple spaces, normalize line breaks)
281
+ text = re.sub(r'\s+', ' ', text)
282
+
283
+ # Extract student info with more flexible patterns
284
+ self._extract_student_info(text)
285
+
286
+ # Extract requirements with better table parsing
287
+ self._extract_requirements(text)
288
+
289
+ # Extract course history with improved pattern matching
290
+ self._extract_course_history(text)
291
+
292
+ # Identify current courses
293
+ self._extract_current_courses(text)
294
+
295
+ # Calculate completion status
296
+ self._calculate_completion()
297
+
298
+ return {
299
+ "student_info": self.student_data,
300
+ "requirements": self.requirements,
301
+ "current_courses": self.current_courses,
302
+ "course_history": self.course_history,
303
+ "graduation_status": self.graduation_status
304
+ }
305
+
306
+ except Exception as e:
307
+ logging.error(f"Error parsing transcript: {str(e)}")
308
+ raise gr.Error(f"Error parsing transcript: {str(e)}")
309
 
310
  def _extract_student_info(self, text: str):
311
+ """Enhanced student info extraction for Miami-Dade format"""
312
+ # Extract basic student info
313
+ student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
314
+ student_match = re.search(student_pattern, text, re.IGNORECASE)
 
 
 
315
 
316
+ if student_match:
 
317
  self.student_data = {
318
+ "id": student_match.group(1),
319
+ "name": student_match.group(2).replace(",", ", "),
320
+ "current_grade": student_match.group(3),
321
+ "graduation_year": student_match.group(4)
322
  }
323
 
324
+ # Extract GPA info
325
+ gpa_pattern = r"Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
326
+ gpa_match = re.search(gpa_pattern, text, re.IGNORECASE)
 
 
327
 
328
+ if gpa_match:
 
329
  self.student_data.update({
330
+ "unweighted_gpa": float(gpa_match.group(1)),
331
+ "weighted_gpa": float(gpa_match.group(2))
332
+ })
333
+
334
+ # Extract credits and service hours
335
+ credits_pattern = r"Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)"
336
+ credits_match = re.search(credits_pattern, text, re.IGNORECASE)
337
+
338
+ if credits_match:
339
+ self.student_data.update({
340
+ "total_credits": float(credits_match.group(1)),
341
+ "community_service_hours": int(credits_match.group(2))
342
  })
343
 
344
  def _extract_requirements(self, text: str):
345
+ """Parse the graduation requirements section with improved table parsing"""
346
+ # Find the requirements table
347
+ req_table_start = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status", text)
348
+ if not req_table_start:
349
+ raise ValueError("Could not find requirements table header")
350
+
351
+ req_text = text[req_table_start.start():]
352
+
353
+ # Extract individual requirements
354
+ req_pattern = (
355
+ r"([A-Z]-[\w\s\(\)&]+)\s*" # Code
356
+ r"([^\|]+)\s*" # Description
357
+ r"([\d.]+)\s*" # Required
358
+ r"([\d.]+)\s*" # Waived
359
+ r"([\d.]+)\s*" # Completed
360
+ r"([\d.]+)\s*%" # Status
361
  )
362
 
363
+ req_matches = re.finditer(req_pattern, req_text)
364
+
365
+ for match in req_matches:
366
+ req_code = match.group(1).strip()
367
+ self.requirements[req_code] = {
368
+ "description": match.group(2).strip(),
369
+ "required": float(match.group(3)),
370
+ "waived": float(match.group(4)),
371
+ "completed": float(match.group(5)),
372
+ "status": f"{match.group(6)}%"
373
+ }
374
+
375
+ # Extract total requirements
376
+ total_pattern = r"Total\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%"
377
+ total_match = re.search(total_pattern, req_text)
378
+
379
+ if total_match:
380
+ self.graduation_status["total_requirements"] = {
381
+ "required": float(total_match.group(1)),
382
+ "waived": float(total_match.group(2)),
383
+ "completed": float(total_match.group(3)),
384
+ "percent_complete": float(total_match.group(4))
385
  }
386
 
387
  def _extract_course_history(self, text: str):
388
+ """Parse the detailed course history with improved pattern matching"""
389
+ # Find the course history table
390
+ course_header = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits", text)
391
+ if not course_header:
392
+ raise ValueError("Could not find course history table header")
393
+
394
+ course_text = text[course_header.start():]
395
+
396
+ # Extract individual courses
397
+ course_pattern = (
398
+ r"([A-Z]-[\w\s\(\)&-]+)\s*" # Requirement
399
+ r"(\d{4}-\d{4})\s*" # School Year
400
+ r"(\d{2})\s*" # Grade Level
401
+ r"([A-Z0-9]+)\s*" # Course Number
402
+ r"([^\|]+)\s*" # Description
403
+ r"([A-Z0-9]+)\s*" # Term
404
+ r"([A-Z0-9]+)\s*" # District Number
405
+ r"([A-Z])\s*" # Final Grade
406
+ r"([A-Z])\s*" # Inclusion Status
407
+ r"([\d.]+|inProgress)" # Credits
408
  )
409
 
410
+ course_matches = re.finditer(course_pattern, course_text)
411
+
412
+ for match in course_matches:
413
  self.course_history.append({
414
+ "requirement_category": match.group(1).strip(),
415
+ "school_year": match.group(2),
416
+ "grade_level": match.group(3),
417
+ "course_code": match.group(4),
418
+ "description": match.group(5).strip(),
419
+ "term": match.group(6),
420
+ "district_number": match.group(7),
421
+ "grade": match.group(8),
422
+ "inclusion_status": match.group(9),
423
+ "credits": match.group(10)
424
  })
425
 
426
  def _extract_current_courses(self, text: str):
427
  """Identify courses currently in progress"""
 
428
  self.current_courses = [
429
  {
430
  "course": c["description"],
431
+ "code": c["course_code"],
432
  "category": c["requirement_category"],
433
  "term": c["term"],
434
+ "credits": c["credits"],
435
+ "grade_level": c["grade_level"]
436
  }
437
+ for c in self.course_history
438
+ if c["credits"].lower() == "inprogress"
439
  ]
440
 
441
+ def _calculate_completion(self):
442
+ """Calculate overall completion status with more detailed info"""
443
  total_required = sum(req["required"] for req in self.requirements.values())
444
  total_completed = sum(req["completed"] for req in self.requirements.values())
445
 
446
+ self.graduation_status.update({
447
+ "total_required_credits": total_required,
448
+ "total_completed_credits": total_completed,
449
  "percent_complete": round((total_completed / total_required) * 100, 1),
450
+ "remaining_credits": total_required - total_completed,
451
+ "on_track": (total_completed / total_required) >= 0.75 # 75% completion considered on track
452
+ })
453
 
454
  def to_json(self) -> str:
455
  """Export parsed data as JSON"""
 
458
  "requirements": self.requirements,
459
  "current_courses": self.current_courses,
460
  "course_history": self.course_history,
461
+ "graduation_status": self.graduation_status
462
  }, indent=2)
463
 
464
def format_transcript_output(data: Dict) -> str:
    """Render parsed transcript data as a Markdown-style text report.

    Every field is read defensively: a missing section or key is rendered
    with a placeholder instead of raising ``KeyError``, because the payload
    may come from a parser whose output does not contain every key.

    Args:
        data: Parsed transcript with the optional keys ``student_info``,
            ``graduation_status``, ``requirements``, ``current_courses``
            and ``course_history``.

    Returns:
        The report as one newline-joined string.
    """
    rule = '=' * 50

    def title(*parts):
        # Join only the parts that are actually present ("code name").
        return " ".join(str(part) for part in parts if part not in (None, ""))

    student = data.get("student_info") or {}
    grad_status = data.get("graduation_status") or {}

    # Student info and graduation progress sections.
    output = [
        f"## Student Transcript Summary\n{rule}",
        f"**Name:** {student.get('name', 'Unknown')}",
        f"**Student ID:** {student.get('id', 'Unknown')}",
        f"**Current Grade:** {student.get('current_grade', 'Unknown')}",
        f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}",
        f"**Unweighted GPA:** {student.get('unweighted_gpa', 'N/A')}",
        f"**Weighted GPA:** {student.get('weighted_gpa', 'N/A')}",
        f"**Total Credits Earned:** {student.get('total_credits', 'N/A')}",
        f"**Community Service Hours:** {student.get('community_service_hours', 'N/A')}\n",
        f"## Graduation Progress\n{rule}",
        f"**Overall Completion:** {grad_status.get('percent_complete', 0)}%",
        f"**Credits Required:** {grad_status.get('total_required_credits', 0)}",
        f"**Credits Completed:** {grad_status.get('total_completed_credits', 0)}",
        f"**Credits Remaining:** {grad_status.get('remaining_credits', 0)}",
        f"**On Track to Graduate:** {'Yes' if grad_status.get('on_track', False) else 'No'}\n",
    ]

    # Detailed requirements.
    output.append("### Detailed Requirements:")
    for code, req in (data.get("requirements") or {}).items():
        output.append(
            f"- **{code}**: {req.get('description', '')}\n"
            f" Required: {req.get('required', 'N/A')} | Completed: {req.get('completed', 'N/A')} | "
            f"Status: {req.get('status', 'N/A')}"
        )
    output.append("")

    # Current courses.
    current_courses = data.get("current_courses")
    if current_courses:
        output.append("## Current Courses (In Progress)\n" + rule)
        for course in current_courses:
            output.append(
                f"- **{title(course.get('code'), course.get('course'))}**\n"
                f" Category: {course.get('category', 'N/A')} | "
                f"Grade Level: {course.get('grade_level', 'N/A')} | "
                f"Term: {course.get('term', 'N/A')} | Credits: {course.get('credits', 'N/A')}"
            )
        output.append("")

    # Course history, grouped by school year.
    courses_by_year = defaultdict(list)
    for course in data.get("course_history") or []:
        courses_by_year[course.get("school_year", "Unknown")].append(course)

    if courses_by_year:
        output.append("## Course History\n" + rule)
        # key=str keeps the sort well-defined even if a year is not a string.
        for year in sorted(courses_by_year, key=str):
            output.append(f"\n### {year}")
            for course in courses_by_year[year]:
                output.append(
                    f"- **{title(course.get('course_code'), course.get('description'))}**\n"
                    f" Grade: {course.get('grade', 'N/A')} | Credits: {course.get('credits', 'N/A')} | "
                    f"Category: {course.get('requirement_category', 'N/A')} | Term: {course.get('term', 'N/A')}"
                )

    return '\n'.join(output)
528
+
529
def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
    """Parse transcript text, trying the structured parser before the AI model.

    ``TranscriptParser`` is attempted first. If it raises for any reason the
    failure is logged as a warning and the text is handed to
    ``parse_transcript_with_ai_fallback`` instead.

    Args:
        text: Text extracted from the transcript file.
        progress: Progress callback; it is skipped when falsy.

    Returns:
        The structured parser's result, or the fallback's result when
        structured parsing fails.
    """
    def report(fraction, message):
        # Progress reporting is optional.
        if progress:
            progress(fraction, desc=message)

    try:
        report(0.1, "Attempting structured parsing...")
        structured = TranscriptParser().parse_transcript(text)
        report(0.8, "Formatting results...")
        return structured
    except Exception as e:
        logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}")
        # Fall back to AI parsing if structured parsing fails
        return parse_transcript_with_ai_fallback(text, progress)
549
 
 
557
  - Current grade level
558
  - Weighted GPA (if available)
559
  - Unweighted GPA (if available)
560
+ - Total credits earned
561
+ - Community service hours (if available)
562
  - List of all courses with:
563
  * Course code
564
  * Course name
 
566
  * Credits earned
567
  * Year/semester taken
568
  * Grade level when taken
569
+ - Graduation requirements status
570
  Return the data in JSON format.
571
 
572
  Transcript Text:
 
577
  if progress:
578
  progress(0.1, desc="Processing transcript with AI...")
579
 
580
+ model, tokenizer = model_loader.load_model(progress)
581
+ if model is None or tokenizer is None:
582
+ raise gr.Error(f"Model failed to load. {model_loader.error or 'Please try loading a model first.'}")
583
+
584
  # Tokenize and generate response
585
  inputs = tokenizer(prompt, return_tensors="pt").to(model_loader.device)
586
  if progress:
 
588
 
589
  outputs = model.generate(
590
  **inputs,
591
+ max_new_tokens=2000,
592
  temperature=0.1,
593
  do_sample=True
594
  )
 
617
  logging.error(f"AI parsing error: {str(e)}")
618
  raise gr.Error(f"Error processing transcript: {str(e)}")
619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620
  def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
621
+ """Main function to parse transcript files with better error handling"""
622
  try:
623
  if not file_obj:
624
  raise ValueError("Please upload a file first")
 
626
  validate_file(file_obj)
627
  file_ext = os.path.splitext(file_obj.name)[1].lower()
628
 
629
+ # Extract text from file with better error reporting
630
+ if progress:
631
+ progress(0.2, desc="Extracting text from file...")
632
+
633
  text = extract_text_from_file(file_obj.name, file_ext)
634
 
635
+ if not text.strip():
636
+ raise ValueError("No text could be extracted from the file")
637
+
638
+ # Use AI for parsing with progress updates
639
+ if progress:
640
+ progress(0.4, desc="Analyzing transcript content...")
641
+
642
  parsed_data = parse_transcript_with_ai(text, progress)
643
 
644
  # Format output text
645
+ if progress:
646
+ progress(0.9, desc="Generating report...")
 
 
 
 
 
 
647
 
648
+ output_text = format_transcript_output(parsed_data)
 
 
 
649
 
650
+ return output_text, parsed_data
651
 
652
  except Exception as e:
653
+ error_msg = f"Error processing transcript: {str(e)}"
654
+ logging.error(error_msg)
655
+
656
+ # Provide helpful tips based on error type
657
+ if "No text could be extracted" in str(e):
658
+ error_msg += "\n\nTips: Please ensure your file is clear and readable. Try scanning at a higher resolution if it's an image."
659
+ elif "requirements table header" in str(e):
660
+ error_msg += "\n\nTips: This appears to be an unsupported transcript format. Please contact support."
661
+
662
+ return error_msg, None
663
 
664
  # ========== LEARNING STYLE QUIZ ==========
665
  class LearningStyleQuiz:
 
1539
  "Your profile summary will appear here after saving.",
1540
  label="Profile Summary"
1541
  )
1542
+ blog = gr.Textbox(label="Personal Blog", visible=False)
1543
 
1544
  def save_profile_and_update(name, age, interests, transcript_data, learning_style,
1545
  movie, movie_reason, show, show_reason,
 
1696
 
1697
  if __name__ == "__main__":
1698
  app.launch()
1699
+