Runtime error
Update app.py
app.py
CHANGED
@@ -75,7 +75,6 @@ if HF_TOKEN:
             logger.error(f"Attempt {attempt + 1} failed to initialize Hugging Face API: {str(e)}")
             time.sleep(2 ** attempt)

-# ========== UTILITY FUNCTIONS ==========
 class DataEncryptor:
     def __init__(self, key: str):
         self.cipher = Fernet(key.encode())
@@ -146,7 +145,6 @@ def remove_sensitive_info(text: str) -> str:
         text = re.sub(pattern, replacement, text)
     return text

-# ========== LEARNING STYLE QUIZ ==========
 class LearningStyleQuiz:
     def __init__(self):
         self.questions = [
@@ -252,7 +250,6 @@ class LearningStyleQuiz:
 # Initialize learning style quiz
 learning_style_quiz = LearningStyleQuiz()

-# ========== ENHANCED TRANSCRIPT PARSER ==========
 class EnhancedMiamiDadeTranscriptParser:
     def __init__(self):
         self.patterns = {
@@ -268,7 +265,7 @@ class EnhancedMiamiDadeTranscriptParser:
                 re.DOTALL
             ),
             'credits': re.compile(
-                r"\*\s+([A-Z\s]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*",
+                r"\*\s+([A-Z\s/]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*",
                 re.DOTALL
             ),
             'course': re.compile(
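The only functional change in this hunk is the `/` added to the subject character class of the 'credits' pattern, so credit-summary rows whose subject names contain a slash now match. A quick illustration (the sample row is made up for this example, not taken from a real transcript):

import re

old_credits = re.compile(r"\*\s+([A-Z\s]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*", re.DOTALL)
new_credits = re.compile(r"\*\s+([A-Z\s/]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\*", re.DOTALL)

# Hypothetical credit-summary row with a slash in the subject name
row = "* LANGUAGE ARTS/READING 3.0 4.0 1.0 *"

print(old_credits.search(row))           # None: '/' is not allowed by [A-Z\s]
print(new_credits.search(row).groups())  # ('LANGUAGE ARTS/READING', '3.0', '4.0', '1.0')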
@@ -291,72 +288,143 @@ class EnhancedMiamiDadeTranscriptParser:
     def parse_transcript(self, file_path: str) -> Dict:
         """Parse Miami-Dade transcript PDF with enhanced pattern matching"""
         try:
+            # First try pdfplumber
             with pdfplumber.open(file_path) as pdf:
                 text = "\n".join(page.extract_text() for page in pdf.pages)

-
-
-
+            # Fallback to PyMuPDF if text extraction is poor
+            if len(text) < 500: # If we got very little text
+                doc = fitz.open(file_path)
+                text = ""
+                for page in doc:
+                    text += page.get_text()
+
+            # Debug: Save extracted text
+            with open("debug_transcript.txt", "w") as f:
+                f.write(text)

-
+            return self._parse_format(text)
         except Exception as e:
             logger.error(f"Error parsing transcript: {str(e)}")
             raise ValueError(f"Error processing transcript: {str(e)}")

     def _parse_format(self, text: str) -> Dict:
-        """Parse the transcript format
-
-
-
-
-
-
-
-
+        """Parse the transcript format with improved error handling"""
+        try:
+            parsed_data = {
+                'student_info': self._parse_student_info(text),
+                'academic_summary': self._parse_academic_summary(text),
+                'course_history': self._parse_courses(text),
+                'assessments': self._parse_assessments(text),
+                'format': 'miami_dade_v2'
+            }
+
+            # Validate we got at least some data
+            if not parsed_data['student_info'] or not parsed_data['course_history']:
+                raise ValueError("Incomplete data extracted from transcript")
+
+            return parsed_data
+        except Exception as e:
+            logger.error(f"Format parsing error: {str(e)}")
+            return self._parse_alternative_format(text)
+
+    def _parse_alternative_format(self, text: str) -> Dict:
+        """Fallback parser for alternative formats"""
+        try:
+            parsed_data = {
+                'student_info': {},
+                'academic_summary': {},
+                'course_history': [],
+                'assessments': {},
+                'format': 'alternative'
+            }
+
+            # Try to extract basic student info
+            name_match = re.search(r"NAME:\s*([A-Z]+,\s*[A-Z]+)", text)
+            if name_match:
+                parsed_data['student_info']['name'] = name_match.group(1).replace(',', ' ').strip()
+
+            # Try to extract GPA
+            gpa_match = re.search(r"GPA:\s*([\d.]+)", text)
+            if gpa_match:
+                parsed_data['academic_summary']['gpa'] = {
+                    'district': float(gpa_match.group(1)),
+                    'state': float(gpa_match.group(1)) # Assume same if not specified
+                }
+
+            return parsed_data
+        except Exception as e:
+            logger.error(f"Alternative parser failed: {str(e)}")
+            raise ValueError("Could not parse transcript in any supported format")

     def _parse_student_info(self, text: str) -> Dict:
-        """Extract student information"""
+        """Extract student information with improved pattern matching"""
         match = self.patterns['student_info'].search(text)
         if not match:
-
-
+            # Try alternative patterns
+            match = re.search(r"STUDENT INFORMATION.*?NAME:\s*([^\n]+)", text, re.DOTALL)
+            if not match:
+                return {}
+
         return {
-            'name': match.group(1).replace(',', ' ').strip(),
-            'grade': match.group(2),
-            'student_id': match.group(3),
-            'school': match.group(4).strip(),
+            'name': match.group(1).replace(',', ' ').strip() if match else "Unknown",
+            'grade': match.group(2) if match and len(match.groups()) > 1 else "Unknown",
+            'student_id': match.group(3) if match and len(match.groups()) > 2 else "Unknown",
+            'school': match.group(4).strip() if match and len(match.groups()) > 3 else "Unknown",
             'birth_date': self._extract_birth_date(text),
             'ethnicity': self._extract_ethnicity(text)
         }

     def _extract_birth_date(self, text: str) -> Optional[str]:
-        """Extract birth date from transcript"""
-
-
+        """Extract birth date from transcript with multiple pattern attempts"""
+        patterns = [
+            r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})",
+            r"DOB:\s*(\d{2}/\d{2}/\d{4})",
+            r"DATE OF BIRTH:\s*([^\n]+)"
+        ]
+
+        for pattern in patterns:
+            birth_match = re.search(pattern, text)
+            if birth_match:
+                return birth_match.group(1)
+        return None

     def _extract_ethnicity(self, text: str) -> Optional[str]:
-        """Extract ethnicity information"""
-
-
+        """Extract ethnicity information with multiple pattern attempts"""
+        patterns = [
+            r"ETHNICITY:\s*([^\n]+)",
+            r"RACE/ETHNICITY:\s*([^\n]+)",
+            r"DEMOGRAPHICS.*?ETHNICITY:\s*([^\n]+)"
+        ]
+
+        for pattern in patterns:
+            eth_match = re.search(pattern, text, re.DOTALL)
+            if eth_match:
+                return eth_match.group(1).strip()
+        return None

     def _parse_academic_summary(self, text: str) -> Dict:
-        """Parse academic summary section"""
-        gpa_match = self.patterns['gpa'].search(text)
-        credits_matches = self.patterns['credits'].finditer(text)
-        rank_match = self.patterns['class_rank'].search(text)
-
+        """Parse academic summary section with improved error handling"""
         summary = {
-            'gpa': {
-                'district': float(gpa_match.group(1)) if gpa_match else None,
-                'state': float(gpa_match.group(2)) if gpa_match else None
-            },
+            'gpa': {'district': None, 'state': None},
             'credits': {},
-            'class_rank': {
-                'percentile': int(rank_match.group(1)) if rank_match else None,
-                'class_size': int(rank_match.group(2)) if rank_match else None
-            }
+            'class_rank': {'percentile': None, 'class_size': None}
         }

+        # Try multiple GPA patterns
+        gpa_match = self.patterns['gpa'].search(text)
+        if not gpa_match:
+            gpa_match = re.search(r"GPA.*?([\d.]+).*?([\d.]+)", text)
+
+        if gpa_match:
+            summary['gpa']['district'] = float(gpa_match.group(1))
+            summary['gpa']['state'] = float(gpa_match.group(2)) if gpa_match.group(2) else summary['gpa']['district']
+
+        # Try multiple credit patterns
+        credits_matches = self.patterns['credits'].finditer(text)
+        if not credits_matches:
+            credits_matches = re.finditer(r"([A-Z ]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", text)
+
         for match in credits_matches:
             subject = match.group(1).strip()
             summary['credits'][subject] = {
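The substantive additions in this hunk are a two-stage text extraction (pdfplumber first, PyMuPDF as a fallback when very little text comes back) and a structured-then-alternative parsing path. Below is a condensed, standalone sketch of the extraction fallback, assuming the same 500-character threshold as the commit; the helper name and the guard for pages where extract_text() returns None are mine, not part of the commit:

import pdfplumber
import fitz  # PyMuPDF

def extract_pdf_text(file_path: str, min_chars: int = 500) -> str:
    # Prefer pdfplumber; extract_text() can return None for image-only pages
    with pdfplumber.open(file_path) as pdf:
        text = "\n".join((page.extract_text() or "") for page in pdf.pages)

    # If the extraction looks too thin, retry with PyMuPDF
    if len(text) < min_chars:
        doc = fitz.open(file_path)
        text = "".join(page.get_text() for page in doc)

    return text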
@@ -365,28 +433,60 @@ class EnhancedMiamiDadeTranscriptParser:
                 'remaining': float(match.group(4)) if match.group(4) else None
             }

+        # Try multiple class rank patterns
+        rank_match = self.patterns['class_rank'].search(text)
+        if not rank_match:
+            rank_match = re.search(r"RANK.*?(\d+).*?(\d+)", text)
+
+        if rank_match:
+            summary['class_rank']['percentile'] = int(rank_match.group(1))
+            summary['class_rank']['class_size'] = int(rank_match.group(2))
+
         return summary

     def _parse_courses(self, text: str) -> List[Dict]:
-        """Parse course history section"""
+        """Parse course history section with improved pattern matching"""
         courses = []
+
+        # Try primary pattern first
         for match in self.patterns['course'].finditer(text):
-            courses.append(
-
-
-
-
-
-
-
-
-
-
+            courses.append(self._create_course_dict(match))
+
+        # If no courses found, try alternative patterns
+        if not courses:
+            alt_pattern = re.compile(
+                r"(\d{4}-\d{4})\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([\d.]+)",
+                re.DOTALL
+            )
+            for match in alt_pattern.finditer(text):
+                courses.append({
+                    'term': match.group(1),
+                    'course_code': match.group(2),
+                    'course_title': match.group(3).strip(),
+                    'subject_area': match.group(4),
+                    'grade': match.group(5),
+                    'credit_earned': float(match.group(6)),
+                    'credit_attempted': float(match.group(6))
+                })
+
         return courses

+    def _create_course_dict(self, match) -> Dict:
+        """Create standardized course dictionary from regex match"""
+        return {
+            'term': match.group(1),
+            'course_code': match.group(2),
+            'course_title': match.group(3).strip(),
+            'subject_area': match.group(4),
+            'grade': match.group(5),
+            'flag': match.group(6),
+            'credit_status': match.group(7),
+            'credit_attempted': float(match.group(8)),
+            'credit_earned': float(match.group(9))
+        }
+
     def _parse_assessments(self, text: str) -> Dict:
-        """Parse assessment and requirement information"""
-        matches = self.patterns['assessment'].finditer(text)
+        """Parse assessment and requirement information with improved patterns"""
         assessments = {
             'ela_passed_date': None,
             'algebra_passed': False,
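The alternative course pattern introduced above expects rows shaped roughly as school year, course code, title, two-letter subject area, letter grade, and credits. A small check against a made-up row (only the regex comes from the commit; the row itself is hypothetical):

import re

alt_pattern = re.compile(
    r"(\d{4}-\d{4})\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([\d.]+)",
    re.DOTALL
)

row = "2023-2024 MAT101 ALGEBRA 1 HONORS MA A 1.0"
print(alt_pattern.search(row).groups())
# ('2023-2024', 'MAT101', 'ALGEBRA 1 HONORS', 'MA', 'A', '1.0')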
@@ -397,17 +497,22 @@ class EnhancedMiamiDadeTranscriptParser:
             }
         }

+        # Try multiple assessment patterns
+        matches = self.patterns['assessment'].finditer(text)
+        if not matches:
+            matches = re.finditer(r"(ENGLISH|ALGEBRA|BIOLOGY|SERVICE).*?(PASSED|MET|YES|NO|\d{2}/\d{4})", text)
+
         for match in matches:
             if match.group(1): # ELA date
                 assessments['ela_passed_date'] = match.group(1)
             elif match.group(2): # Algebra
                 assessments['algebra_passed'] = match.group(2) == "YES"
-            elif "BIOLOGY
+            elif "BIOLOGY" in match.group(0):
                 assessments['biology_passed'] = True
-            elif match.group(
+            elif "SERVICE" in match.group(0):
                 assessments['community_service'] = {
                     'met': True,
-                    'hours': int(match.group(4))
+                    'hours': int(match.group(4)) if match.group(4) else 0
                 }

         return assessments
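One behavioral note on the `if not matches:` fallback added here (the same construct guards credits_matches in _parse_academic_summary): re.finditer returns an iterator object, which is truthy even when it yields no matches, so the alternative pattern on the following line is never actually reached. A minimal demonstration:

import re

matches = re.finditer(r"\d+", "no digits in this text")
print(bool(matches))  # True: an iterator object is always truthy
print(list(matches))  # []: it simply yields nothing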
@@ -415,7 +520,6 @@ class EnhancedMiamiDadeTranscriptParser:
 # Initialize the enhanced parser
 transcript_parser = EnhancedMiamiDadeTranscriptParser()

-# ========== ACADEMIC ANALYZER ==========
 class AcademicAnalyzer:
     def __init__(self):
         self.gpa_scale = {
@@ -439,12 +543,16 @@ class AcademicAnalyzer:
         }

         try:
+            # Handle multiple transcript formats
             if parsed_data.get('format') == 'progress_summary':
                 weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
                 unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
-
+            elif parsed_data.get('format') == 'miami_dade_v2':
                 weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
                 unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
+            else: # Alternative format
+                weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', 0))
+                unweighted_gpa = weighted_gpa # Assume same if not specified

             if weighted_gpa >= 4.5:
                 analysis['rating'] = 'Excellent'
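analyze_gpa now branches on the 'format' tag the parser attaches ('progress_summary', 'miami_dade_v2', or the 'alternative' fallback). A stripped-down sketch of just that dispatch as a standalone helper; the function name and the None/dict guards are mine, added so the sketch runs on its own:

from typing import Dict, Tuple

def extract_gpas(parsed_data: Dict) -> Tuple[float, float]:
    # Returns (weighted_gpa, unweighted_gpa) for the three parser output formats
    fmt = parsed_data.get('format')
    if fmt == 'progress_summary':
        info = parsed_data.get('student_info', {})
        return float(info.get('weighted_gpa', 0)), float(info.get('unweighted_gpa', 0))
    if fmt == 'miami_dade_v2':
        gpa = parsed_data.get('academic_summary', {}).get('gpa', {})
        # 'or 0' guards against the None placeholders the parser uses as defaults
        return float(gpa.get('district') or 0), float(gpa.get('state') or 0)
    # 'alternative' format: the commit treats the stored GPA as a single value
    value = parsed_data.get('academic_summary', {}).get('gpa', 0)
    if isinstance(value, dict):  # the fallback parser may store a dict here
        value = value.get('district') or 0
    return float(value or 0), float(value or 0)

print(extract_gpas({'format': 'miami_dade_v2',
                    'academic_summary': {'gpa': {'district': 4.2, 'state': 3.6}}}))
# (4.2, 3.6)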
@@ -502,12 +610,16 @@ class AcademicAnalyzer:
                 analysis['comparison'] += "\n\nThe small difference between your weighted and unweighted GPA suggests you might benefit from more challenging courses."

             return analysis
-        except:
+        except Exception as e:
+            logger.error(f"GPA analysis error: {str(e)}")
             return {
                 'rating': 'Unknown',
-                'description': 'Could not analyze GPA',
-                'comparison': '',
-                'improvement_tips': [
+                'description': 'Could not analyze GPA - data may be missing or incomplete',
+                'comparison': 'Please verify your transcript contains GPA information',
+                'improvement_tips': [
+                    "Check that your transcript includes GPA information",
+                    "Ensure the file is clear and all text was extracted properly"
+                ]
             }

     def analyze_graduation_status(self, parsed_data: Dict) -> Dict:
@@ -603,13 +715,14 @@ class AcademicAnalyzer:
             )

             return analysis
-        except:
+        except Exception as e:
+            logger.error(f"Graduation status error: {str(e)}")
             return {
-                'status': 'Could not analyze graduation status',
+                'status': 'Could not analyze graduation status - data may be incomplete',
                 'completion_percentage': 0,
                 'missing_requirements': [],
                 'on_track': False,
-                'timeline': ''
+                'timeline': 'Please verify your transcript contains credit information'
             }

     def analyze_course_rigor(self, parsed_data: Dict) -> Dict:
@@ -678,7 +791,8 @@ class AcademicAnalyzer:
             ]

             return analysis
-        except:
+        except Exception as e:
+            logger.error(f"Course rigor error: {str(e)}")
             return {
                 'advanced_courses': 0,
                 'honors_courses': 0,
@@ -686,7 +800,10 @@ class AcademicAnalyzer:
                 'ib_courses': 0,
                 'de_courses': 0,
                 'rating': 'Unknown',
-                'recommendations': [
+                'recommendations': [
+                    "Could not analyze course rigor - verify your transcript contains course information",
+                    "Check that course titles and types were properly extracted"
+                ]
             }

     def generate_college_recommendations(self, parsed_data: Dict) -> Dict:
@@ -782,13 +899,17 @@ class AcademicAnalyzer:
                 recommendations['improvement_areas'].append("Increase community service involvement")

             return recommendations
-        except:
+        except Exception as e:
+            logger.error(f"College recommendations error: {str(e)}")
             return {
-                'reach': ["Could not generate recommendations"],
+                'reach': ["Could not generate recommendations - insufficient data"],
                 'target': [],
                 'safety': [],
                 'scholarships': [],
-                'improvement_areas': [
+                'improvement_areas': [
+                    "Complete your profile information",
+                    "Ensure your transcript contains GPA and course information"
+                ]
             }

     def generate_study_plan(self, parsed_data: Dict, learning_style: str) -> Dict:
@@ -867,18 +988,30 @@ class AcademicAnalyzer:
             ])

             return plan
-        except:
+        except Exception as e:
+            logger.error(f"Study plan error: {str(e)}")
             return {
-                'weekly_schedule': {'Error': ["Could not generate schedule"]},
-                'study_strategies': [
-
-
+                'weekly_schedule': {'Error': ["Could not generate schedule - course data may be missing"]},
+                'study_strategies': [
+                    "Review your notes regularly",
+                    "Create a consistent study routine",
+                    "Ask teachers for clarification when needed"
+                ],
+                'time_management_tips': [
+                    "Set aside dedicated study time each day",
+                    "Break large tasks into smaller chunks",
+                    "Use a planner to track assignments"
+                ],
+                'resource_recommendations': [
+                    "Khan Academy",
+                    "Quizlet",
+                    "Your textbook and class materials"
+                ]
             }

 # Initialize academic analyzer
 academic_analyzer = AcademicAnalyzer()

-# ========== DATA VISUALIZER ==========
 class DataVisualizer:
     def __init__(self):
         self.color_palette = {
@@ -1146,7 +1279,6 @@ class DataVisualizer:
 # Initialize visualizer
 data_visualizer = DataVisualizer()

-# ========== PROFILE MANAGER ==========
 class EnhancedProfileManager:
     def __init__(self):
         self.profiles_dir = Path(PROFILES_DIR)
@@ -1353,7 +1485,6 @@ class EnhancedProfileManager:
 # Initialize profile manager
 profile_manager = EnhancedProfileManager()

-# ========== TEACHING ASSISTANT ==========
 class EnhancedTeachingAssistant:
     def __init__(self):
         self.context_history = []
@@ -1723,7 +1854,6 @@ class EnhancedTeachingAssistant:
 # Initialize teaching assistant
 teaching_assistant = EnhancedTeachingAssistant()

-# ========== GRADIO INTERFACE ==========
 def create_enhanced_interface():
     with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
         session_token = gr.State(value=generate_session_token())
@@ -2454,5 +2584,4 @@ def create_enhanced_interface():
 app = create_enhanced_interface()

 if __name__ == "__main__":
-    app.launch(server_name="0.0.0.0", server_port=7860)
-
+    app.launch(server_name="0.0.0.0", server_port=7860)