Dannyar608 committed on
Commit
e9299e0
·
verified ·
1 Parent(s): 85e97bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +236 -25
app.py CHANGED
@@ -25,6 +25,10 @@ import hashlib
25
  from concurrent.futures import ThreadPoolExecutor
26
  from pydantic import BaseModel
27
  import plotly.express as px
 
 
 
 
28
 
29
  # ========== CONFIGURATION ==========
30
  PROFILES_DIR = "student_profiles"
@@ -180,6 +184,165 @@ def validate_file(file_obj) -> None:
180
  if file_size > MAX_FILE_SIZE_MB:
181
  raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  # ========== TEXT EXTRACTION FUNCTIONS ==========
184
  def preprocess_text(text: str) -> str:
185
  """Normalize text for more reliable parsing"""
@@ -194,6 +357,31 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
194
  if file_ext == '.pdf':
195
  try:
196
  # First try pdfplumber for better table extraction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  import pdfplumber
198
  with pdfplumber.open(file_path) as pdf:
199
  for page in pdf.pages:
@@ -237,30 +425,6 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
237
  logging.error(f"Text extraction error: {str(e)}")
238
  raise ValueError(f"Failed to extract text: {str(e)}")
239
 
240
def extract_text_from_pdf_with_ocr(file_path: str) -> str:
    """Rasterize a PDF and OCR each page with Tesseract.

    Every page is rendered at 300 dpi, converted to grayscale and thresholded
    at 140 before recognition. Pages that yield 20 or fewer non-blank
    characters are dropped; a page whose OCR call raises is logged and skipped.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        The recognized pages, each prefixed with ``PAGE <n>:``, or the literal
        ``"No readable text found"`` when nothing usable was recognized.

    Raises:
        ValueError: If rendering or image pre-processing fails.
    """
    try:
        import pdf2image

        # Restrict Tesseract to plain alphanumerics and basic punctuation.
        tesseract_args = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;()-/ '
        rendered_pages = pdf2image.convert_from_path(file_path, dpi=300)

        page_chunks = []
        for i, rendered in enumerate(rendered_pages):
            # Grayscale, then hard black/white threshold to raise contrast.
            grayscale = rendered.convert('L')
            binarized = grayscale.point(lambda level: 0 if level < 140 else 255)

            # A failure on one page must not abort the remaining pages.
            try:
                recognized = pytesseract.image_to_string(binarized, config=tesseract_args)
                if len(recognized.strip()) > 20:
                    page_chunks.append(f"PAGE {i+1}:\n{recognized}\n\n")
            except Exception as e:
                logging.warning(f"OCR failed on page {i+1}: {str(e)}")

        combined = "".join(page_chunks)
        if combined:
            return combined
        return "No readable text found"
    except Exception as e:
        raise ValueError(f"OCR processing failed: {str(e)}")
263
-
264
  def extract_text_with_ocr(file_path: str) -> str:
265
  try:
266
  image = Image.open(file_path)
@@ -1215,6 +1379,8 @@ def create_interface():
1215
  .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
1216
  .transcript-results { border-left: 4px solid #4CAF50 !important; padding: 15px !important; background: #f8f8f8 !important; }
1217
  .error-box { border: 1px solid #ff4444 !important; background: #fff8f8 !important; }
 
 
1218
 
1219
  .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
1220
  .dark .quiz-question { background-color: #3d3d3d !important; }
@@ -1223,6 +1389,7 @@ def create_interface():
1223
  .dark .output-markdown { color: #eee !important; }
1224
  .dark .chatbot { background-color: #333 !important; }
1225
  .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
 
1226
  """
1227
 
1228
  # Header
@@ -1448,6 +1615,9 @@ def create_interface():
1448
  "Your profile summary will appear here after saving.",
1449
  label="Profile Summary"
1450
  )
 
 
 
1451
 
1452
  save_btn.click(
1453
  fn=profile_manager.save_profile,
@@ -1457,6 +1627,13 @@ def create_interface():
1457
  book, book_reason, character, character_reason, blog
1458
  ],
1459
  outputs=output_summary
 
 
 
 
 
 
 
1460
  ).then(
1461
  fn=lambda: {3: True},
1462
  inputs=None,
@@ -1478,6 +1655,41 @@ def create_interface():
1478
  outputs=delete_btn
1479
  )
1480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1481
  # ===== TAB 5: AI ASSISTANT =====
1482
  with gr.Tab("AI Assistant", id=4):
1483
  gr.Markdown("## Your Personalized Learning Assistant")
@@ -1573,5 +1785,4 @@ app = create_interface()
1573
 
1574
  if __name__ == "__main__":
1575
  app.launch()
1576
-
1577
 
 
25
  from concurrent.futures import ThreadPoolExecutor
26
  from pydantic import BaseModel
27
  import plotly.express as px
28
+ import pdfplumber
29
+ from io import BytesIO
30
+ import base64
31
+ import matplotlib.pyplot as plt
32
 
33
  # ========== CONFIGURATION ==========
34
  PROFILES_DIR = "student_profiles"
 
184
  if file_size > MAX_FILE_SIZE_MB:
185
  raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
186
 
187
+ # ========== ENHANCED PDF PARSING ==========
188
def _table_cell(table, row_idx, col_idx):
    """Return table[row_idx][col_idx] as text, or '' if the cell is absent or None."""
    try:
        value = table[row_idx][col_idx]
    except (IndexError, TypeError):
        return ''
    return value if value is not None else ''


def _cell_to_float(value, default=0):
    """Convert a table cell to float; blank or non-numeric cells give *default*."""
    if not value:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def parse_transcript_pdf(file_path: str):
    """Parse a transcript PDF into structured data using pdfplumber.

    On every page the first three extracted tables are inspected:

    * table 0 - student summary, recognized by "Graduation Progress Summary"
      in its first cell (only the first match is used);
    * table 1 - graduation requirements, recognized by "Code" in its first cell;
    * table 2 - course history, recognized by "Requirement" in its first cell.

    Missing, short, or None cells are treated as empty text and non-numeric
    amounts as 0, so an unexpected layout produces blank fields (or an empty
    ``student_info``) instead of an IndexError/TypeError/ValueError.

    Args:
        file_path: Path of the PDF to parse.

    Returns:
        A ``(student_info, requirements, courses)`` tuple: a dict of summary
        fields (empty when no summary table was recognized), a list of
        requirement dicts and a list of course dicts.
    """
    student_info = {}
    requirements = []
    courses = []

    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            # Only tables are needed; the page text was extracted but never used.
            tables = page.extract_tables()

            # Parse student information from the first table
            if not student_info and tables:
                summary = tables[0]
                header_text = _table_cell(summary, 0, 0)
                if "Graduation Progress Summary" in header_text:
                    id_and_name = _table_cell(summary, 1, 0).split('-')
                    header_parts = header_text.split('|')
                    student_info = {
                        'name': id_and_name[-1].strip(),
                        'id': id_and_name[0].strip(),
                        'school': header_parts[1].strip() if len(header_parts) > 1 else '',
                        'cohort': _table_cell(summary, 0, 1).replace('Cohort', '').strip(),
                        'grade': _table_cell(summary, 2, 0).replace('Current Grade:', '').strip(),
                        'grad_year': _table_cell(summary, 2, 1).replace('YOG', '').strip(),
                        'gpa_weighted': _table_cell(summary, 2, 2).replace('Weighted GPA', '').strip(),
                        'gpa_unweighted': _table_cell(summary, 0, 2).replace('Un-weighted GPA', '').strip(),
                        'service_hours': _table_cell(summary, 0, 3).replace('Comm Serv Hours', '').strip(),
                        'service_date': _table_cell(summary, 2, 3).replace('Comm Serv Date', '').strip(),
                        'total_credits': _table_cell(summary, 2, 4).replace('Total Credits Earned', '').strip(),
                        'virtual_grade': _table_cell(summary, 0, 4).replace('Virtual Grade', '').strip()
                    }

            # Parse requirements table
            if len(tables) > 1 and "Code" in _table_cell(tables[1], 0, 0):
                for row in tables[1][1:]:
                    if len(row) >= 6 and row[0] and row[0] != 'Total':
                        status_cell = row[5]
                        requirements.append({
                            'code': row[0],
                            'desc': row[1],
                            'required': _cell_to_float(row[2]),
                            'waived': _cell_to_float(row[3]),
                            'completed': _cell_to_float(row[4]),
                            # Status is only numeric when written as a percentage.
                            'status': _cell_to_float(status_cell.replace('%', ''))
                            if status_cell and '%' in status_cell else 0
                        })

            # Parse course history table
            if len(tables) > 2 and "Requirement" in _table_cell(tables[2], 0, 0):
                for row in tables[2][1:]:
                    if len(row) >= 10 and row[0]:
                        credit_cell = row[9]
                        courses.append({
                            'requirement': row[0],
                            'year': row[1],
                            'grade': row[2],
                            'course_code': row[3],
                            'course_name': row[4],
                            'term': row[5],
                            'district_num': row[6],
                            'grade_earned': row[7],
                            'included': row[8],
                            # 'inProgress' (or any non-numeric marker) counts as 0 credits.
                            'credits': _cell_to_float(credit_cell),
                            'status': 'Completed' if credit_cell and credit_cell != 'inProgress' else 'In Progress'
                        })

    return student_info, requirements, courses
250
+
251
def _parse_numeric_field(raw):
    """Return *raw* as a float, or 0.0 when it is blank, None or not numeric."""
    try:
        return float(str(raw).replace(',', '').strip())
    except (TypeError, ValueError):
        return 0.0


def analyze_college_readiness(student_info, requirements, courses):
    """Rate a student's GPA, course rigor and community service.

    Args:
        student_info: Dict that may carry ``'gpa_weighted'`` and
            ``'service_hours'`` (numbers or numeric strings). Blank or
            non-numeric values are treated as 0 instead of raising.
        requirements: Accepted for interface compatibility; not used here.
        courses: Iterable of dicts with a ``'course_name'`` entry. A missing
            or None name counts as a non-rigorous course.

    Returns:
        A dict with the string ratings ``'gpa_rating'``, ``'rigor_rating'``
        and ``'service_rating'`` plus a ``'recommendations'`` list of strings.
    """
    import re

    analysis = {
        'gpa_rating': '',
        'rigor_rating': '',
        'service_rating': '',
        'recommendations': []
    }

    # GPA Analysis
    weighted_gpa = _parse_numeric_field(student_info.get('gpa_weighted', 0))
    if weighted_gpa >= 4.5:
        analysis['gpa_rating'] = 'Excellent (Highly Competitive)'
    elif weighted_gpa >= 3.8:
        analysis['gpa_rating'] = 'Strong (Competitive)'
    elif weighted_gpa >= 3.0:
        analysis['gpa_rating'] = 'Good'
    else:
        analysis['gpa_rating'] = 'Below Average'

    # Course Rigor Analysis
    # Match "AP"/"DE" as whole words only: a plain substring test also counts
    # names such as "LANDSCAPE", "DESIGN" or "DEBATE".
    ap_pattern = re.compile(r'\bAP\b')
    de_pattern = re.compile(r'\bDE\b')
    course_names = [course.get('course_name') or '' for course in courses]

    ap_count = sum(1 for name in course_names if ap_pattern.search(name))
    de_count = sum(1 for name in course_names if de_pattern.search(name))
    honors_count = sum(1 for name in course_names if 'Honors' in name)

    total_rigorous = ap_count + de_count + honors_count
    if total_rigorous >= 10:
        analysis['rigor_rating'] = 'Very High'
    elif total_rigorous >= 6:
        analysis['rigor_rating'] = 'High'
    elif total_rigorous >= 3:
        analysis['rigor_rating'] = 'Moderate'
    else:
        analysis['rigor_rating'] = 'Low'

    # Community Service Analysis
    service_hours = _parse_numeric_field(student_info.get('service_hours', 0))
    if service_hours >= 100:
        analysis['service_rating'] = 'Exceptional'
    elif service_hours >= 50:
        analysis['service_rating'] = 'Strong'
    elif service_hours >= 30:
        analysis['service_rating'] = 'Adequate'
    else:
        analysis['service_rating'] = 'Limited'

    # Generate recommendations
    if weighted_gpa < 3.5 and ap_count < 3:
        analysis['recommendations'].append("Consider taking more advanced courses (AP/DE) to strengthen your academic profile")
    if service_hours < 50:
        analysis['recommendations'].append("Additional community service hours could enhance your college applications")

    return analysis
304
+
305
def create_requirements_visualization_matplotlib(requirements):
    """Build a horizontal bar chart of requirement completion percentages.

    Args:
        requirements: Iterable of dicts with a ``'code'`` label and a numeric
            ``'status'`` percentage; values above 100 are drawn as 100.

    Returns:
        The matplotlib figure holding the chart.
    """
    def _bar_colour(pct):
        # Green when complete, amber when partially done, red when untouched.
        if pct >= 100:
            return '#4CAF50'
        if pct > 0:
            return '#FFC107'
        return '#F44336'

    figure, axes = plt.subplots(figsize=(10, 6))

    labels = []
    percentages = []
    for requirement in requirements:
        labels.append(requirement['code'])
        percentages.append(min(requirement['status'], 100))

    bar_colours = [_bar_colour(pct) for pct in percentages]
    rectangles = axes.barh(labels, percentages, color=bar_colours)

    axes.set_xlabel('Completion (%)')
    axes.set_title('Requirement Completion Status')
    axes.set_xlim(0, 100)

    # Print each bar's value just past its right-hand end.
    for rect in rectangles:
        pct = rect.get_width()
        mid_y = rect.get_y() + rect.get_height()/2
        axes.text(pct + 1, mid_y, f'{pct:.1f}%', ha='left', va='center')

    plt.tight_layout()
    return figure
326
+
327
def create_credits_distribution_visualization(requirements):
    """Build a pie chart of completed credits grouped by subject area.

    Requirement codes are bucketed into "Core Subjects" (A-English, B-Math,
    C-Science, D-Social), "Electives" (G-Electives) and "Arts/PE"
    (E-Arts, F-PE); codes outside these lists are ignored.

    Args:
        requirements: Iterable of dicts with a ``'code'`` and a numeric
            ``'completed'`` credit amount. May be empty.

    Returns:
        The matplotlib figure. When no credits fall into any bucket the
        figure shows a "No credit data available" message instead of a pie,
        because a pie cannot be normalised when every slice is zero.
    """
    category_codes = {
        'Core Subjects': ('A-English', 'B-Math', 'C-Science', 'D-Social'),
        'Electives': ('G-Electives',),
        'Arts/PE': ('E-Arts', 'F-PE'),
    }

    fig, ax = plt.subplots(figsize=(8, 8))

    credit_labels = list(category_codes)
    credit_values = [
        sum(req['completed'] for req in requirements if req['code'] in codes)
        for codes in category_codes.values()
    ]
    colors = ['#3498db', '#2ecc71', '#9b59b6']

    if sum(credit_values) > 0:
        ax.pie(credit_values, labels=credit_labels, autopct='%1.1f%%',
               colors=colors, startangle=90)
    else:
        # All-zero slices would be divided by a zero total; show a placeholder.
        ax.text(0.5, 0.5, 'No credit data available',
                ha='center', va='center', transform=ax.transAxes)
        ax.axis('off')
    ax.set_title('Credit Distribution')

    plt.tight_layout()
    return fig
345
+
346
  # ========== TEXT EXTRACTION FUNCTIONS ==========
347
  def preprocess_text(text: str) -> str:
348
  """Normalize text for more reliable parsing"""
 
357
  if file_ext == '.pdf':
358
  try:
359
  # First try pdfplumber for better table extraction
360
+ student_info, requirements, courses = parse_transcript_pdf(file_path)
361
+ if student_info:
362
+ # Convert parsed data to text format for compatibility
363
+ text += f"STUDENT INFORMATION:\n"
364
+ text += f"Name: {student_info.get('name', '')}\n"
365
+ text += f"ID: {student_info.get('id', '')}\n"
366
+ text += f"School: {student_info.get('school', '')}\n"
367
+ text += f"Grade: {student_info.get('grade', '')}\n"
368
+ text += f"Graduation Year: {student_info.get('grad_year', '')}\n"
369
+ text += f"Weighted GPA: {student_info.get('gpa_weighted', '')}\n"
370
+ text += f"Unweighted GPA: {student_info.get('gpa_unweighted', '')}\n"
371
+ text += f"Service Hours: {student_info.get('service_hours', '')}\n"
372
+ text += f"Total Credits: {student_info.get('total_credits', '')}\n\n"
373
+
374
+ text += "GRADUATION REQUIREMENTS:\n"
375
+ for req in requirements:
376
+ text += f"{req['code']} | {req['desc']} | Required: {req['required']} | Completed: {req['completed']} | Status: {req['status']}%\n"
377
+
378
+ text += "\nCOURSE HISTORY:\n"
379
+ for course in courses:
380
+ text += f"{course['course_code']} | {course['course_name']} | Grade: {course['grade_earned']} | Credits: {course['credits']} | Status: {course['status']}\n"
381
+
382
+ return text
383
+
384
+ # Fall back to regular text extraction if specialized parsing fails
385
  import pdfplumber
386
  with pdfplumber.open(file_path) as pdf:
387
  for page in pdf.pages:
 
425
  logging.error(f"Text extraction error: {str(e)}")
426
  raise ValueError(f"Failed to extract text: {str(e)}")
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  def extract_text_with_ocr(file_path: str) -> str:
429
  try:
430
  image = Image.open(file_path)
 
1379
  .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
1380
  .transcript-results { border-left: 4px solid #4CAF50 !important; padding: 15px !important; background: #f8f8f8 !important; }
1381
  .error-box { border: 1px solid #ff4444 !important; background: #fff8f8 !important; }
1382
+ .metric-box { background-color: white; border-radius: 10px; padding: 15px; margin: 10px 0; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }
1383
+ .recommendation { background-color: #fff8e1; padding: 10px; border-left: 4px solid #ffc107; margin: 5px 0; }
1384
 
1385
  .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
1386
  .dark .quiz-question { background-color: #3d3d3d !important; }
 
1389
  .dark .output-markdown { color: #eee !important; }
1390
  .dark .chatbot { background-color: #333 !important; }
1391
  .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
1392
+ .dark .metric-box { background-color: #333 !important; }
1393
  """
1394
 
1395
  # Header
 
1615
  "Your profile summary will appear here after saving.",
1616
  label="Profile Summary"
1617
  )
1618
+ with gr.Row():
1619
+ req_viz_matplotlib = gr.Plot(label="Requirements Progress", visible=False)
1620
+ credits_viz = gr.Plot(label="Credits Distribution", visible=False)
1621
 
1622
  save_btn.click(
1623
  fn=profile_manager.save_profile,
 
1627
  book, book_reason, character, character_reason, blog
1628
  ],
1629
  outputs=output_summary
1630
+ ).then(
1631
+ fn=lambda td: (
1632
+ gr.update(visible=True),
1633
+ gr.update(visible=True)
1634
+ ) if td and 'requirements' in td else (gr.update(visible=False), gr.update(visible=False)),
1635
+ inputs=transcript_data,
1636
+ outputs=[req_viz_matplotlib, credits_viz]
1637
  ).then(
1638
  fn=lambda: {3: True},
1639
  inputs=None,
 
1655
  outputs=delete_btn
1656
  )
1657
 
1658
+ # Create visualizations when profile is loaded
1659
+ load_btn.click(
1660
+ fn=lambda name: profile_manager.load_profile(name, session_token.value),
1661
+ inputs=load_profile_dropdown,
1662
+ outputs=None
1663
+ ).then(
1664
+ fn=lambda profile: (
1665
+ profile.get('name', ''),
1666
+ profile.get('age', ''),
1667
+ profile.get('interests', ''),
1668
+ profile.get('learning_style', ''),
1669
+ profile.get('favorites', {}).get('movie', ''),
1670
+ profile.get('favorites', {}).get('movie_reason', ''),
1671
+ profile.get('favorites', {}).get('show', ''),
1672
+ profile.get('favorites', {}).get('show_reason', ''),
1673
+ profile.get('favorites', {}).get('book', ''),
1674
+ profile.get('favorites', {}).get('book_reason', ''),
1675
+ profile.get('favorites', {}).get('character', ''),
1676
+ profile.get('favorites', {}).get('character_reason', ''),
1677
+ profile.get('blog', ''),
1678
+ profile.get('transcript', {}),
1679
+ gr.update(value="Profile loaded successfully!"),
1680
+ create_requirements_visualization_matplotlib(profile.get('transcript', {}).get('requirements', [])),
1681
+ create_credits_distribution_visualization(profile.get('transcript', {}).get('requirements', []))
1682
+ ),
1683
+ inputs=None,
1684
+ outputs=[
1685
+ name, age, interests, learning_output,
1686
+ movie, movie_reason, show, show_reason,
1687
+ book, book_reason, character, character_reason,
1688
+ blog, transcript_data, output_summary,
1689
+ req_viz_matplotlib, credits_viz
1690
+ ]
1691
+ )
1692
+
1693
  # ===== TAB 5: AI ASSISTANT =====
1694
  with gr.Tab("AI Assistant", id=4):
1695
  gr.Markdown("## Your Personalized Learning Assistant")
 
1785
 
1786
  if __name__ == "__main__":
1787
  app.launch()
 
1788