Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Files Community

Dannyar608 committed on May 22

Commit

e9299e0

verified ·

1 Parent(s): 85e97bb

Update app.py

Browse files

Files changed (1) hide show

app.py +236 -25

app.py CHANGED Viewed

@@ -25,6 +25,10 @@ import hashlib
 from concurrent.futures import ThreadPoolExecutor
 from pydantic import BaseModel
 import plotly.express as px
 # ========== CONFIGURATION ==========
 PROFILES_DIR = "student_profiles"
@@ -180,6 +184,165 @@ def validate_file(file_obj) -> None:
     if file_size > MAX_FILE_SIZE_MB:
         raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
 # ========== TEXT EXTRACTION FUNCTIONS ==========
 def preprocess_text(text: str) -> str:
     """Normalize text for more reliable parsing"""
@@ -194,6 +357,31 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
         if file_ext == '.pdf':
             try:
                 # First try pdfplumber for better table extraction
                 import pdfplumber
                 with pdfplumber.open(file_path) as pdf:
                     for page in pdf.pages:
@@ -237,30 +425,6 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
         logging.error(f"Text extraction error: {str(e)}")
         raise ValueError(f"Failed to extract text: {str(e)}")
-def extract_text_from_pdf_with_ocr(file_path: str) -> str:
-    try:
-        import pdf2image
-        images = pdf2image.convert_from_path(file_path, dpi=300)
-        custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;()-/ '
-        text = ""
-        for i, img in enumerate(images):
-            # Pre-process image
-            img = img.convert('L')  # Grayscale
-            img = img.point(lambda x: 0 if x < 140 else 255)  # Increase contrast
-            # OCR with retry logic
-            try:
-                page_text = pytesseract.image_to_string(img, config=custom_config)
-                if len(page_text.strip()) > 20:  # Minimum viable text
-                    text += f"PAGE {i+1}:\n{page_text}\n\n"
-            except Exception as e:
-                logging.warning(f"OCR failed on page {i+1}: {str(e)}")
-        return text if text else "No readable text found"
-    except Exception as e:
-        raise ValueError(f"OCR processing failed: {str(e)}")
 def extract_text_with_ocr(file_path: str) -> str:
     try:
         image = Image.open(file_path)
@@ -1215,6 +1379,8 @@ def create_interface():
         .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
         .transcript-results { border-left: 4px solid #4CAF50 !important; padding: 15px !important; background: #f8f8f8 !important; }
         .error-box { border: 1px solid #ff4444 !important; background: #fff8f8 !important; }
         .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
         .dark .quiz-question { background-color: #3d3d3d !important; }
@@ -1223,6 +1389,7 @@ def create_interface():
         .dark .output-markdown { color: #eee !important; }
         .dark .chatbot { background-color: #333 !important; }
         .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
         """
         # Header
@@ -1448,6 +1615,9 @@ def create_interface():
                             "Your profile summary will appear here after saving.",
                             label="Profile Summary"
                         )
                 save_btn.click(
                     fn=profile_manager.save_profile,
@@ -1457,6 +1627,13 @@ def create_interface():
                         book, book_reason, character, character_reason, blog
                     ],
                     outputs=output_summary
                 ).then(
                     fn=lambda: {3: True},
                     inputs=None,
@@ -1478,6 +1655,41 @@ def create_interface():
                     outputs=delete_btn
                 )
             # ===== TAB 5: AI ASSISTANT =====
             with gr.Tab("AI Assistant", id=4):
                 gr.Markdown("## Your Personalized Learning Assistant")
@@ -1573,5 +1785,4 @@ app = create_interface()
 if __name__ == "__main__":
     app.launch()

 from concurrent.futures import ThreadPoolExecutor
 from pydantic import BaseModel
 import plotly.express as px
+import pdfplumber
+from io import BytesIO
+import base64
+import matplotlib.pyplot as plt
 # ========== CONFIGURATION ==========
 PROFILES_DIR = "student_profiles"
     if file_size > MAX_FILE_SIZE_MB:
         raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
+# ========== ENHANCED PDF PARSING ==========
def _table_cell(table, row_idx, col_idx):
    """Return the text of ``table[row_idx][col_idx]``, or '' if it is missing or not a string."""
    try:
        value = table[row_idx][col_idx]
    except (IndexError, TypeError):
        return ''
    return value if isinstance(value, str) else ''


def _parse_number(raw, default=0):
    """Convert a table cell to ``float``; return *default* for blank or non-numeric cells."""
    if raw is None:
        return default
    try:
        return float(str(raw).strip())
    except ValueError:
        return default


def parse_transcript_pdf(file_path: str):
    """Parse a PDF transcript into structured data using pdfplumber.

    Each page's tables are inspected by position:

    * table 0 is read as the student summary when its first cell contains
      "Graduation Progress Summary" (only the first such table is used);
    * table 1 is read as the requirements table when its first cell
      contains "Code";
    * table 2 is read as the course history when its first cell contains
      "Requirement".

    Missing or empty cells are tolerated: absent summary fields become ''
    and non-numeric numeric cells become 0, so one malformed cell no longer
    aborts the whole parse.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A ``(student_info, requirements, courses)`` tuple: a dict of summary
        fields (empty when no summary table was recognised), a list of
        requirement dicts and a list of course dicts.
    """
    student_info = {}
    requirements = []
    courses = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            # Only the tables are needed; plain page text is not extracted here.
            tables = page.extract_tables()

            # Parse student information from the first table
            if not student_info and tables:
                summary = tables[0]
                header = _table_cell(summary, 0, 0)
                if "Graduation Progress Summary" in header:
                    id_and_name = _table_cell(summary, 1, 0)
                    header_parts = header.split('|')
                    student_info = {
                        'name': id_and_name.split('-')[-1].strip(),
                        'id': id_and_name.split('-')[0].strip(),
                        'school': header_parts[1].strip() if len(header_parts) > 1 else '',
                        'cohort': _table_cell(summary, 0, 1).replace('Cohort', '').strip(),
                        'grade': _table_cell(summary, 2, 0).replace('Current Grade:', '').strip(),
                        'grad_year': _table_cell(summary, 2, 1).replace('YOG', '').strip(),
                        'gpa_weighted': _table_cell(summary, 2, 2).replace('Weighted GPA', '').strip(),
                        'gpa_unweighted': _table_cell(summary, 0, 2).replace('Un-weighted GPA', '').strip(),
                        'service_hours': _table_cell(summary, 0, 3).replace('Comm Serv Hours', '').strip(),
                        'service_date': _table_cell(summary, 2, 3).replace('Comm Serv Date', '').strip(),
                        'total_credits': _table_cell(summary, 2, 4).replace('Total Credits Earned', '').strip(),
                        'virtual_grade': _table_cell(summary, 0, 4).replace('Virtual Grade', '').strip()
                    }

            # Parse requirements table
            if len(tables) > 1 and "Code" in _table_cell(tables[1], 0, 0):
                for row in tables[1][1:]:
                    if len(row) >= 6 and row[0] and row[0] != 'Total':
                        status_cell = row[5]
                        requirements.append({
                            'code': row[0],
                            'desc': row[1],
                            'required': _parse_number(row[2]),
                            'waived': _parse_number(row[3]),
                            'completed': _parse_number(row[4]),
                            # Only cells written as a percentage count as a status.
                            'status': (
                                _parse_number(status_cell.replace('%', ''))
                                if status_cell and '%' in status_cell else 0
                            )
                        })

            # Parse course history table
            if len(tables) > 2 and "Requirement" in _table_cell(tables[2], 0, 0):
                for row in tables[2][1:]:
                    if len(row) >= 10 and row[0]:
                        credit_cell = row[9]
                        # A blank or 'inProgress' credit cell marks an unfinished course.
                        earned = bool(credit_cell) and credit_cell != 'inProgress'
                        courses.append({
                            'requirement': row[0],
                            'year': row[1],
                            'grade': row[2],
                            'course_code': row[3],
                            'course_name': row[4],
                            'term': row[5],
                            'district_num': row[6],
                            'grade_earned': row[7],
                            'included': row[8],
                            'credits': _parse_number(credit_cell) if earned else 0,
                            'status': 'Completed' if earned else 'In Progress'
                        })
    return student_info, requirements, courses
def _coerce_number(raw, default=0.0):
    """Return *raw* as a finite ``float``, or *default* if it is missing, blank or non-numeric."""
    import math

    try:
        value = float(str(raw).strip())
    except ValueError:
        return default
    return value if math.isfinite(value) else default


def analyze_college_readiness(student_info, requirements, courses):
    """Analyze the student's profile for college readiness.

    Args:
        student_info: Dict of summary fields; ``'gpa_weighted'`` and
            ``'service_hours'`` are read. Values may be numbers or strings;
            blank or non-numeric values are treated as 0.
        requirements: Accepted for interface compatibility; not used by the
            analysis.
        courses: Iterable of course dicts; ``'course_name'`` is read. A
            missing or ``None`` name counts as not rigorous.

    Returns:
        Dict with ``'gpa_rating'``, ``'rigor_rating'``, ``'service_rating'``
        (strings) and ``'recommendations'`` (list of strings).
    """
    import re

    analysis = {
        'gpa_rating': '',
        'rigor_rating': '',
        'service_rating': '',
        'recommendations': []
    }

    # GPA Analysis
    weighted_gpa = _coerce_number(student_info.get('gpa_weighted', 0))
    if weighted_gpa >= 4.5:
        analysis['gpa_rating'] = 'Excellent (Highly Competitive)'
    elif weighted_gpa >= 3.8:
        analysis['gpa_rating'] = 'Strong (Competitive)'
    elif weighted_gpa >= 3.0:
        analysis['gpa_rating'] = 'Good'
    else:
        analysis['gpa_rating'] = 'Below Average'

    # Course Rigor Analysis
    # Match "AP"/"DE" as whole words so names such as "APPLIED DESIGN" are
    # not miscounted as advanced courses.
    course_names = [course.get('course_name') or '' for course in courses]
    ap_count = sum(1 for name in course_names if re.search(r'\bAP\b', name))
    de_count = sum(1 for name in course_names if re.search(r'\bDE\b', name))
    honors_count = sum(1 for name in course_names if 'Honors' in name)
    total_rigorous = ap_count + de_count + honors_count
    if total_rigorous >= 10:
        analysis['rigor_rating'] = 'Very High'
    elif total_rigorous >= 6:
        analysis['rigor_rating'] = 'High'
    elif total_rigorous >= 3:
        analysis['rigor_rating'] = 'Moderate'
    else:
        analysis['rigor_rating'] = 'Low'

    # Community Service Analysis
    # Fractional hours are accepted; the thresholds below are whole numbers,
    # so no truncation is needed before comparing.
    service_hours = _coerce_number(student_info.get('service_hours', 0))
    if service_hours >= 100:
        analysis['service_rating'] = 'Exceptional'
    elif service_hours >= 50:
        analysis['service_rating'] = 'Strong'
    elif service_hours >= 30:
        analysis['service_rating'] = 'Adequate'
    else:
        analysis['service_rating'] = 'Limited'

    # Generate recommendations
    if weighted_gpa < 3.5 and ap_count < 3:
        analysis['recommendations'].append("Consider taking more advanced courses (AP/DE) to strengthen your academic profile")
    if service_hours < 50:
        analysis['recommendations'].append("Additional community service hours could enhance your college applications")
    return analysis
def create_requirements_visualization_matplotlib(requirements):
    """Create a horizontal bar chart of requirement completion percentages.

    Args:
        requirements: List of requirement dicts; ``'code'`` is used as the
            bar label and ``'status'`` as the completion percentage. A
            ``None`` status is drawn as 0 and values above 100 are capped.

    Returns:
        The matplotlib figure containing the chart.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    req_names = [req['code'] for req in requirements]
    req_completion = [min(req['status'] or 0, 100) for req in requirements]
    colors = ['#4CAF50' if x >= 100 else '#FFC107' if x > 0 else '#F44336' for x in req_completion]
    bars = ax.barh(req_names, req_completion, color=colors)
    ax.set_xlabel('Completion (%)')
    ax.set_title('Requirement Completion Status')
    # Leave headroom past 100 so the value label of a fully completed bar
    # (drawn at width + 1) stays inside the axes; ticks still stop at 100.
    ax.set_xlim(0, 110)
    ax.set_xticks(range(0, 101, 20))
    # Add value labels
    for bar in bars:
        width = bar.get_width()
        ax.text(width + 1, bar.get_y() + bar.get_height()/2,
                f'{width:.1f}%',
                ha='left', va='center')
    # Lay out this figure explicitly instead of pyplot's global "current"
    # figure, which may be a different one when several are being built.
    fig.tight_layout()
    return fig
def create_credits_distribution_visualization(requirements):
    """Create a pie chart of completed credits grouped by category.

    Credits are summed from each requirement's ``'completed'`` value and
    grouped by ``'code'``: core subjects (A-English, B-Math, C-Science,
    D-Social), electives (G-Electives) and arts/PE (E-Arts, F-PE).
    Requirements with any other code are ignored.

    Args:
        requirements: List of requirement dicts with ``'code'`` and
            ``'completed'`` keys. May be empty.

    Returns:
        The matplotlib figure. When no credits fall into any category the
        figure shows a "No credit data available" message instead of a pie.
    """
    fig, ax = plt.subplots(figsize=(8, 8))
    core_credits = sum(req['completed'] for req in requirements if req['code'] in ['A-English', 'B-Math', 'C-Science', 'D-Social'])
    elective_credits = sum(req['completed'] for req in requirements if req['code'] in ['G-Electives'])
    other_credits = sum(req['completed'] for req in requirements if req['code'] in ['E-Arts', 'F-PE'])
    credit_values = [core_credits, elective_credits, other_credits]
    credit_labels = ['Core Subjects', 'Electives', 'Arts/PE']
    colors = ['#3498db', '#2ecc71', '#9b59b6']
    if sum(credit_values) > 0:
        ax.pie(credit_values, labels=credit_labels, autopct='%1.1f%%',
               colors=colors, startangle=90)
    else:
        # Wedge sizes are fractions of the total, so an all-zero total
        # (e.g. a profile without a transcript) cannot be drawn as a pie.
        ax.text(0.5, 0.5, 'No credit data available',
                ha='center', va='center', transform=ax.transAxes)
        ax.set_axis_off()
    ax.set_title('Credit Distribution')
    # Lay out this figure explicitly rather than pyplot's global current figure.
    fig.tight_layout()
    return fig
 # ========== TEXT EXTRACTION FUNCTIONS ==========
 def preprocess_text(text: str) -> str:
     """Normalize text for more reliable parsing"""
         if file_ext == '.pdf':
             try:
                 # First try pdfplumber for better table extraction
+                student_info, requirements, courses = parse_transcript_pdf(file_path)
+                if student_info:
+                    # Convert parsed data to text format for compatibility
+                    text += f"STUDENT INFORMATION:\n"
+                    text += f"Name: {student_info.get('name', '')}\n"
+                    text += f"ID: {student_info.get('id', '')}\n"
+                    text += f"School: {student_info.get('school', '')}\n"
+                    text += f"Grade: {student_info.get('grade', '')}\n"
+                    text += f"Graduation Year: {student_info.get('grad_year', '')}\n"
+                    text += f"Weighted GPA: {student_info.get('gpa_weighted', '')}\n"
+                    text += f"Unweighted GPA: {student_info.get('gpa_unweighted', '')}\n"
+                    text += f"Service Hours: {student_info.get('service_hours', '')}\n"
+                    text += f"Total Credits: {student_info.get('total_credits', '')}\n\n"
+                    text += "GRADUATION REQUIREMENTS:\n"
+                    for req in requirements:
+                        text += f"{req['code']} | {req['desc']} | Required: {req['required']} | Completed: {req['completed']} | Status: {req['status']}%\n"
+                    text += "\nCOURSE HISTORY:\n"
+                    for course in courses:
+                        text += f"{course['course_code']} | {course['course_name']} | Grade: {course['grade_earned']} | Credits: {course['credits']} | Status: {course['status']}\n"
+                    return text
+                # Fall back to regular text extraction if specialized parsing fails
                 import pdfplumber
                 with pdfplumber.open(file_path) as pdf:
                     for page in pdf.pages:
         logging.error(f"Text extraction error: {str(e)}")
         raise ValueError(f"Failed to extract text: {str(e)}")
 def extract_text_with_ocr(file_path: str) -> str:
     try:
         image = Image.open(file_path)
         .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
         .transcript-results { border-left: 4px solid #4CAF50 !important; padding: 15px !important; background: #f8f8f8 !important; }
         .error-box { border: 1px solid #ff4444 !important; background: #fff8f8 !important; }
+        .metric-box { background-color: white; border-radius: 10px; padding: 15px; margin: 10px 0; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }
+        .recommendation { background-color: #fff8e1; padding: 10px; border-left: 4px solid #ffc107; margin: 5px 0; }
         .dark .tab-content { background-color: #2d2d2d !important; border-color: #444 !important; }
         .dark .quiz-question { background-color: #3d3d3d !important; }
         .dark .output-markdown { color: #eee !important; }
         .dark .chatbot { background-color: #333 !important; }
         .dark .chatbot .user, .dark .chatbot .assistant { color: #eee !important; }
+        .dark .metric-box { background-color: #333 !important; }
         """
         # Header
                             "Your profile summary will appear here after saving.",
                             label="Profile Summary"
                         )
+                        with gr.Row():
+                            req_viz_matplotlib = gr.Plot(label="Requirements Progress", visible=False)
+                            credits_viz = gr.Plot(label="Credits Distribution", visible=False)
                 save_btn.click(
                     fn=profile_manager.save_profile,
                         book, book_reason, character, character_reason, blog
                     ],
                     outputs=output_summary
+                ).then(
+                    fn=lambda td: (
+                        gr.update(visible=True),
+                        gr.update(visible=True)
+                    ) if td and 'requirements' in td else (gr.update(visible=False), gr.update(visible=False)),
+                    inputs=transcript_data,
+                    outputs=[req_viz_matplotlib, credits_viz]
                 ).then(
                     fn=lambda: {3: True},
                     inputs=None,
                     outputs=delete_btn
                 )
+                # Create visualizations when profile is loaded
+                load_btn.click(
+                    fn=lambda name: profile_manager.load_profile(name, session_token.value),
+                    inputs=load_profile_dropdown,
+                    outputs=None
+                ).then(
+                    fn=lambda profile: (
+                        profile.get('name', ''),
+                        profile.get('age', ''),
+                        profile.get('interests', ''),
+                        profile.get('learning_style', ''),
+                        profile.get('favorites', {}).get('movie', ''),
+                        profile.get('favorites', {}).get('movie_reason', ''),
+                        profile.get('favorites', {}).get('show', ''),
+                        profile.get('favorites', {}).get('show_reason', ''),
+                        profile.get('favorites', {}).get('book', ''),
+                        profile.get('favorites', {}).get('book_reason', ''),
+                        profile.get('favorites', {}).get('character', ''),
+                        profile.get('favorites', {}).get('character_reason', ''),
+                        profile.get('blog', ''),
+                        profile.get('transcript', {}),
+                        gr.update(value="Profile loaded successfully!"),
+                        create_requirements_visualization_matplotlib(profile.get('transcript', {}).get('requirements', [])),
+                        create_credits_distribution_visualization(profile.get('transcript', {}).get('requirements', []))
+                    ),
+                    inputs=None,
+                    outputs=[
+                        name, age, interests, learning_output,
+                        movie, movie_reason, show, show_reason,
+                        book, book_reason, character, character_reason,
+                        blog, transcript_data, output_summary,
+                        req_viz_matplotlib, credits_viz
+                    ]
+                )
             # ===== TAB 5: AI ASSISTANT =====
             with gr.Tab("AI Assistant", id=4):
                 gr.Markdown("## Your Personalized Learning Assistant")
 if __name__ == "__main__":
     app.launch()