Dannyar608 committed
Commit ed548e3 · verified · 1 Parent(s): 0d7fd90

Update app.py

Files changed (1)
  1. app.py +104 -49
app.py CHANGED
@@ -182,7 +182,7 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
             for page in doc:
                 text += page.get_text("text") + '\n'
             if not text.strip():
-                raise ValueError("PyMuPDF returned empty text")
+                raise ValueError("PyMuPDF returned empty text - the PDF may be image-based")
         except Exception as e:
             logging.warning(f"PyMuPDF failed: {str(e)}. Trying OCR fallback...")
             text = extract_text_from_pdf_with_ocr(file_path)
@@ -194,13 +194,13 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
         text = clean_extracted_text(text)
 
         if not text.strip():
-            raise ValueError("No text could be extracted from the file")
+            raise ValueError("No text could be extracted from the file. Please ensure the file is clear and readable.")
 
         return text
 
     except Exception as e:
         logging.error(f"Text extraction error: {str(e)}")
-        raise gr.Error(f"Text extraction error: {str(e)}\nTips: Use high-quality images/PDFs with clear text.")
+        raise gr.Error(f"Failed to extract text: {str(e)}\n\nTIPS:\n1. For PDFs, try saving as a different PDF format\n2. For images, ensure they are high-quality and well-lit\n3. Try cropping to just the transcript area")
 
 def extract_text_from_pdf_with_ocr(file_path: str) -> str:
     """Fallback PDF text extraction using OCR."""
@@ -215,7 +215,7 @@ def extract_text_from_pdf_with_ocr(file_path: str) -> str:
             img = img.point(lambda x: 0 if x < 128 else 255) # Binarize
             text += pytesseract.image_to_string(img, config='--psm 6 --oem 3') + '\n'
     except Exception as e:
-        raise ValueError(f"PDF OCR failed: {str(e)}")
+        raise ValueError(f"PDF OCR failed: {str(e)}. The PDF may be password protected or corrupted.")
     return text
 
 def extract_text_with_ocr(file_path: str) -> str:
@@ -232,7 +232,7 @@ def extract_text_with_ocr(file_path: str) -> str:
         text = pytesseract.image_to_string(image, config=custom_config)
         return text
     except Exception as e:
-        raise ValueError(f"OCR processing failed: {str(e)}")
+        raise ValueError(f"OCR processing failed: {str(e)}. Please ensure the image is clear and not blurry.")
 
 def clean_extracted_text(text: str) -> str:
     """Clean and normalize the extracted text."""
@@ -305,8 +305,8 @@ class TranscriptParser:
 
         except Exception as e:
             logging.error(f"Error parsing transcript: {str(e)}")
-            raise gr.Error(f"Error parsing transcript: {str(e)}")
-
+            raise gr.Error(f"Error parsing transcript: {str(e)}\n\nThis may be due to an unsupported transcript format. Please ensure you're uploading an official Miami-Dade transcript or contact support.")
+
     def _extract_student_info(self, text: str):
         """Enhanced student info extraction for Miami-Dade format"""
         # Extract basic student info
@@ -320,6 +320,17 @@ class TranscriptParser:
                 "current_grade": student_match.group(3),
                 "graduation_year": student_match.group(4)
             }
+        else:
+            # Fallback pattern for alternative formats
+            fallback_pattern = r"Student:\s*([^\n]+)\s*ID:\s*(\d+)\s*Grade:\s*(\d+)"
+            fallback_match = re.search(fallback_pattern, text, re.IGNORECASE)
+            if fallback_match:
+                self.student_data = {
+                    "name": fallback_match.group(1).strip(),
+                    "id": fallback_match.group(2),
+                    "current_grade": fallback_match.group(3),
+                    "graduation_year": "Unknown"
+                }
 
         # Extract GPA info
         gpa_pattern = r"Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
@@ -330,6 +341,15 @@ class TranscriptParser:
                 "unweighted_gpa": float(gpa_match.group(1)),
                 "weighted_gpa": float(gpa_match.group(2))
             })
+        else:
+            # Try alternative GPA patterns
+            alt_gpa_pattern = r"GPA\s*([\d.]+)\s*/\s*([\d.]+)"
+            alt_match = re.search(alt_gpa_pattern, text)
+            if alt_match:
+                self.student_data.update({
+                    "unweighted_gpa": float(alt_match.group(1)),
+                    "weighted_gpa": float(alt_match.group(2))
+                })
 
         # Extract credits and service hours
         credits_pattern = r"Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)"
@@ -346,7 +366,10 @@ class TranscriptParser:
         # Find the requirements table
         req_table_start = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status", text)
         if not req_table_start:
-            raise ValueError("Could not find requirements table header")
+            # Try alternative table headers
+            req_table_start = re.search(r"Requirement\s*Req\s*Comp\s*Status", text)
+            if not req_table_start:
+                raise ValueError("Could not find requirements table header")
 
         req_text = text[req_table_start.start():]
 
@@ -389,7 +412,10 @@ class TranscriptParser:
         # Find the course history table
         course_header = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits", text)
         if not course_header:
-            raise ValueError("Could not find course history table header")
+            # Try alternative course history headers
+            course_header = re.search(r"Course\s*Grade\s*Credit\s*Year", text)
+            if not course_header:
+                raise ValueError("Could not find course history table header")
 
         course_text = text[course_header.start():]
 
@@ -615,7 +641,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
         raise gr.Error("The model ran out of memory. Try with a smaller transcript.")
     except Exception as e:
         logging.error(f"AI parsing error: {str(e)}")
-        raise gr.Error(f"Error processing transcript: {str(e)}")
+        raise gr.Error(f"Error processing transcript: {str(e)}\n\nPlease try again or contact support with this error message.")
 
 def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
     """Main function to parse transcript files with better error handling"""
@@ -633,7 +659,7 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
         text = extract_text_from_file(file_obj.name, file_ext)
 
         if not text.strip():
-            raise ValueError("No text could be extracted from the file")
+            raise ValueError("No text could be extracted from the file. The file may be corrupted or in an unsupported format.")
 
         # Use AI for parsing with progress updates
         if progress:
@@ -650,15 +676,13 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
         return output_text, parsed_data
 
     except Exception as e:
-        error_msg = f"Error processing transcript: {str(e)}"
-        logging.error(error_msg)
-
-        # Provide helpful tips based on error type
-        if "No text could be extracted" in str(e):
-            error_msg += "\n\nTips: Please ensure your file is clear and readable. Try scanning at a higher resolution if it's an image."
-        elif "requirements table header" in str(e):
-            error_msg += "\n\nTips: This appears to be an unsupported transcript format. Please contact support."
+        error_msg = f"Error processing transcript: {str(e)}"
+        if "PDF" in str(e):
+            error_msg += "\n\nTIPS FOR PDF FILES:\n1. Try opening and re-saving the PDF in a different format\n2. Ensure the PDF isn't password protected\n3. Try taking a screenshot of the transcript and uploading as an image"
+        elif "image" in str(e).lower():
+            error_msg += "\n\nTIPS FOR IMAGE FILES:\n1. Ensure the image is clear and well-lit\n2. Try cropping to just the transcript area\n3. Avoid blurry or low-resolution images"
 
+        logging.error(error_msg)
         return error_msg, None
 
 # ========== LEARNING STYLE QUIZ ==========
@@ -773,7 +797,7 @@ class LearningStyleQuiz:
         """Evaluate quiz answers and generate enhanced results."""
         answers = list(answers) # Convert tuple to list
         if len(answers) != len(self.questions):
-            raise gr.Error("Not all questions were answered")
+            raise gr.Error("Please answer all questions before submitting")
 
         scores = {style: 0 for style in self.learning_styles}
 
@@ -1309,6 +1333,8 @@ def create_interface():
     .file-upload { border: 2px dashed #4CAF50 !important; padding: 20px !important; border-radius: 8px !important; }
    .progress-bar { height: 5px; background: linear-gradient(to right, #4CAF50, #8BC34A); margin-bottom: 15px; border-radius: 3px; }
    .quiz-question { margin-bottom: 15px; padding: 15px; background: #f5f5f5; border-radius: 5px; }
+    .quiz-results { margin-top: 20px; padding: 20px; background: #e8f5e9; border-radius: 8px; }
+    .error-message { color: #d32f2f; background-color: #ffebee; padding: 10px; border-radius: 4px; margin: 10px 0; }
     """
 
     # Header
@@ -1347,6 +1373,7 @@ def create_interface():
                         type="filepath"
                     )
                     upload_btn = gr.Button("Analyze Transcript", variant="primary")
+                    file_error = gr.HTML(visible=False)
 
                 with gr.Column(scale=2):
                     transcript_output = gr.Textbox(
@@ -1358,6 +1385,9 @@ def create_interface():
 
            def process_transcript(file_obj, current_tab_status):
                try:
+                    if not file_obj:
+                        raise ValueError("Please upload a file first")
+
                    output_text, data = parse_transcript(file_obj)
                    if "Error" not in output_text:
                        new_status = current_tab_status.copy()
@@ -1368,49 +1398,66 @@ def create_interface():
                            new_status,
                            gr.update(elem_classes="completed-tab"),
                            gr.update(interactive=True),
+                            gr.update(visible=False),
+                            gr.update(visible=False)
+                        )
+                    else:
+                        return (
+                            output_text,
+                            None,
+                            current_tab_status,
+                            gr.update(),
+                            gr.update(),
+                            gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
                            gr.update(visible=False)
                        )
                except Exception as e:
+                    error_msg = f"❌ Error: {str(e)}"
+                    if "PDF" in str(e):
+                        error_msg += "\n\nTIPS FOR PDF FILES:\n1. Try opening and re-saving the PDF\n2. Ensure it's not password protected\n3. Try converting to an image"
                    return (
-                        f"Error: {str(e)}",
-                        None,
+                        error_msg,
+                        None,
                        current_tab_status,
                        gr.update(),
                        gr.update(),
-                        gr.update(visible=True, value=f"<div class='nav-message'>Error: {str(e)}</div>")
+                        gr.update(visible=True, value=f"<div class='error-message'>{error_msg}</div>"),
+                        gr.update(visible=False)
                    )
 
            upload_btn.click(
                process_transcript,
                inputs=[file_input, tab_completed],
-                outputs=[transcript_output, transcript_data, tab_completed, step1, step2, nav_message]
+                outputs=[transcript_output, transcript_data, tab_completed, step1, step2, file_error, nav_message]
            )
 
        # ===== TAB 2: LEARNING STYLE QUIZ =====
        with gr.Tab("Learning Style Quiz", id=1):
-            with gr.Row():
-                with gr.Column(scale=1):
-                    gr.Markdown("### Step 2: Discover Your Learning Style")
-                    progress = gr.HTML("<div class='progress-bar' style='width: 0%'></div>")
+            with gr.Column():
+                gr.Markdown("### Step 2: Discover Your Learning Style")
+                progress = gr.HTML("<div class='progress-bar' style='width: 0%'></div>")
+
+                quiz_components = []
+                with gr.Accordion("Quiz Questions", open=True):
+                    for i, (question, options) in enumerate(zip(learning_style_quiz.questions, learning_style_quiz.options)):
+                        with gr.Group(elem_classes="quiz-question"):
+                            q = gr.Radio(
+                                options,
+                                label=f"{i+1}. {question}",
+                                show_label=True
+                            )
+                            quiz_components.append(q)
+
+                with gr.Row():
                    quiz_submit = gr.Button("Submit Quiz", variant="primary")
-                    quiz_alert = gr.HTML(visible=False)
+                    quiz_clear = gr.Button("Clear Answers")
 
-                with gr.Column(scale=2):
-                    quiz_components = []
-                    with gr.Accordion("Quiz Questions", open=True):
-                        for i, (question, options) in enumerate(zip(learning_style_quiz.questions, learning_style_quiz.options)):
-                            with gr.Group(elem_classes="quiz-question"):
-                                q = gr.Radio(
-                                    options,
-                                    label=f"{i+1}. {question}",
-                                    show_label=True
-                                )
-                                quiz_components.append(q)
-
-                    learning_output = gr.Markdown(
-                        label="Your Learning Style Results",
-                        visible=False
-                    )
+                quiz_alert = gr.HTML(visible=False)
+                learning_output = gr.Markdown(
+                    label="Your Learning Style Results",
+                    visible=False,
+                    elem_classes="quiz-results"
+                )
 
            # Update progress bar as questions are answered
            for component in quiz_components:
@@ -1448,7 +1495,7 @@ def create_interface():
                        current_tab_status,
                        gr.update(),
                        gr.update(),
-                        gr.update(value=f"<div class='nav-message'>Error: {str(e)}</div>", visible=True),
+                        gr.update(value=f"<div class='error-message'>Error: {str(e)}</div>", visible=True),
                        gr.update(visible=False)
                    )
 
@@ -1457,6 +1504,14 @@ def create_interface():
                inputs=[tab_completed] + quiz_components,
                outputs=[learning_output, learning_output, tab_completed, step2, step3, quiz_alert, nav_message]
            )
+
+            quiz_clear.click(
+                fn=lambda: [None] * len(quiz_components),
+                outputs=quiz_components
+            ).then(
+                fn=lambda: gr.HTML("<div class='progress-bar' style='width: 0%'></div>"),
+                outputs=progress
+            )
 
        # ===== TAB 3: PERSONAL QUESTIONS =====
        with gr.Tab("Personal Profile", id=2):
@@ -1507,7 +1562,7 @@ def create_interface():
                        gr.update(),
                        gr.update(),
                        gr.update(visible=False),
-                        gr.update(visible=True, value=f"<div class='nav-message'>Error: {str(e)}</div>")
+                        gr.update(visible=True, value=f"<div class='error-message'>Error: {str(e)}</div>")
                    )
 
            save_personal_btn.click(
@@ -1566,7 +1621,7 @@ def create_interface():
                        current_tab_status,
                        gr.update(),
                        gr.update(),
-                        gr.update(visible=True, value=f"<div class='nav-message'>Error: {str(e)}</div>")
+                        gr.update(visible=True, value=f"<div class='error-message'>Error: {str(e)}</div>")
                    )
 
            save_btn.click(
@@ -1654,7 +1709,7 @@ def create_interface():
        if not tab_completed_status.get(current_tab, False):
            return (
                gr.Tabs(selected=current_tab),
-                gr.update(value=f"⚠️ Complete Step {current_tab+1} first!", visible=True)
+                gr.update(value=f"<div class='error-message'>⚠️ Please complete Step {current_tab+1} first!</div>", visible=True)
            )
 
        return gr.Tabs(selected=tab_index), gr.update(visible=False)