Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed on Aug 5

Commit

8ebff9f

1 Parent(s): 78de9fc

debugging initial loading issue

Browse files

Files changed (1) hide show

components/review_dashboard_page.py +168 -53

components/review_dashboard_page.py CHANGED Viewed

@@ -151,13 +151,37 @@ class ReviewDashboardPage:
             #     gr.Error(f"❌ Failed to load audio: {filename_to_load}. Error: {e}")
             #     return None, None, gr.update(value=None, autoplay=False)
         def load_review_items_fn(session):
             user_id = session.get("user_id")
             username = session.get("username")
             if not user_id or not username:
                 log.warning("load_review_items_fn: user not found in session")
-                # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotator_name_placeholder, annotated_at, validation_status, audio_update, rejection_reason_update, rejection_mode, btn_reject_update
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
             # Check if user is in Phase 2 (should be a reviewer)
@@ -176,9 +200,8 @@ class ReviewDashboardPage:
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-            # Load annotations from target annotator
             with get_db() as db:
-                # try:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                 if not target_annotator_obj:
@@ -187,59 +210,47 @@ class ReviewDashboardPage:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
-                # Get all annotations by target annotator (including deleted ones)
-                annotations = db.query(Annotation).join(TTSData).filter(
                     Annotation.annotator_id == target_annotator_obj.id
-                ).options(
-                    orm.joinedload(Annotation.tts_data),
-                    orm.joinedload(Annotation.annotator)
-                ).order_by(Annotation.id).all() # Added order_by for consistency
-                log.info(f"Fetched {len(annotations)} annotations for target annotator ID {target_annotator_obj.id}")
                 items = []
-                for annotation in annotations:
-                    # Check if annotation is deleted (no annotated_sentence or empty)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
-                    # Check if this annotation has been reviewed by current user
-                    existing_validation = db.query(Validation).filter_by(
-                        annotation_id=annotation.id,
-                        validator_id=user_id
-                    ).first()
-                    validation_status = "Not Reviewed"
-                    rejection_reason_val = "" # For the input box
-                    rejection_visible_val = False # For the input box
-                    if existing_validation:
-                        if existing_validation.validated:
-                            validation_status = "Approved"
-                        else:
-                            validation_status = f"Rejected"
-                            if existing_validation.description:
-                                validation_status += f" ({existing_validation.description})"
-                                rejection_reason_val = existing_validation.description
-                                rejection_visible_val = True
-                    # For deleted annotations, show special status
-                    if is_deleted:
-                        annotated_sentence_display = "[DELETED ANNOTATION]"
-                        if validation_status == "Not Reviewed":
-                            validation_status = "Not Reviewed (Deleted)"
-                    else:
-                        annotated_sentence_display = annotation.annotated_sentence
                     items.append({
                         "annotation_id": annotation.id,
-                        "tts_id": annotation.tts_data.id,
-                        "filename": annotation.tts_data.filename,
-                        "sentence": annotation.tts_data.sentence,
                         "annotated_sentence": annotated_sentence_display,
                         "is_deleted": is_deleted,
-                        # "annotator_name": annotation.annotator.name, # Anonymized
                         "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
-                        "validation_status": validation_status
                     })
                 # Find the first item that is not reviewed (prioritize non-deleted annotations)
@@ -269,7 +280,7 @@ class ReviewDashboardPage:
                 # Set initial display
                 if items:
                     initial_item = items[initial_idx]
-                    review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations."
                     # Ensure correct order of return values for 12 outputs
                     # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update
                     rejection_reason_val = ""
@@ -314,6 +325,30 @@ class ReviewDashboardPage:
                 return "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
             current_item = items[idx]
             rejection_reason = ""
             rejection_visible = False
@@ -346,7 +381,11 @@ class ReviewDashboardPage:
             if not items:
                 return 0
             if direction == "next":
-                return min(current_idx + 1, len(items) - 1)
             else:  # prev
                 return max(current_idx - 1, 0)
@@ -459,6 +498,86 @@ class ReviewDashboardPage:
             #     gr.Warning(f"Invalid Data ID format: {target_data_id}")
             return current_idx
         # Output definitions
         review_display_outputs = [
             self.tts_id, self.filename, self.sentence, self.ann_sentence,
@@ -487,12 +606,8 @@ class ReviewDashboardPage:
             outputs=self.interactive_ui_elements
         )
-        # Load audio when filename changes
-        self.filename.change(
-            fn=download_voice_fn,
-            inputs=[self.filename],
-            outputs=[self.audio, self.original_audio_state, self.audio]
-        )
         # Navigation buttons
         for btn, direction in [(self.btn_prev, "prev"), (self.btn_next, "next")]:

             #     gr.Error(f"❌ Failed to load audio: {filename_to_load}. Error: {e}")
             #     return None, None, gr.update(value=None, autoplay=False)
+        def get_validation_status_for_item(db, annotation_id, user_id, annotation_obj):
+            """Get validation status for a specific item - called on-demand"""
+            validation = db.query(Validation).filter_by(
+                annotation_id=annotation_id,
+                validator_id=user_id
+            ).first()
+            # Check if annotation is deleted
+            is_deleted = not annotation_obj.annotated_sentence or annotation_obj.annotated_sentence.strip() == ""
+            validation_status = "Not Reviewed"
+            if validation:
+                if validation.validated:
+                    validation_status = "Approved"
+                else:
+                    validation_status = "Rejected"
+                    if validation.description:
+                        validation_status += f" ({validation.description})"
+            # For deleted annotations, show special status
+            if is_deleted and validation_status == "Not Reviewed":
+                validation_status = "Not Reviewed (Deleted)"
+            return validation_status, is_deleted
         def load_review_items_fn(session):
             user_id = session.get("user_id")
             username = session.get("username")
             if not user_id or not username:
                 log.warning("load_review_items_fn: user not found in session")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
             # Check if user is in Phase 2 (should be a reviewer)
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+            # Load annotations from target annotator with FAST INITIAL LOADING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                 if not target_annotator_obj:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
+                # FAST INITIAL QUERY: Load only essential data without complex validation processing
+                # Reduced batch size for instant loading in HuggingFace spaces
+                INITIAL_BATCH_SIZE = 5  # Load only 5 items initially for instant response
+                # Simple query to get basic annotation data quickly
+                initial_query = db.query(
+                    Annotation,
+                    TTSData.filename,
+                    TTSData.sentence
+                ).join(
+                    TTSData, Annotation.tts_data_id == TTSData.id
+                ).filter(
+                    Annotation.annotator_id == target_annotator_obj.id
+                ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
+                initial_results = initial_query.all()
+                # Get total count for progress info (this is fast)
+                total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
+                ).count()
+                log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
+                # Process items with minimal data - validation status will be loaded on-demand
                 items = []
+                for annotation, filename, sentence in initial_results:
+                    # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
+                    annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
                     items.append({
                         "annotation_id": annotation.id,
+                        "tts_id": annotation.tts_data_id,
+                        "filename": filename,
+                        "sentence": sentence,
                         "annotated_sentence": annotated_sentence_display,
                         "is_deleted": is_deleted,
                         "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
+                        "validation_status": "Loading...",  # Will be loaded on-demand
+                        "validation_loaded": False  # Track if validation status has been loaded
                     })
                 # Find the first item that is not reviewed (prioritize non-deleted annotations)
                 # Set initial display
                 if items:
                     initial_item = items[initial_idx]
+                    review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
                     # Ensure correct order of return values for 12 outputs
                     # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update
                     rejection_reason_val = ""
                 return "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
             current_item = items[idx]
+            # Load validation status on-demand if not already loaded
+            if not current_item.get("validation_loaded", False):
+                user_id = session.get("user_id")
+                if user_id:
+                    with get_db() as db:
+                        try:
+                            # Get the full annotation object for validation processing
+                            annotation_obj = db.query(Annotation).filter_by(id=current_item["annotation_id"]).first()
+                            if annotation_obj:
+                                validation_status, is_deleted = get_validation_status_for_item(db, current_item["annotation_id"], user_id, annotation_obj)
+                                current_item["validation_status"] = validation_status
+                                current_item["is_deleted"] = is_deleted
+                                current_item["validation_loaded"] = True
+                                # Update displayed annotation if deleted
+                                if is_deleted:
+                                    current_item["annotated_sentence"] = "[DELETED ANNOTATION]"
+                                log.info(f"Loaded validation status for item {idx}: {validation_status}")
+                        except Exception as e:
+                            log.error(f"Error loading validation status for item {idx}: {e}")
+                            current_item["validation_status"] = "Error loading status"
             rejection_reason = ""
             rejection_visible = False
             if not items:
                 return 0
             if direction == "next":
+                new_idx = min(current_idx + 1, len(items) - 1)
+                # Check if we're getting close to the end - load more items if needed
+                if new_idx >= len(items) - 2 and len(items) % 5 == 0:  # Near end and items is a multiple of initial batch size
+                    log.info(f"User is near end of loaded items ({new_idx}/{len(items)}), may need to load more items")
+                return new_idx
             else:  # prev
                 return max(current_idx - 1, 0)
             #     gr.Warning(f"Invalid Data ID format: {target_data_id}")
             return current_idx
+        def load_more_items_fn(items, session, current_batch_size=100):
+            """Load more items when user needs them (pagination support)"""
+            user_id = session.get("user_id")
+            username = session.get("username")
+            if not user_id or not username:
+                return items  # Return existing items if no user session
+            # Find target annotator
+            target_annotator = None
+            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
+                if reviewer_name == username:
+                    target_annotator = annotator_name
+                    break
+            if not target_annotator:
+                return items
+            with get_db() as db:
+                target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
+                if not target_annotator_obj:
+                    return items
+                # Load next batch starting from where we left off
+                offset = len(items)
+                query = db.query(
+                    Annotation,
+                    TTSData.filename,
+                    TTSData.sentence,
+                    Validation.validated,
+                    Validation.description
+                ).join(
+                    TTSData, Annotation.tts_data_id == TTSData.id
+                ).outerjoin(
+                    Validation,
+                    (Validation.annotation_id == Annotation.id) &
+                    (Validation.validator_id == user_id)
+                ).filter(
+                    Annotation.annotator_id == target_annotator_obj.id
+                ).order_by(Annotation.id).offset(offset).limit(current_batch_size)
+                results = query.all()
+                # Process new items same as before
+                new_items = []
+                for annotation, filename, sentence, validated, validation_description in results:
+                    is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
+                    validation_status = "Not Reviewed"
+                    if validated is not None:
+                        if validated:
+                            validation_status = "Approved"
+                        else:
+                            validation_status = "Rejected"
+                            if validation_description:
+                                validation_status += f" ({validation_description})"
+                    if is_deleted:
+                        annotated_sentence_display = "[DELETED ANNOTATION]"
+                        if validation_status == "Not Reviewed":
+                            validation_status = "Not Reviewed (Deleted)"
+                    else:
+                        annotated_sentence_display = annotation.annotated_sentence
+                    new_items.append({
+                        "annotation_id": annotation.id,
+                        "tts_id": annotation.tts_data_id,
+                        "filename": filename,
+                        "sentence": sentence,
+                        "annotated_sentence": annotated_sentence_display,
+                        "is_deleted": is_deleted,
+                        "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
+                        "validation_status": validation_status
+                    })
+                # Combine with existing items
+                all_items = items + new_items
+                log.info(f"Loaded {len(new_items)} more items, total now: {len(all_items)}")
+                return all_items
         # Output definitions
         review_display_outputs = [
             self.tts_id, self.filename, self.sentence, self.ann_sentence,
             outputs=self.interactive_ui_elements
         )
+        # Audio loading is now manual only via the Load Audio button
+        # Removed automatic filename.change callback to prevent slow loading during initialization
         # Navigation buttons
         for btn, direction in [(self.btn_prev, "prev"), (self.btn_next, "next")]: