Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed 26 days ago

Commit

2e99fbf

1 Parent(s): 8dd3ae7

index finding debug

Browse files

Files changed (1) hide show

components/review_dashboard_page.py +113 -6

components/review_dashboard_page.py CHANGED Viewed

@@ -340,8 +340,8 @@ class ReviewDashboardPage:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
                 # FAST INITIAL QUERY: Load only essential data without complex validation processing
-                # Reduced batch size for instant loading in HuggingFace spaces
-                INITIAL_BATCH_SIZE = 5  # Load only 5 items initially for instant response
                 # Get the target annotator's assigned intervals
                 assigned_intervals = db.query(AnnotationInterval).filter(
@@ -397,8 +397,8 @@ class ReviewDashboardPage:
                 # Query to get annotations with a window around the first unreviewed item
                 if not all_reviewed and first_unreviewed_tts_id:
-                    # Load a window around the first unreviewed TTS ID (some before, some after)
-                    WINDOW_BEFORE = 2  # Load 2 items before the first unreviewed
                     WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1  # Rest after
                     # Get a range starting before the first unreviewed item
@@ -590,7 +590,7 @@ class ReviewDashboardPage:
                 return f"🔍 **Phase 2 Review Mode** - No annotations found for review."
         def navigate_and_load_fn(items, current_idx, direction, session):
-            """Combined navigation and loading function"""
             if not items:
                 return items, 0, ""
@@ -610,7 +610,19 @@ class ReviewDashboardPage:
                     return items, new_idx, ""  # No review info update needed
             else:  # prev
                 new_idx = max(current_idx - 1, 0)
-                return items, new_idx, ""  # No review info update needed
         def save_validation_fn(items, idx, session, approved: bool, rejection_reason: str = ""):
             if not items or idx >= len(items):
@@ -944,6 +956,101 @@ class ReviewDashboardPage:
                 log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
                 return all_items, total_count
         # Output definitions
         review_display_outputs = [
             self.tts_id, self.filename, self.sentence, self.ann_sentence,

                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
                 # FAST INITIAL QUERY: Load only essential data without complex validation processing
+                # Increased batch size for better navigation experience
+                INITIAL_BATCH_SIZE = 10  # Load 10 items initially for better navigation
                 # Get the target annotator's assigned intervals
                 assigned_intervals = db.query(AnnotationInterval).filter(
                 # Query to get annotations with a window around the first unreviewed item
                 if not all_reviewed and first_unreviewed_tts_id:
+                    # Load a larger window around the first unreviewed TTS ID for better navigation
+                    WINDOW_BEFORE = 5  # Load 5 items before the first unreviewed
                     WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1  # Rest after
                     # Get a range starting before the first unreviewed item
                 return f"🔍 **Phase 2 Review Mode** - No annotations found for review."
         def navigate_and_load_fn(items, current_idx, direction, session):
+            """Combined navigation and loading function with dynamic loading in both directions"""
             if not items:
                 return items, 0, ""
                     return items, new_idx, ""  # No review info update needed
             else:  # prev
                 new_idx = max(current_idx - 1, 0)
+                # Load more items when user reaches the FIRST item of the batch
+                should_load_previous = (new_idx == 0 and current_idx == 0)
+                if should_load_previous:
+                    log.info(f"User reached beginning of loaded items, will load previous items")
+                    # Load previous items
+                    updated_items, total_count, loaded_count = load_previous_items_fn(items, session, current_batch_size=5)
+                    # Adjust index to account for new items loaded at the beginning
+                    adjusted_idx = new_idx + loaded_count
+                    # Update review info with new count
+                    review_info = update_review_info_fn(updated_items, total_count)
+                    return updated_items, adjusted_idx, review_info
+                else:
+                    return items, new_idx, ""  # No review info update needed
         def save_validation_fn(items, idx, session, approved: bool, rejection_reason: str = ""):
             if not items or idx >= len(items):
                 log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
                 return all_items, total_count
+        def load_previous_items_fn(items, session, current_batch_size=5):
+            """Load items before the current batch when user navigates backward.
+
+            Looks up the annotator whose reviewer (per ``conf.REVIEW_MAPPING``)
+            is the logged-in user, fetches up to ``current_batch_size`` of that
+            annotator's annotations whose id is lower than the first loaded
+            item's ``annotation_id``, and prepends them to ``items``.
+
+            Args:
+                items: Already-loaded item dicts; ``items[0]["annotation_id"]``
+                    is used as the exclusive upper bound for the query.
+                session: Mapping read with ``.get`` for ``"user_id"`` and
+                    ``"username"``.
+                current_batch_size: Maximum number of earlier annotations to
+                    fetch in this call.
+
+            Returns:
+                A 3-tuple ``(all_items, total_count, loaded_count)``:
+                ``all_items`` is the newly fetched items followed by ``items``,
+                ``total_count`` is the number of the annotator's annotations
+                inside the assigned intervals, and ``loaded_count`` is how many
+                items were prepended. Every early exit (no session user, no
+                mapping, no annotator row, no intervals) returns
+                ``(items, 0, 0)``.
+
+            NOTE(review): on early exit ``total_count`` is 0 rather than the
+            real total; the caller in ``navigate_and_load_fn`` forwards it to
+            ``update_review_info_fn`` -- confirm that is the intended display.
+            """
+            user_id = session.get("user_id")
+            username = session.get("username")
+            if not user_id or not username:
+                return items, 0, 0  # Return existing items if no user session
+            # Find target annotator: REVIEW_MAPPING is iterated as
+            # annotator name -> reviewer name; the first entry whose reviewer
+            # equals the current username wins.
+            target_annotator = None
+            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
+                if reviewer_name == username:
+                    target_annotator = annotator_name
+                    break
+            if not target_annotator:
+                return items, 0, 0
+            with get_db() as db:
+                target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
+                if not target_annotator_obj:
+                    return items, 0, 0
+                # Get the target annotator's assigned intervals
+                assigned_intervals = db.query(AnnotationInterval).filter(
+                    AnnotationInterval.annotator_id == target_annotator_obj.id
+                ).all()
+                if not assigned_intervals:
+                    return items, 0, 0
+                # Count total annotations within assigned intervals for progress info
+                # (one COUNT query per interval; intervals missing either bound are skipped)
+                total_count = 0
+                for interval in assigned_intervals:
+                    if interval.start_index is None or interval.end_index is None:
+                        continue
+                    interval_count = db.query(Annotation).join(
+                        TTSData, Annotation.tts_data_id == TTSData.id
+                    ).filter(
+                        Annotation.annotator_id == target_annotator_obj.id,
+                        TTSData.id >= interval.start_index,
+                        TTSData.id <= interval.end_index
+                    ).count()
+                    total_count += interval_count
+                # Get the first loaded annotation id to load items before it.
+                # With an empty ``items`` the bound becomes float('inf'), i.e. no upper limit.
+                # NOTE(review): confirm the database driver accepts an infinite float bound.
+                first_loaded_id = items[0]["annotation_id"] if items else float('inf')
+                # LOAD ITEMS BEFORE: Use id-based pagination to get previous items
+                # The join on AnnotationInterval keeps only rows whose TTS id lies
+                # inside one of this annotator's intervals.
+                # NOTE(review): overlapping intervals would presumably yield duplicate rows -- verify.
+                query = db.query(
+                    Annotation,
+                    TTSData.filename,
+                    TTSData.sentence
+                ).join(
+                    TTSData, Annotation.tts_data_id == TTSData.id
+                ).join(
+                    AnnotationInterval,
+                    and_(
+                        AnnotationInterval.annotator_id == target_annotator_obj.id,
+                        TTSData.id >= AnnotationInterval.start_index,
+                        TTSData.id <= AnnotationInterval.end_index
+                    )
+                ).filter(
+                    Annotation.annotator_id == target_annotator_obj.id,
+                    Annotation.id < first_loaded_id
+                ).order_by(Annotation.id.desc()).limit(current_batch_size)
+                # Descending order + limit selects the ids closest below first_loaded_id
+                results = query.all()
+                results.reverse()  # Restore ascending order
+                # Process new items with minimal data - validation status loaded on-demand
+                new_items = []
+                for annotation, filename, sentence in results:
+                    # Check if annotation is deleted (minimal processing):
+                    # a missing, empty, or whitespace-only annotated sentence counts as deleted
+                    is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
+                    annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
+                    new_items.append({
+                        "annotation_id": annotation.id,
+                        "tts_id": annotation.tts_data_id,
+                        "filename": filename,
+                        "sentence": sentence,
+                        "annotated_sentence": annotated_sentence_display,
+                        "is_deleted": is_deleted,
+                        "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
+                        "validation_status": "Loading...",  # Will be loaded on-demand
+                        "validation_loaded": False  # Track if validation status has been loaded
+                    })
+                # Combine with existing items (new items go to the front)
+                all_items = new_items + items
+                # loaded_count is returned so the caller can shift its index past the prepended items
+                loaded_count = len(new_items)
+                log.info(f"Loaded {loaded_count} items before id {first_loaded_id}, total now: {len(all_items)}")
+                return all_items, total_count, loaded_count
         # Output definitions
         review_display_outputs = [
             self.tts_id, self.filename, self.sentence, self.ann_sentence,