vargha committed on
Commit
2e99fbf
·
1 Parent(s): 8dd3ae7

index finding debug

Browse files
Files changed (1) hide show
  1. components/review_dashboard_page.py +113 -6
components/review_dashboard_page.py CHANGED
@@ -340,8 +340,8 @@ class ReviewDashboardPage:
340
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
341
 
342
  # FAST INITIAL QUERY: Load only essential data without complex validation processing
343
- # Reduced batch size for instant loading in HuggingFace spaces
344
- INITIAL_BATCH_SIZE = 5 # Load only 5 items initially for instant response
345
 
346
  # Get the target annotator's assigned intervals
347
  assigned_intervals = db.query(AnnotationInterval).filter(
@@ -397,8 +397,8 @@ class ReviewDashboardPage:
397
 
398
  # Query to get annotations with a window around the first unreviewed item
399
  if not all_reviewed and first_unreviewed_tts_id:
400
- # Load a window around the first unreviewed TTS ID (some before, some after)
401
- WINDOW_BEFORE = 2 # Load 2 items before the first unreviewed
402
  WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1 # Rest after
403
 
404
  # Get a range starting before the first unreviewed item
@@ -590,7 +590,7 @@ class ReviewDashboardPage:
590
  return f"πŸ” **Phase 2 Review Mode** - No annotations found for review."
591
 
592
  def navigate_and_load_fn(items, current_idx, direction, session):
593
- """Combined navigation and loading function"""
594
  if not items:
595
  return items, 0, ""
596
 
@@ -610,7 +610,19 @@ class ReviewDashboardPage:
610
  return items, new_idx, "" # No review info update needed
611
  else: # prev
612
  new_idx = max(current_idx - 1, 0)
613
- return items, new_idx, "" # No review info update needed
 
 
 
 
 
 
 
 
 
 
 
 
614
 
615
  def save_validation_fn(items, idx, session, approved: bool, rejection_reason: str = ""):
616
  if not items or idx >= len(items):
@@ -944,6 +956,101 @@ class ReviewDashboardPage:
944
  log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
945
  return all_items, total_count
946
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947
  # Output definitions
948
  review_display_outputs = [
949
  self.tts_id, self.filename, self.sentence, self.ann_sentence,
 
340
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
341
 
342
  # FAST INITIAL QUERY: Load only essential data without complex validation processing
343
+ # Increased batch size for better navigation experience
344
+ INITIAL_BATCH_SIZE = 10 # Load 10 items initially for better navigation
345
 
346
  # Get the target annotator's assigned intervals
347
  assigned_intervals = db.query(AnnotationInterval).filter(
 
397
 
398
  # Query to get annotations with a window around the first unreviewed item
399
  if not all_reviewed and first_unreviewed_tts_id:
400
+ # Load a larger window around the first unreviewed TTS ID for better navigation
401
+ WINDOW_BEFORE = 5 # Load 5 items before the first unreviewed
402
  WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1 # Rest after
403
 
404
  # Get a range starting before the first unreviewed item
 
590
  return f"πŸ” **Phase 2 Review Mode** - No annotations found for review."
591
 
592
  def navigate_and_load_fn(items, current_idx, direction, session):
593
+ """Combined navigation and loading function with dynamic loading in both directions"""
594
  if not items:
595
  return items, 0, ""
596
 
 
610
  return items, new_idx, "" # No review info update needed
611
  else: # prev
612
  new_idx = max(current_idx - 1, 0)
613
+ # Load more items when user reaches the FIRST item of the batch
614
+ should_load_previous = (new_idx == 0 and current_idx == 0)
615
+ if should_load_previous:
616
+ log.info(f"User reached beginning of loaded items, will load previous items")
617
+ # Load previous items
618
+ updated_items, total_count, loaded_count = load_previous_items_fn(items, session, current_batch_size=5)
619
+ # Adjust index to account for new items loaded at the beginning
620
+ adjusted_idx = new_idx + loaded_count
621
+ # Update review info with new count
622
+ review_info = update_review_info_fn(updated_items, total_count)
623
+ return updated_items, adjusted_idx, review_info
624
+ else:
625
+ return items, new_idx, "" # No review info update needed
626
 
627
  def save_validation_fn(items, idx, session, approved: bool, rejection_reason: str = ""):
628
  if not items or idx >= len(items):
 
956
  log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
957
  return all_items, total_count
958
 
959
def load_previous_items_fn(items, session, current_batch_size=5):
    """Prepend the batch of annotations that precedes the loaded items.

    Used when the reviewer navigates backward past the first loaded item.
    The reviewer's target annotator is resolved through
    ``conf.REVIEW_MAPPING`` (annotator name -> reviewer name), and up to
    ``current_batch_size`` annotations with an id lower than the first
    loaded annotation are fetched and placed in front of ``items``.

    Args:
        items: Currently loaded item dicts in ascending annotation-id
            order; ``items[0]["annotation_id"]`` is the pagination anchor.
        session: Mapping that provides ``"user_id"`` and ``"username"``.
        current_batch_size: Maximum number of earlier items to load.

    Returns:
        A ``(all_items, total_count, loaded_count)`` tuple. ``all_items``
        is the newly loaded items followed by ``items``, ``total_count``
        is the number of the annotator's annotations inside their assigned
        intervals, and ``loaded_count`` is how many items were prepended.
        When the session is incomplete, or no reviewer mapping, annotator
        row or assigned interval is found, ``(items, 0, 0)`` is returned.
    """
    user_id = session.get("user_id")
    username = session.get("username")

    if not user_id or not username:
        return items, 0, 0  # Return existing items if no user session

    # First annotator whose configured reviewer is the current user.
    target_annotator = next(
        (
            annotator_name
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items()
            if reviewer_name == username
        ),
        None,
    )
    if not target_annotator:
        return items, 0, 0

    with get_db() as db:
        target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
        if not target_annotator_obj:
            return items, 0, 0

        # Get the target annotator's assigned intervals
        assigned_intervals = db.query(AnnotationInterval).filter(
            AnnotationInterval.annotator_id == target_annotator_obj.id
        ).all()
        if not assigned_intervals:
            return items, 0, 0

        # Count total annotations within assigned intervals for progress info.
        # Intervals with an open bound are skipped, as they cannot be ranged.
        total_count = sum(
            db.query(Annotation).join(
                TTSData, Annotation.tts_data_id == TTSData.id
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= interval.start_index,
                TTSData.id <= interval.end_index,
            ).count()
            for interval in assigned_intervals
            if interval.start_index is not None and interval.end_index is not None
        )

        # Pagination anchor: the first loaded annotation id. With nothing
        # loaded there is no anchor, so the upper bound is simply omitted
        # instead of binding an infinite float into the SQL comparison.
        first_loaded_id = items[0]["annotation_id"] if items else None

        # LOAD ITEMS BEFORE: Use id-based pagination to get previous items.
        # NOTE(review): if assigned intervals overlap, this join could yield
        # the same annotation more than once - confirm intervals are disjoint.
        query = db.query(
            Annotation,
            TTSData.filename,
            TTSData.sentence,
        ).join(
            TTSData, Annotation.tts_data_id == TTSData.id
        ).join(
            AnnotationInterval,
            and_(
                AnnotationInterval.annotator_id == target_annotator_obj.id,
                TTSData.id >= AnnotationInterval.start_index,
                TTSData.id <= AnnotationInterval.end_index,
            ),
        ).filter(
            Annotation.annotator_id == target_annotator_obj.id
        )
        if first_loaded_id is not None:
            query = query.filter(Annotation.id < first_loaded_id)

        # Descending order + limit selects the rows closest to the anchor;
        # reversing afterwards restores ascending order for display.
        results = query.order_by(Annotation.id.desc()).limit(current_batch_size).all()
        results.reverse()

        # Process new items with minimal data - validation status loaded on-demand
        new_items = []
        for annotation, filename, sentence in results:
            # An empty or whitespace-only annotated sentence marks a deleted annotation.
            is_deleted = not (annotation.annotated_sentence and annotation.annotated_sentence.strip())
            annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence

            new_items.append({
                "annotation_id": annotation.id,
                "tts_id": annotation.tts_data_id,
                "filename": filename,
                "sentence": sentence,
                "annotated_sentence": annotated_sentence_display,
                "is_deleted": is_deleted,
                "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
                "validation_status": "Loading...",  # Will be loaded on-demand
                "validation_loaded": False,  # Track if validation status has been loaded
            })

        # Combine with existing items (new items go to the front)
        all_items = new_items + items
        loaded_count = len(new_items)
        log.info(f"Loaded {loaded_count} items before id {first_loaded_id}, total now: {len(all_items)}")
        return all_items, total_count, loaded_count
1053
+
1054
  # Output definitions
1055
  review_display_outputs = [
1056
  self.tts_id, self.filename, self.sentence, self.ann_sentence,