vargha committed on
Commit
2d16b47
·
1 Parent(s): e0f4aa9

resume point and pagination

Browse files
Files changed (1) hide show
  1. components/review_dashboard_page.py +68 -82
components/review_dashboard_page.py CHANGED
@@ -312,38 +312,55 @@ class ReviewDashboardPage:
312
 
313
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
314
 
315
- # RESUME-OPTIMIZED LOADING: Load enough items to find unreviewed annotations
316
- # Start with a reasonable batch size, but expand if no unreviewed items found
317
- INITIAL_BATCH_SIZE = 20 # Increased from 5 to better find unreviewed items
318
- MAX_SEARCH_BATCH = 100 # Maximum to search for unreviewed items before giving up
319
 
320
- # Query to find first unreviewed annotation efficiently
321
- # First, try to find annotations that haven't been validated by this reviewer
322
- unreviewed_query = db.query(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  Annotation,
324
  TTSData.filename,
325
  TTSData.sentence
326
  ).join(
327
  TTSData, Annotation.tts_data_id == TTSData.id
328
- ).outerjoin(
329
- Validation, (Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
330
  ).filter(
331
- Annotation.annotator_id == target_annotator_obj.id,
332
- Validation.id.is_(None) # No validation record exists for this reviewer
333
- ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
334
 
335
- unreviewed_results = unreviewed_query.all()
336
-
337
- # If we found unreviewed items, use those; otherwise fall back to loading from the beginning
338
- if unreviewed_results:
339
- log.info(f"Found {len(unreviewed_results)} unreviewed annotations for resume")
340
- query_results = unreviewed_results
341
- # All items in this result are unreviewed, so we'll start from the first one
342
- resume_from_unreviewed = True
343
- else:
344
- log.info("No unreviewed annotations found, loading from beginning")
345
- # Fall back to original query - load from beginning
346
- original_query = db.query(
347
  Annotation,
348
  TTSData.filename,
349
  TTSData.sentence
@@ -351,43 +368,19 @@ class ReviewDashboardPage:
351
  TTSData, Annotation.tts_data_id == TTSData.id
352
  ).filter(
353
  Annotation.annotator_id == target_annotator_obj.id
354
- ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
355
-
356
- query_results = original_query.all()
357
- resume_from_unreviewed = False
358
-
359
- # Get total count for progress info (this is fast)
360
- total_count = db.query(Annotation).filter(
361
- Annotation.annotator_id == target_annotator_obj.id
362
- ).count()
363
 
364
- log.info(f"Initial load: {len(query_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
365
 
366
- # Process items and check validation status during initial loading for resume functionality
367
  items = []
368
- first_unreviewed_idx = -1
369
- user_id = session.get("user_id")
370
-
371
- for i, (annotation, filename, sentence) in enumerate(query_results):
372
  # Check if annotation is deleted (minimal processing)
373
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
374
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
375
 
376
- # Check validation status - if we loaded unreviewed items, they're all unreviewed
377
- if resume_from_unreviewed:
378
- validation_status = "Not Reviewed (Deleted)" if is_deleted else "Not Reviewed"
379
- else:
380
- # For regular loading, check validation status
381
- validation_status, _ = get_validation_status_for_item(db, annotation.id, user_id, annotation)
382
-
383
- # Track first unreviewed item for resume functionality
384
- if first_unreviewed_idx == -1 and validation_status.startswith("Not Reviewed"):
385
- # Prioritize non-deleted annotations for resume point
386
- if not is_deleted:
387
- first_unreviewed_idx = i
388
- elif first_unreviewed_idx == -1: # If no non-deleted found yet, accept deleted as fallback
389
- first_unreviewed_idx = i
390
-
391
  items.append({
392
  "annotation_id": annotation.id,
393
  "tts_id": annotation.tts_data_id,
@@ -396,30 +389,22 @@ class ReviewDashboardPage:
396
  "annotated_sentence": annotated_sentence_display,
397
  "is_deleted": is_deleted,
398
  "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
399
- "validation_status": validation_status, # Loaded during initial load for resume functionality
400
- "validation_loaded": True # Mark as loaded since we just loaded it
401
  })
402
 
403
- # Resume Logic: Set initial index based on loading strategy
 
 
404
  initial_idx = 0
405
- if items:
406
- if resume_from_unreviewed:
407
- # We loaded unreviewed items, so start from the first one (prioritize non-deleted)
408
- for i, item in enumerate(items):
409
- if not item.get("is_deleted", False):
410
- initial_idx = i
411
- break
412
- # If all are deleted, start from first
413
- if initial_idx == 0 and items[0].get("is_deleted", False):
414
- initial_idx = 0
415
- log.info(f"Reviewer '{username}' resuming from unreviewed annotations, starting at index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
416
- elif first_unreviewed_idx != -1:
417
- initial_idx = first_unreviewed_idx
418
- log.info(f"Reviewer '{username}' resuming at first unreviewed item, index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
419
- else:
420
- # All items in this batch are reviewed, start from the last item
421
- initial_idx = len(items) - 1 if items else 0
422
- log.info(f"Reviewer '{username}' has no unreviewed items in current batch, starting at last item, index: {initial_idx}")
423
 
424
  # Set initial display
425
  if items:
@@ -687,11 +672,11 @@ class ReviewDashboardPage:
687
  total_count = db.query(Annotation).filter(
688
  Annotation.annotator_id == target_annotator_obj.id
689
  ).count()
690
-
691
- # Load next batch starting from where we left off
692
- offset = len(items)
693
 
694
- # FAST LOADING: Use same strategy as initial load - simple query without complex JOINs
 
 
 
695
  query = db.query(
696
  Annotation,
697
  TTSData.filename,
@@ -699,8 +684,9 @@ class ReviewDashboardPage:
699
  ).join(
700
  TTSData, Annotation.tts_data_id == TTSData.id
701
  ).filter(
702
- Annotation.annotator_id == target_annotator_obj.id
703
- ).order_by(Annotation.id).offset(offset).limit(current_batch_size)
 
704
 
705
  results = query.all()
706
 
@@ -725,7 +711,7 @@ class ReviewDashboardPage:
725
 
726
  # Combine with existing items
727
  all_items = items + new_items
728
- log.info(f"Loaded {len(new_items)} more items, total now: {len(all_items)}")
729
  return all_items, total_count
730
 
731
  # Output definitions
 
312
 
313
  log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
314
 
315
+ # FAST INITIAL QUERY: Load only essential data without complex validation processing
316
+ # Reduced batch size for instant loading in HuggingFace spaces
317
+ INITIAL_BATCH_SIZE = 5 # Load only 5 items initially for instant response
 
318
 
319
+ # Determine resume position: find the first UNREVIEWED annotation for this reviewer
320
+ # If none found (everything reviewed), we'll fall back to the last batch
321
+ all_reviewed = False
322
+ first_unreviewed_row = db.query(Annotation.id).outerjoin(
323
+ Validation,
324
+ (Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
325
+ ).filter(
326
+ Annotation.annotator_id == target_annotator_obj.id,
327
+ Validation.id == None # No validation by this reviewer
328
+ ).order_by(Annotation.id.asc()).first()
329
+
330
+ # Count total annotations for progress info (this is fast)
331
+ total_count = db.query(Annotation).filter(
332
+ Annotation.annotator_id == target_annotator_obj.id
333
+ ).count()
334
+
335
+ # Compute start offset so that the first item in the loaded batch is the first unreviewed
336
+ start_offset = 0
337
+ if first_unreviewed_row is not None:
338
+ first_unreviewed_id = first_unreviewed_row[0]
339
+ start_offset = db.query(Annotation).filter(
340
+ Annotation.annotator_id == target_annotator_obj.id,
341
+ Annotation.id < first_unreviewed_id
342
+ ).count()
343
+ else:
344
+ # Everything reviewed: flag and we will load the last batch
345
+ all_reviewed = True
346
+
347
+ # Simple query to get basic annotation data quickly, starting from resume offset
348
+ initial_query = db.query(
349
  Annotation,
350
  TTSData.filename,
351
  TTSData.sentence
352
  ).join(
353
  TTSData, Annotation.tts_data_id == TTSData.id
 
 
354
  ).filter(
355
+ Annotation.annotator_id == target_annotator_obj.id
356
+ ).order_by(Annotation.id).offset(start_offset).limit(INITIAL_BATCH_SIZE)
 
357
 
358
+ initial_results = initial_query.all()
359
+
360
+ # If everything is reviewed or resume window empty, load the last batch so user can still browse
361
+ if (not initial_results and total_count > 0) or all_reviewed:
362
+ fallback_offset = max(total_count - INITIAL_BATCH_SIZE, 0)
363
+ initial_results = db.query(
 
 
 
 
 
 
364
  Annotation,
365
  TTSData.filename,
366
  TTSData.sentence
 
368
  TTSData, Annotation.tts_data_id == TTSData.id
369
  ).filter(
370
  Annotation.annotator_id == target_annotator_obj.id
371
+ ).order_by(Annotation.id).offset(fallback_offset).limit(INITIAL_BATCH_SIZE).all()
372
+ start_offset = fallback_offset
373
+ all_reviewed = True # Ensure we set this so we start at the end of the batch
 
 
 
 
 
 
374
 
375
+ log.info(f"Fast initial load (offset {start_offset}): {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
376
 
377
+ # Process items with minimal data - validation status will be loaded on-demand
378
  items = []
379
+ for annotation, filename, sentence in initial_results:
 
 
 
380
  # Check if annotation is deleted (minimal processing)
381
  is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
382
  annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  items.append({
385
  "annotation_id": annotation.id,
386
  "tts_id": annotation.tts_data_id,
 
389
  "annotated_sentence": annotated_sentence_display,
390
  "is_deleted": is_deleted,
391
  "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
392
+ "validation_status": "Loading...", # Will be loaded on-demand
393
+ "validation_loaded": False # Track if validation status has been loaded
394
  })
395
 
396
+ # Determine initial index inside the loaded batch
397
+ # - Normal case (has unreviewed): start at 0 (first unreviewed)
398
+ # - All reviewed: start at last item in the batch for browsing
399
  initial_idx = 0
400
+ if items and all_reviewed:
401
+ initial_idx = len(items) - 1
402
+ elif items and start_offset + len(items) >= total_count and first_unreviewed_row is not None and start_offset >= total_count:
403
+ initial_idx = len(items) - 1
404
+ elif items and start_offset == max(total_count - INITIAL_BATCH_SIZE, 0) and (first_unreviewed_row is not None and start_offset >= total_count):
405
+ initial_idx = len(items) - 1
406
+ elif items and not initial_results and total_count == 0:
407
+ initial_idx = 0
 
 
 
 
 
 
 
 
 
 
408
 
409
  # Set initial display
410
  if items:
 
672
  total_count = db.query(Annotation).filter(
673
  Annotation.annotator_id == target_annotator_obj.id
674
  ).count()
 
 
 
675
 
676
+ # Determine the next window based on the last loaded annotation id
677
+ last_loaded_id = items[-1]["annotation_id"] if items else 0
678
+
679
+ # FAST LOADING: Use id-based pagination to continue from current position
680
  query = db.query(
681
  Annotation,
682
  TTSData.filename,
 
684
  ).join(
685
  TTSData, Annotation.tts_data_id == TTSData.id
686
  ).filter(
687
+ Annotation.annotator_id == target_annotator_obj.id,
688
+ Annotation.id > last_loaded_id
689
+ ).order_by(Annotation.id).limit(current_batch_size)
690
 
691
  results = query.all()
692
 
 
711
 
712
  # Combine with existing items
713
  all_items = items + new_items
714
+ log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
715
  return all_items, total_count
716
 
717
  # Output definitions