resume point and pagination
components/review_dashboard_page.py (CHANGED)
@@ -312,38 +312,55 @@ class ReviewDashboardPage:
 
         log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
 
-        #
-        #
-        INITIAL_BATCH_SIZE =
-        MAX_SEARCH_BATCH = 100 # Maximum to search for unreviewed items before giving up
+        # FAST INITIAL QUERY: Load only essential data without complex validation processing
+        # Reduced batch size for instant loading in HuggingFace spaces
+        INITIAL_BATCH_SIZE = 5 # Load only 5 items initially for instant response
 
-        #
-        #
-
+        # Determine resume position: find the first UNREVIEWED annotation for this reviewer
+        # If none found (everything reviewed), we'll fall back to the last batch
+        all_reviewed = False
+        first_unreviewed_row = db.query(Annotation.id).outerjoin(
+            Validation,
+            (Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
+        ).filter(
+            Annotation.annotator_id == target_annotator_obj.id,
+            Validation.id == None # No validation by this reviewer
+        ).order_by(Annotation.id.asc()).first()
+
+        # Count total annotations for progress info (this is fast)
+        total_count = db.query(Annotation).filter(
+            Annotation.annotator_id == target_annotator_obj.id
+        ).count()
+
+        # Compute start offset so that the first item in the loaded batch is the first unreviewed
+        start_offset = 0
+        if first_unreviewed_row is not None:
+            first_unreviewed_id = first_unreviewed_row[0]
+            start_offset = db.query(Annotation).filter(
+                Annotation.annotator_id == target_annotator_obj.id,
+                Annotation.id < first_unreviewed_id
+            ).count()
+        else:
+            # Everything reviewed: flag and we will load the last batch
+            all_reviewed = True
+
+        # Simple query to get basic annotation data quickly, starting from resume offset
+        initial_query = db.query(
             Annotation,
             TTSData.filename,
             TTSData.sentence
         ).join(
             TTSData, Annotation.tts_data_id == TTSData.id
-        ).outerjoin(
-            Validation, (Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
         ).filter(
-            Annotation.annotator_id == target_annotator_obj.id
-
-        ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
+            Annotation.annotator_id == target_annotator_obj.id
+        ).order_by(Annotation.id).offset(start_offset).limit(INITIAL_BATCH_SIZE)
 
-
-
-        # If
-        if
-
-
-            # All items in this result are unreviewed, so we'll start from the first one
-            resume_from_unreviewed = True
-        else:
-            log.info("No unreviewed annotations found, loading from beginning")
-            # Fall back to original query - load from beginning
-            original_query = db.query(
+        initial_results = initial_query.all()
+
+        # If everything is reviewed or resume window empty, load the last batch so user can still browse
+        if (not initial_results and total_count > 0) or all_reviewed:
+            fallback_offset = max(total_count - INITIAL_BATCH_SIZE, 0)
+            initial_results = db.query(
                 Annotation,
                 TTSData.filename,
                 TTSData.sentence
@@ -351,43 +368,19 @@ class ReviewDashboardPage:
                 TTSData, Annotation.tts_data_id == TTSData.id
             ).filter(
                 Annotation.annotator_id == target_annotator_obj.id
-            ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
-
-
-            resume_from_unreviewed = False
-
-        # Get total count for progress info (this is fast)
-        total_count = db.query(Annotation).filter(
-            Annotation.annotator_id == target_annotator_obj.id
-        ).count()
+            ).order_by(Annotation.id).offset(fallback_offset).limit(INITIAL_BATCH_SIZE).all()
+            start_offset = fallback_offset
+            all_reviewed = True # Ensure we set this so we start at the end of the batch
 
-        log.info(f"
+        log.info(f"Fast initial load (offset {start_offset}): {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
 
-        # Process items
+        # Process items with minimal data - validation status will be loaded on-demand
         items = []
-
-        user_id = session.get("user_id")
-
-        for i, (annotation, filename, sentence) in enumerate(query_results):
+        for annotation, filename, sentence in initial_results:
             # Check if annotation is deleted (minimal processing)
             is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
             annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
 
-            # Check validation status - if we loaded unreviewed items, they're all unreviewed
-            if resume_from_unreviewed:
-                validation_status = "Not Reviewed (Deleted)" if is_deleted else "Not Reviewed"
-            else:
-                # For regular loading, check validation status
-                validation_status, _ = get_validation_status_for_item(db, annotation.id, user_id, annotation)
-
-            # Track first unreviewed item for resume functionality
-            if first_unreviewed_idx == -1 and validation_status.startswith("Not Reviewed"):
-                # Prioritize non-deleted annotations for resume point
-                if not is_deleted:
-                    first_unreviewed_idx = i
-                elif first_unreviewed_idx == -1: # If no non-deleted found yet, accept deleted as fallback
-                    first_unreviewed_idx = i
-
             items.append({
                 "annotation_id": annotation.id,
                 "tts_id": annotation.tts_data_id,
@@ -396,30 +389,22 @@ class ReviewDashboardPage:
                 "annotated_sentence": annotated_sentence_display,
                 "is_deleted": is_deleted,
                 "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
-                "validation_status":
-                "validation_loaded":
+                "validation_status": "Loading...", # Will be loaded on-demand
+                "validation_loaded": False # Track if validation status has been loaded
             })
 
-        #
+        # Determine initial index inside the loaded batch
+        # - Normal case (has unreviewed): start at 0 (first unreviewed)
+        # - All reviewed: start at last item in the batch for browsing
         initial_idx = 0
-        if items:
-
-
-
-
-
-
-
-                if initial_idx == 0 and items[0].get("is_deleted", False):
-                    initial_idx = 0
-                log.info(f"Reviewer '{username}' resuming from unreviewed annotations, starting at index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
-            elif first_unreviewed_idx != -1:
-                initial_idx = first_unreviewed_idx
-                log.info(f"Reviewer '{username}' resuming at first unreviewed item, index: {initial_idx} (annotation ID: {items[initial_idx]['annotation_id']})")
-            else:
-                # All items in this batch are reviewed, start from the last item
-                initial_idx = len(items) - 1 if items else 0
-                log.info(f"Reviewer '{username}' has no unreviewed items in current batch, starting at last item, index: {initial_idx}")
+        if items and all_reviewed:
+            initial_idx = len(items) - 1
+        elif items and start_offset + len(items) >= total_count and first_unreviewed_row is not None and start_offset >= total_count:
+            initial_idx = len(items) - 1
+        elif items and start_offset == max(total_count - INITIAL_BATCH_SIZE, 0) and (first_unreviewed_row is not None and start_offset >= total_count):
+            initial_idx = len(items) - 1
+        elif items and not initial_results and total_count == 0:
+            initial_idx = 0
 
         # Set initial display
         if items:
@@ -687,11 +672,11 @@ class ReviewDashboardPage:
         total_count = db.query(Annotation).filter(
            Annotation.annotator_id == target_annotator_obj.id
        ).count()
-
-        # Load next batch starting from where we left off
-        offset = len(items)
 
-        #
+        # Determine the next window based on the last loaded annotation id
+        last_loaded_id = items[-1]["annotation_id"] if items else 0
+
+        # FAST LOADING: Use id-based pagination to continue from current position
        query = db.query(
            Annotation,
            TTSData.filename,
@@ -699,8 +684,9 @@ class ReviewDashboardPage:
        ).join(
            TTSData, Annotation.tts_data_id == TTSData.id
        ).filter(
-            Annotation.annotator_id == target_annotator_obj.id
-
+            Annotation.annotator_id == target_annotator_obj.id,
+            Annotation.id > last_loaded_id
+        ).order_by(Annotation.id).limit(current_batch_size)
 
        results = query.all()
 
@@ -725,7 +711,7 @@ class ReviewDashboardPage:
 
        # Combine with existing items
        all_items = items + new_items
-        log.info(f"Loaded {len(new_items)} more items, total now: {len(all_items)}")
+        log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
        return all_items, total_count
 
        # Output definitions
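
The resume-point logic added above is an anti-join: pick the first Annotation by the target annotator that has no Validation row from the current reviewer, then count how many of that annotator's annotations precede it so the id can be turned into an OFFSET for the initial window. A minimal standalone sketch of the same idea, assuming the Annotation and Validation models referenced in the diff and an open SQLAlchemy Session (the find_resume_offset helper is illustrative, not part of the commit):

# Hypothetical helper mirroring the resume-point lookup shown in the diff.
# Assumes Annotation and Validation are the SQLAlchemy models used above.
from sqlalchemy.orm import Session

def find_resume_offset(db: Session, annotator_id: int, reviewer_id: int) -> tuple[int, bool]:
    """Return (start_offset, all_reviewed) for this annotator/reviewer pair."""
    # Anti-join: annotations with no Validation row from this reviewer.
    first_unreviewed = (
        db.query(Annotation.id)
        .outerjoin(
            Validation,
            (Validation.annotation_id == Annotation.id)
            & (Validation.validator_id == reviewer_id),
        )
        .filter(
            Annotation.annotator_id == annotator_id,
            Validation.id.is_(None),  # no review by this reviewer yet
        )
        .order_by(Annotation.id.asc())
        .first()
    )
    if first_unreviewed is None:
        # Everything is reviewed; the caller falls back to the last batch.
        return 0, True

    # Offset = number of this annotator's annotations before the first unreviewed one.
    offset = (
        db.query(Annotation)
        .filter(
            Annotation.annotator_id == annotator_id,
            Annotation.id < first_unreviewed[0],
        )
        .count()
    )
    return offset, False

Validation.id.is_(None) is the idiomatic spelling of the Validation.id == None check in the diff; both compile to an IS NULL filter, which is what turns the outer join into an anti-join.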
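
The load-more path switches from an offset-based window to id-based (keyset) pagination: each new batch starts strictly after the highest Annotation.id already on screen. A sketch under the same assumptions (load_next_batch and its batch_size parameter are illustrative; current_batch_size in the diff plays the same role):

# Hypothetical helper showing the keyset-pagination shape of the "load more" query.
from sqlalchemy.orm import Session

def load_next_batch(db: Session, annotator_id: int, last_loaded_id: int, batch_size: int = 5):
    """Fetch the next window of annotations after the last one already loaded."""
    return (
        db.query(Annotation, TTSData.filename, TTSData.sentence)
        .join(TTSData, Annotation.tts_data_id == TTSData.id)
        .filter(
            Annotation.annotator_id == annotator_id,
            Annotation.id > last_loaded_id,  # continue strictly after what is loaded
        )
        .order_by(Annotation.id)
        .limit(batch_size)
        .all()
    )

Because the Annotation.id > last_loaded_id filter pairs with ORDER BY Annotation.id, an index on the id column lets the database seek straight to the next window instead of scanning and discarding OFFSET rows, and the window stays stable even if earlier rows are inserted or deleted between "load more" clicks.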