Spaces:
Running
Running
resume point and pagination
Browse files- components/review_dashboard_page.py +180 -106
components/review_dashboard_page.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
import gradio as gr
|
4 |
import datetime
|
5 |
import sentry_sdk
|
6 |
-
from sqlalchemy import orm
|
7 |
|
8 |
from components.header import Header
|
9 |
from utils.logger import Logger
|
@@ -12,6 +12,7 @@ from config import conf
|
|
12 |
from utils.database import get_db
|
13 |
from data.models import Annotation, TTSData, Annotator, Validation, AnnotationInterval
|
14 |
from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
|
|
|
15 |
|
16 |
log = Logger()
|
17 |
LOADER = CloudServerAudioLoader(conf.FTP_URL)
|
@@ -205,27 +206,44 @@ class ReviewDashboardPage:
|
|
205 |
if not target_annotator_obj:
|
206 |
return f"⚠️ **Error:** Annotator '{target_annotator}' not found"
|
207 |
|
208 |
-
#
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
if iv.start_index is not None and iv.end_index is not None:
|
213 |
-
interval_filters.append(Annotation.tts_data_id.between(iv.start_index, iv.end_index))
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
base_filters.append(or_(*interval_filters))
|
218 |
|
219 |
-
# Count total annotations for target annotator
|
220 |
-
total_count =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
-
# Count reviewed annotations (have validation from this reviewer)
|
223 |
-
reviewed_count =
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
|
230 |
if total_count > 0:
|
231 |
percentage = (reviewed_count / total_count) * 100
|
@@ -321,87 +339,111 @@ class ReviewDashboardPage:
|
|
321 |
|
322 |
log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
|
323 |
|
324 |
-
#
|
325 |
-
|
326 |
-
|
327 |
-
for iv in intervals:
|
328 |
-
if iv.start_index is not None and iv.end_index is not None:
|
329 |
-
interval_filters.append(Annotation.tts_data_id.between(iv.start_index, iv.end_index))
|
330 |
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
# Reduced batch size for instant loading
|
336 |
-
INITIAL_BATCH_SIZE = 5
|
337 |
|
338 |
-
|
339 |
-
|
|
|
340 |
|
341 |
-
#
|
342 |
all_reviewed = False
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
Annotation.
|
355 |
-
).
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
371 |
).count()
|
372 |
-
|
373 |
-
# no items in range
|
374 |
-
start_offset = 0
|
375 |
-
|
376 |
-
# Query initial batch ordered by tts_data_id (data id)
|
377 |
-
initial_query = db.query(
|
378 |
-
Annotation,
|
379 |
-
TTSData.filename,
|
380 |
-
TTSData.sentence
|
381 |
-
).join(
|
382 |
-
TTSData, Annotation.tts_data_id == TTSData.id
|
383 |
-
).filter(
|
384 |
-
*base_filters
|
385 |
-
).order_by(Annotation.tts_data_id.asc()).offset(start_offset).limit(INITIAL_BATCH_SIZE)
|
386 |
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
fallback_offset = max(total_count - INITIAL_BATCH_SIZE, 0)
|
392 |
-
initial_results = db.query(
|
393 |
Annotation,
|
394 |
TTSData.filename,
|
395 |
TTSData.sentence
|
396 |
).join(
|
397 |
TTSData, Annotation.tts_data_id == TTSData.id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
).filter(
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
|
404 |
-
log.info(f"Fast initial load
|
405 |
|
406 |
# Process items with minimal data - validation status will be loaded on-demand
|
407 |
items = []
|
@@ -423,9 +465,11 @@ class ReviewDashboardPage:
|
|
423 |
})
|
424 |
|
425 |
# Determine initial index inside the loaded batch
|
|
|
|
|
426 |
initial_idx = 0
|
427 |
if items and all_reviewed:
|
428 |
-
initial_idx = len(items) - 1
|
429 |
|
430 |
# Set initial display
|
431 |
if items:
|
@@ -462,6 +506,13 @@ class ReviewDashboardPage:
|
|
462 |
# Ensure correct order and number of return values for empty items (14 outputs)
|
463 |
return [], 0, f"π **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="β Reject")
|
464 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
def show_current_review_item_fn(items, idx, session):
|
466 |
if not items or idx >= len(items) or idx < 0:
|
467 |
# tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
|
@@ -597,6 +648,13 @@ class ReviewDashboardPage:
|
|
597 |
|
598 |
return items, items[idx]["validation_status"], rejection_input_update
|
599 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
600 |
def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
|
601 |
"""Handle rejection button click - two-step process"""
|
602 |
if not items or idx >= len(items):
|
@@ -675,33 +733,49 @@ class ReviewDashboardPage:
|
|
675 |
if not target_annotator_obj:
|
676 |
return items, 0
|
677 |
|
678 |
-
#
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
685 |
-
if interval_filters:
|
686 |
-
base_filters.append(or_(*interval_filters))
|
687 |
|
688 |
-
#
|
689 |
-
total_count =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
690 |
|
691 |
-
# Determine the next window based on the last loaded
|
692 |
-
|
693 |
|
694 |
-
# Use
|
695 |
query = db.query(
|
696 |
Annotation,
|
697 |
TTSData.filename,
|
698 |
TTSData.sentence
|
699 |
).join(
|
700 |
TTSData, Annotation.tts_data_id == TTSData.id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
701 |
).filter(
|
702 |
-
|
703 |
-
Annotation.
|
704 |
-
).order_by(Annotation.
|
705 |
|
706 |
results = query.all()
|
707 |
|
@@ -726,7 +800,7 @@ class ReviewDashboardPage:
|
|
726 |
|
727 |
# Combine with existing items
|
728 |
all_items = items + new_items
|
729 |
-
log.info(f"Loaded {len(new_items)} more items after
|
730 |
return all_items, total_count
|
731 |
|
732 |
# Output definitions
|
|
|
3 |
import gradio as gr
|
4 |
import datetime
|
5 |
import sentry_sdk
|
6 |
+
from sqlalchemy import orm
|
7 |
|
8 |
from components.header import Header
|
9 |
from utils.logger import Logger
|
|
|
12 |
from utils.database import get_db
|
13 |
from data.models import Annotation, TTSData, Annotator, Validation, AnnotationInterval
|
14 |
from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
|
15 |
+
from sqlalchemy import and_
|
16 |
|
17 |
log = Logger()
|
18 |
LOADER = CloudServerAudioLoader(conf.FTP_URL)
|
|
|
206 |
if not target_annotator_obj:
|
207 |
return f"⚠️ **Error:** Annotator '{target_annotator}' not found"
|
208 |
|
209 |
+
# Get the target annotator's assigned intervals
|
210 |
+
assigned_intervals = db.query(AnnotationInterval).filter(
|
211 |
+
AnnotationInterval.annotator_id == target_annotator_obj.id
|
212 |
+
).all()
|
|
|
|
|
213 |
|
214 |
+
if not assigned_intervals:
|
215 |
+
return f"⚠️ **Error:** No assigned intervals for annotator '{target_annotator}'"
|
|
|
216 |
|
217 |
+
# Count total annotations within assigned intervals for target annotator
|
218 |
+
total_count = 0
|
219 |
+
for interval in assigned_intervals:
|
220 |
+
if interval.start_index is None or interval.end_index is None:
|
221 |
+
continue
|
222 |
+
interval_count = db.query(Annotation).join(
|
223 |
+
TTSData, Annotation.tts_data_id == TTSData.id
|
224 |
+
).filter(
|
225 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
226 |
+
TTSData.id >= interval.start_index,
|
227 |
+
TTSData.id <= interval.end_index
|
228 |
+
).count()
|
229 |
+
total_count += interval_count
|
230 |
|
231 |
+
# Count reviewed annotations within assigned intervals (have validation from this reviewer)
|
232 |
+
reviewed_count = 0
|
233 |
+
for interval in assigned_intervals:
|
234 |
+
if interval.start_index is None or interval.end_index is None:
|
235 |
+
continue
|
236 |
+
interval_reviewed = db.query(Annotation).join(
|
237 |
+
TTSData, Annotation.tts_data_id == TTSData.id
|
238 |
+
).join(
|
239 |
+
Validation, Annotation.id == Validation.annotation_id
|
240 |
+
).filter(
|
241 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
242 |
+
TTSData.id >= interval.start_index,
|
243 |
+
TTSData.id <= interval.end_index,
|
244 |
+
Validation.validator_id == user_id
|
245 |
+
).count()
|
246 |
+
reviewed_count += interval_reviewed
|
247 |
|
248 |
if total_count > 0:
|
249 |
percentage = (reviewed_count / total_count) * 100
|
|
|
339 |
|
340 |
log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
|
341 |
|
342 |
+
# FAST INITIAL QUERY: Load only essential data without complex validation processing
|
343 |
+
# Reduced batch size for instant loading in HuggingFace spaces
|
344 |
+
INITIAL_BATCH_SIZE = 5 # Load only 5 items initially for instant response
|
|
|
|
|
|
|
345 |
|
346 |
+
# Get the target annotator's assigned intervals
|
347 |
+
assigned_intervals = db.query(AnnotationInterval).filter(
|
348 |
+
AnnotationInterval.annotator_id == target_annotator_obj.id
|
349 |
+
).all()
|
|
|
|
|
350 |
|
351 |
+
if not assigned_intervals:
|
352 |
+
log.warning(f"No assigned intervals found for annotator {target_annotator}")
|
353 |
+
return [], 0, f"Review Target Error: No assigned intervals for annotator '{target_annotator}'.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="β Reject")
|
354 |
|
355 |
+
# Find the first UNREVIEWED annotation within assigned intervals for this reviewer
|
356 |
all_reviewed = False
|
357 |
+
first_unreviewed_tts_id = None
|
358 |
+
|
359 |
+
# Query for the first TTS data ID within assigned intervals that has no validation by this reviewer
|
360 |
+
for interval in assigned_intervals:
|
361 |
+
if interval.start_index is None or interval.end_index is None:
|
362 |
+
continue
|
363 |
+
|
364 |
+
unreviewed_query = db.query(TTSData.id).join(
|
365 |
+
Annotation, Annotation.tts_data_id == TTSData.id
|
366 |
+
).outerjoin(
|
367 |
+
Validation,
|
368 |
+
(Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
|
369 |
+
).filter(
|
370 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
371 |
+
TTSData.id >= interval.start_index,
|
372 |
+
TTSData.id <= interval.end_index,
|
373 |
+
Validation.id.is_(None) # No validation by this reviewer (fixed SQLAlchemy syntax)
|
374 |
+
).order_by(TTSData.id.asc()).first()
|
375 |
+
|
376 |
+
if unreviewed_query:
|
377 |
+
first_unreviewed_tts_id = unreviewed_query[0]
|
378 |
+
break
|
379 |
+
|
380 |
+
if first_unreviewed_tts_id is None:
|
381 |
+
# Everything reviewed: flag and we will load the last batch from the last interval
|
382 |
+
all_reviewed = True
|
383 |
+
|
384 |
+
# Count total annotations within assigned intervals for progress info
|
385 |
+
total_count = 0
|
386 |
+
for interval in assigned_intervals:
|
387 |
+
if interval.start_index is None or interval.end_index is None:
|
388 |
+
continue
|
389 |
+
interval_count = db.query(Annotation).join(
|
390 |
+
TTSData, Annotation.tts_data_id == TTSData.id
|
391 |
+
).filter(
|
392 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
393 |
+
TTSData.id >= interval.start_index,
|
394 |
+
TTSData.id <= interval.end_index
|
395 |
).count()
|
396 |
+
total_count += interval_count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
|
398 |
+
# Query to get annotations starting from the first unreviewed item
|
399 |
+
if not all_reviewed and first_unreviewed_tts_id:
|
400 |
+
# Load from first unreviewed TTS ID
|
401 |
+
initial_query = db.query(
|
|
|
|
|
402 |
Annotation,
|
403 |
TTSData.filename,
|
404 |
TTSData.sentence
|
405 |
).join(
|
406 |
TTSData, Annotation.tts_data_id == TTSData.id
|
407 |
+
).join(
|
408 |
+
AnnotationInterval,
|
409 |
+
and_(
|
410 |
+
AnnotationInterval.annotator_id == target_annotator_obj.id,
|
411 |
+
TTSData.id >= AnnotationInterval.start_index,
|
412 |
+
TTSData.id <= AnnotationInterval.end_index
|
413 |
+
)
|
414 |
).filter(
|
415 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
416 |
+
TTSData.id >= first_unreviewed_tts_id
|
417 |
+
).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
|
418 |
+
|
419 |
+
initial_results = initial_query.all()
|
420 |
+
else:
|
421 |
+
# Everything reviewed or no unreviewed items: load the last batch from assigned intervals
|
422 |
+
all_reviewed = True
|
423 |
+
if assigned_intervals and total_count > 0:
|
424 |
+
# Find the last interval and load the last batch from there
|
425 |
+
last_interval = max(assigned_intervals, key=lambda x: x.end_index or 0)
|
426 |
+
if last_interval.start_index is not None and last_interval.end_index is not None:
|
427 |
+
initial_query = db.query(
|
428 |
+
Annotation,
|
429 |
+
TTSData.filename,
|
430 |
+
TTSData.sentence
|
431 |
+
).join(
|
432 |
+
TTSData, Annotation.tts_data_id == TTSData.id
|
433 |
+
).filter(
|
434 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
435 |
+
TTSData.id >= last_interval.start_index,
|
436 |
+
TTSData.id <= last_interval.end_index
|
437 |
+
).order_by(TTSData.id.desc()).limit(INITIAL_BATCH_SIZE)
|
438 |
+
|
439 |
+
initial_results = initial_query.all()
|
440 |
+
initial_results.reverse() # Restore ascending order
|
441 |
+
else:
|
442 |
+
initial_results = []
|
443 |
+
else:
|
444 |
+
initial_results = []
|
445 |
|
446 |
+
log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
|
447 |
|
448 |
# Process items with minimal data - validation status will be loaded on-demand
|
449 |
items = []
|
|
|
465 |
})
|
466 |
|
467 |
# Determine initial index inside the loaded batch
|
468 |
+
# - Normal case (has unreviewed): start at 0 (first unreviewed)
|
469 |
+
# - All reviewed: start at last item in the batch for browsing
|
470 |
initial_idx = 0
|
471 |
if items and all_reviewed:
|
472 |
+
initial_idx = len(items) - 1
|
473 |
|
474 |
# Set initial display
|
475 |
if items:
|
|
|
506 |
# Ensure correct order and number of return values for empty items (14 outputs)
|
507 |
return [], 0, f"π **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="β Reject")
|
508 |
|
509 |
+
# except Exception as e:
|
510 |
+
# log.error(f"Error loading review items: {e}")
|
511 |
+
# sentry_sdk.capture_exception(e)
|
512 |
+
# gr.Error(f"Failed to load review data: {e}")
|
513 |
+
# # Ensure correct order and number of return values for error case (14 outputs)
|
514 |
+
# return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="β Reject")
|
515 |
+
|
516 |
def show_current_review_item_fn(items, idx, session):
|
517 |
if not items or idx >= len(items) or idx < 0:
|
518 |
# tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_name_placeholder, audio_update, rejection_reason_update, rejection_mode_reset, btn_reject_update
|
|
|
648 |
|
649 |
return items, items[idx]["validation_status"], rejection_input_update
|
650 |
|
651 |
+
# except Exception as e:
|
652 |
+
# db.rollback()
|
653 |
+
# log.error(f"Error saving validation: {e}")
|
654 |
+
# sentry_sdk.capture_exception(e)
|
655 |
+
# gr.Error(f"Failed to save validation: {e}")
|
656 |
+
# return items, current_item["validation_status"], gr.update(visible=False) # Return original status and hide input on error
|
657 |
+
|
658 |
def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
|
659 |
"""Handle rejection button click - two-step process"""
|
660 |
if not items or idx >= len(items):
|
|
|
733 |
if not target_annotator_obj:
|
734 |
return items, 0
|
735 |
|
736 |
+
# Get the target annotator's assigned intervals
|
737 |
+
assigned_intervals = db.query(AnnotationInterval).filter(
|
738 |
+
AnnotationInterval.annotator_id == target_annotator_obj.id
|
739 |
+
).all()
|
740 |
+
|
741 |
+
if not assigned_intervals:
|
742 |
+
return items, 0
|
|
|
|
|
743 |
|
744 |
+
# Count total annotations within assigned intervals for progress info
|
745 |
+
total_count = 0
|
746 |
+
for interval in assigned_intervals:
|
747 |
+
if interval.start_index is None or interval.end_index is None:
|
748 |
+
continue
|
749 |
+
interval_count = db.query(Annotation).join(
|
750 |
+
TTSData, Annotation.tts_data_id == TTSData.id
|
751 |
+
).filter(
|
752 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
753 |
+
TTSData.id >= interval.start_index,
|
754 |
+
TTSData.id <= interval.end_index
|
755 |
+
).count()
|
756 |
+
total_count += interval_count
|
757 |
|
758 |
+
# Determine the next window based on the last loaded annotation id
|
759 |
+
last_loaded_id = items[-1]["annotation_id"] if items else 0
|
760 |
|
761 |
+
# FAST LOADING: Use id-based pagination within assigned intervals to continue from current position
|
762 |
query = db.query(
|
763 |
Annotation,
|
764 |
TTSData.filename,
|
765 |
TTSData.sentence
|
766 |
).join(
|
767 |
TTSData, Annotation.tts_data_id == TTSData.id
|
768 |
+
).join(
|
769 |
+
AnnotationInterval,
|
770 |
+
and_(
|
771 |
+
AnnotationInterval.annotator_id == target_annotator_obj.id,
|
772 |
+
TTSData.id >= AnnotationInterval.start_index,
|
773 |
+
TTSData.id <= AnnotationInterval.end_index
|
774 |
+
)
|
775 |
).filter(
|
776 |
+
Annotation.annotator_id == target_annotator_obj.id,
|
777 |
+
Annotation.id > last_loaded_id
|
778 |
+
).order_by(Annotation.id).limit(current_batch_size)
|
779 |
|
780 |
results = query.all()
|
781 |
|
|
|
800 |
|
801 |
# Combine with existing items
|
802 |
all_items = items + new_items
|
803 |
+
log.info(f"Loaded {len(new_items)} more items after id {last_loaded_id}, total now: {len(all_items)}")
|
804 |
return all_items, total_count
|
805 |
|
806 |
# Output definitions
|