tts_labeling / components /review_dashboard_page.py
vargha's picture
index finding debug
2e99fbf
# components/review_dashboard_page.py
import gradio as gr
import datetime
import sentry_sdk
from sqlalchemy import orm
from components.header import Header
from utils.logger import Logger
from utils.cloud_server_audio_loader import CloudServerAudioLoader
from config import conf
from utils.database import get_db
from data.models import Annotation, TTSData, Annotator, Validation, AnnotationInterval
from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
from sqlalchemy import and_
# Logger instance used by the callbacks defined in this module.
log = Logger()
# Audio loader constructed from conf.FTP_URL; download_voice_fn fetches audio through it.
LOADER = CloudServerAudioLoader(conf.FTP_URL)
class ReviewDashboardPage:
def __init__(self) -> None:
    """Create every Gradio component of the review dashboard.

    Components are created inside an initially hidden column
    (``self.container``): a header, a review-info banner, a content
    column (item fields plus approve/reject/skip, navigation and jump
    controls) and an audio column (load button, player and review-status
    group). ``gr.State`` holders and the list of widgets toggled while
    audio loads are created at the end.
    """
    # NOTE(review): the nesting of the layout blocks below follows the
    # section comments -- confirm against the rendered UI.
    with gr.Column(visible=False) as self.container:
        self.header = Header()
        # Hidden numeric component; presumably changed to trigger a data load
        # -- confirm against the callback wiring.
        self.load_trigger = gr.Number(value=0, visible=False)

        # Review info banner
        with gr.Row():
            self.review_info = gr.Markdown("", elem_classes="review-banner")

        with gr.Row():
            # Left column - review content
            with gr.Column(scale=3):
                with gr.Row():
                    self.tts_id = gr.Textbox(label="ID", interactive=False, scale=1)
                    self.filename = gr.Textbox(label="Filename", interactive=False, scale=3)
                # Read-only, right-to-left text boxes for the original and annotated sentence.
                self.sentence = gr.Textbox(
                    label="Original Sentence", interactive=False, max_lines=5, rtl=True
                )
                self.ann_sentence = gr.Textbox(
                    label="Annotated Sentence (by Original Annotator)",
                    interactive=False, max_lines=5, rtl=True
                )
                with gr.Row():
                    # self.annotator_name = gr.Textbox(label="Original Annotator", interactive=False, scale=1) # Removed for anonymization
                    self.annotated_at = gr.Textbox(label="Annotated At", interactive=False, scale=2)

                # Review actions
                with gr.Row():
                    self.btn_approve = gr.Button("βœ… Approve", variant="primary", min_width=120)
                    self.btn_reject = gr.Button("❌ Reject", variant="stop", min_width=120)
                    self.btn_skip = gr.Button("⏭️ Skip (No Decision)", min_width=150)

                # Navigation
                with gr.Row():
                    self.btn_prev = gr.Button("⬅️ Previous", min_width=120)
                    self.btn_next = gr.Button("Next ➑️", min_width=120)

                # Jump controls
                with gr.Row():
                    # precision=0 makes the number input integer-valued.
                    self.jump_data_id_input = gr.Number(
                        label="Jump to ID",
                        value=None,
                        precision=0,
                        interactive=True,
                        min_width=120
                    )
                    self.btn_jump = gr.Button("Go to ID", min_width=70)

            # Right column - audio
            with gr.Column(scale=2):
                self.btn_load_voice = gr.Button("Load Audio & Play", min_width=150)
                self.audio = gr.Audio(
                    label="πŸ”Š Audio", interactive=False, autoplay=True
                )

                # Review status display
                with gr.Group():
                    gr.Markdown("### Review Status")
                    self.current_validation_status = gr.Textbox(
                        label="Current Status", interactive=False
                    )
                    # Created hidden; the rejection callbacks toggle its visibility.
                    self.rejection_reason_input = gr.Textbox(
                        label="Rejection Reason",
                        placeholder="Enter reason and press Enter or click away...",
                        interactive=True,
                        visible=False,
                        max_lines=3,
                        elem_id="rejection_reason_input"  # explicit DOM id for this input
                    )

        # State variables
        self.items_state = gr.State([])  # currently loaded batch of item dicts
        self.idx_state = gr.State(0)  # index of the displayed item within the batch
        self.original_audio_state = gr.State(None)
        self.rejection_mode_active = gr.State(False)  # True while waiting for a rejection reason

        # Interactive widgets whose `interactive` flag is switched together
        # by update_ui_interactive_state.
        self.interactive_ui_elements = [
            self.btn_prev, self.btn_next, self.btn_approve, self.btn_reject,
            self.btn_skip, self.btn_jump, self.jump_data_id_input, self.btn_load_voice
        ]
def register_callbacks(self, login_page, session_state: gr.State, root_blocks: gr.Blocks):
self.header.register_callbacks(login_page, self, session_state)
def update_ui_interactive_state(is_interactive: bool):
    """Build one ``gr.update`` per entry of ``self.interactive_ui_elements``.

    Every widget gets its ``interactive`` flag set to ``is_interactive``.
    The audio-load button additionally swaps its label between the idle
    text and a "loading" text.

    Returns:
        A list of updates in the same order as ``self.interactive_ui_elements``.
    """
    def _update_for(component):
        # The load button doubles as the progress indicator.
        if component == self.btn_load_voice:
            if is_interactive:
                return gr.update(value="Load Audio & Play", interactive=True)
            return gr.update(value="⏳ Loading Audio...", interactive=False)
        return gr.update(interactive=is_interactive)

    return [_update_for(component) for component in self.interactive_ui_elements]
def download_voice_fn(filename_to_load):
    """Load the audio for ``filename_to_load`` through ``LOADER``.

    Returns:
        A 3-tuple. For a falsy filename: ``(None, None, update)`` where the
        update clears the player and disables autoplay. Otherwise: the
        loaded ``(sr, wav)`` pair, a second pair holding a copy of ``wav``,
        and an update that puts the audio into the player with autoplay on.

    No exception is handled here: whatever ``LOADER.load_audio`` raises
    propagates to the caller.
    """
    if filename_to_load:
        log.info(f"Downloading voice for review: {filename_to_load}")
        sr, wav = LOADER.load_audio(filename_to_load)
        log.info(f"Successfully loaded audio: {filename_to_load} (SR: {sr}, Length: {len(wav)} samples)")

        loaded = (sr, wav)
        # Independent copy of the samples, returned alongside the loaded pair.
        untouched_copy = (sr, wav.copy())
        return loaded, untouched_copy, gr.update(value=loaded, autoplay=True)

    return None, None, gr.update(value=None, autoplay=False)
def get_validation_status_for_item(db, annotation_id, user_id, annotation_obj):
    """Resolve the review status text of one annotation for one reviewer.

    Args:
        db: Open database session used for the lookup.
        annotation_id: Id of the annotation whose validation is looked up.
        user_id: Id of the reviewer (matched against ``validator_id``).
        annotation_obj: The annotation; only ``annotated_sentence`` is read.

    Returns:
        ``(status, is_deleted)``. ``status`` is one of "Approved",
        "Rejected", "Rejected (<description>)", "Not Reviewed" or
        "Not Reviewed (Deleted)". ``is_deleted`` is True when the annotated
        sentence is missing or blank.
    """
    record = (
        db.query(Validation)
        .filter_by(annotation_id=annotation_id, validator_id=user_id)
        .first()
    )

    # An annotation without text is treated as deleted.
    sentence = annotation_obj.annotated_sentence
    is_deleted = not sentence or sentence.strip() == ""

    if not record:
        status = "Not Reviewed (Deleted)" if is_deleted else "Not Reviewed"
    elif record.validated:
        status = "Approved"
    elif record.description:
        status = f"Rejected ({record.description})"
    else:
        status = "Rejected"

    return status, is_deleted
def get_review_progress_fn(session):
    """Render the reviewer's progress as a tqdm-style HTML snippet.

    Progress is ``reviewed / total`` over the annotations of the reviewer's
    target annotator that fall inside that annotator's assigned intervals;
    "reviewed" means a validation row by this reviewer exists.

    Args:
        session: Mapping read via ``.get`` for ``user_id`` and ``username``.

    Returns:
        An HTML string with the progress display, a short markdown message
        when nothing can be computed, or ``""`` when the session is
        incomplete or the user is not a reviewer in ``conf.REVIEW_MAPPING``.
    """
    user_id = session.get("user_id")
    username = session.get("username")
    if not (user_id and username):
        return ""

    # Only users listed as a reviewer get a progress display.
    if username not in conf.REVIEW_MAPPING.values():
        return ""

    # REVIEW_MAPPING maps annotator name -> reviewer name; first match wins.
    target_annotator = next(
        (annotator for annotator, reviewer in conf.REVIEW_MAPPING.items() if reviewer == username),
        None,
    )
    if not target_annotator:
        return ""

    with get_db() as db:
        try:
            target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
            if not target_annotator_obj:
                return f"⚠️ **Error:** Annotator '{target_annotator}' not found"

            assigned_intervals = db.query(AnnotationInterval).filter(
                AnnotationInterval.annotator_id == target_annotator_obj.id
            ).all()
            if not assigned_intervals:
                return f"⚠️ **Error:** No assigned intervals for annotator '{target_annotator}'"

            # Intervals with an open bound are ignored by both counts.
            bounded_intervals = [
                interval
                for interval in assigned_intervals
                if interval.start_index is not None and interval.end_index is not None
            ]

            def _count_in(interval, reviewed_only):
                """Count the annotator's annotations in one interval."""
                query = db.query(Annotation).join(
                    TTSData, Annotation.tts_data_id == TTSData.id
                )
                conditions = [
                    Annotation.annotator_id == target_annotator_obj.id,
                    TTSData.id >= interval.start_index,
                    TTSData.id <= interval.end_index,
                ]
                if reviewed_only:
                    # Keep only annotations this reviewer has validated.
                    query = query.join(
                        Validation, Annotation.id == Validation.annotation_id
                    )
                    conditions.append(Validation.validator_id == user_id)
                return query.filter(*conditions).count()

            total_count = sum(_count_in(interval, False) for interval in bounded_intervals)
            reviewed_count = sum(_count_in(interval, True) for interval in bounded_intervals)

            if total_count <= 0:
                return f"πŸ“­ **No items found for {target_annotator}**"

            percentage = (reviewed_count / total_count) * 100

            # Text progress bar, 30 characters wide.
            bar_width = 30
            filled = int((percentage / 100) * bar_width)
            empty = bar_width - filled

            # (exclusive upper bound, icon, CSS class); anything at or above
            # 100 falls through to the "complete" look.
            tiers = (
                (25, "πŸ”΄", "progress-bar-low"),
                (50, "🟑", "progress-bar-medium-low"),
                (75, "🟠", "progress-bar-medium"),
                (100, "🟒", "progress-bar-high"),
            )
            color, bar_color = next(
                ((icon, css) for limit, icon, css in tiers if percentage < limit),
                ("βœ…", "progress-bar-complete"),
            )

            progress_bar = "β–ˆ" * filled + "β–‘" * empty
            remaining = total_count - reviewed_count

            return f"""
<div class="progress-container">
<div class="progress-header">
<span class="progress-icon">{color}</span>
<strong>Review Progress</strong>
</div>
<div class="progress-bar-container">
<span class="progress-percentage">{percentage:.1f}%</span>
<div class="progress-bar {bar_color}">
<span class="progress-fill" style="width: {percentage:.1f}%"></span>
</div>
<span class="progress-stats">{reviewed_count}/{total_count}</span>
</div>
<div class="progress-details">
πŸ“Š <code>{progress_bar}</code>
<span class="remaining-items">({remaining} remaining)</span>
</div>
</div>
"""
        except Exception as e:
            # Best effort: a failed progress computation must not break the page.
            log.error(f"Error calculating review progress for user {user_id}: {e}")
            return "⚠️ **Error calculating progress**"
def load_review_items_fn(session):
    """Load the first batch of annotations for the logged-in reviewer.

    The reviewer's target annotator is looked up in ``conf.REVIEW_MAPPING``
    (annotator name -> reviewer name). Up to ``INITIAL_BATCH_SIZE``
    annotations are then loaded from the annotator's assigned intervals:
    a window starting a few ids before the first annotation this reviewer
    has not validated yet or, when nothing is left to review, the tail of
    the interval with the highest ``end_index``.

    Validation status stays lazy ("Loading...") for every item except the
    initially selected one. That one is resolved here so the status box and
    the rejection-reason box show real data on first display.

    Args:
        session: Mapping read via ``.get`` for ``user_id`` and ``username``.

    Returns:
        A 14-tuple, in this order: items, selected index, review-info text,
        tts id, filename, sentence, annotated sentence, annotated-at,
        validation status, annotator-name placeholder (always ``""``),
        audio update, rejection-reason update, rejection-mode flag (always
        ``False``), reject-button update.

    Database errors are not handled here and propagate to the caller.
    """
    INITIAL_BATCH_SIZE = 10  # items in the first batch
    WINDOW_BEFORE = 5  # ids to step back before the first unreviewed item

    def _empty_result(banner=""):
        """Return the 14-value tuple for the "nothing to show" case."""
        return (
            [], 0, banner, "", "", "", "", "", "", "",
            gr.update(value=None, autoplay=False),
            gr.update(visible=False, value=""),
            False,
            gr.update(value="❌ Reject"),
        )

    user_id = session.get("user_id")
    username = session.get("username")
    if not user_id or not username:
        log.warning("load_review_items_fn: user not found in session")
        return _empty_result()

    # Only users listed as a reviewer may load review items.
    if username not in conf.REVIEW_MAPPING.values():
        log.warning(f"User {username} is not assigned as a reviewer")
        return _empty_result()

    # First annotator mapped to this reviewer.
    target_annotator = next(
        (annotator for annotator, reviewer in conf.REVIEW_MAPPING.items() if reviewer == username),
        None,
    )
    if not target_annotator:
        log.warning(f"No target annotator found for reviewer {username}")
        return _empty_result()

    with get_db() as db:
        target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
        if not target_annotator_obj:
            log.error(f"Target annotator {target_annotator} not found in database")
            return _empty_result(f"Review Target Error: Annotator '{target_annotator}' not found.")
        log.info(f"Found target annotator with ID: {target_annotator_obj.id}")

        assigned_intervals = db.query(AnnotationInterval).filter(
            AnnotationInterval.annotator_id == target_annotator_obj.id
        ).all()
        if not assigned_intervals:
            log.warning(f"No assigned intervals found for annotator {target_annotator}")
            return _empty_result(
                f"Review Target Error: No assigned intervals for annotator '{target_annotator}'."
            )

        # Intervals with an open bound cannot be range-filtered; skip them.
        bounded_intervals = [
            interval
            for interval in assigned_intervals
            if interval.start_index is not None and interval.end_index is not None
        ]

        # First TTS id (in interval order) with no validation by this reviewer.
        first_unreviewed_tts_id = None
        for interval in bounded_intervals:
            row = db.query(TTSData.id).join(
                Annotation, Annotation.tts_data_id == TTSData.id
            ).outerjoin(
                Validation,
                (Validation.annotation_id == Annotation.id) & (Validation.validator_id == user_id)
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= interval.start_index,
                TTSData.id <= interval.end_index,
                Validation.id.is_(None)  # outer join found no validation row
            ).order_by(TTSData.id.asc()).first()
            if row:
                first_unreviewed_tts_id = row[0]
                break

        # Total annotations inside the assigned intervals (for the banner).
        total_count = sum(
            db.query(Annotation).join(
                TTSData, Annotation.tts_data_id == TTSData.id
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= interval.start_index,
                TTSData.id <= interval.end_index
            ).count()
            for interval in bounded_intervals
        )

        # A falsy id means nothing is left to review (ids are assumed to be
        # positive; the window below clamps at 1).
        all_reviewed = not first_unreviewed_tts_id

        if not all_reviewed:
            # Start slightly before the first unreviewed item so the reviewer
            # can step back to recently reviewed ones.
            window_start_id = max(1, first_unreviewed_tts_id - WINDOW_BEFORE)
            initial_results = db.query(
                Annotation,
                TTSData.filename,
                TTSData.sentence
            ).join(
                TTSData, Annotation.tts_data_id == TTSData.id
            ).join(
                AnnotationInterval,
                and_(
                    AnnotationInterval.annotator_id == target_annotator_obj.id,
                    TTSData.id >= AnnotationInterval.start_index,
                    TTSData.id <= AnnotationInterval.end_index
                )
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= window_start_id
            ).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE).all()
        else:
            # Everything reviewed: show the tail of the last interval.
            initial_results = []
            if total_count > 0:
                last_interval = max(assigned_intervals, key=lambda x: x.end_index or 0)
                if last_interval.start_index is not None and last_interval.end_index is not None:
                    initial_results = db.query(
                        Annotation,
                        TTSData.filename,
                        TTSData.sentence
                    ).join(
                        TTSData, Annotation.tts_data_id == TTSData.id
                    ).filter(
                        Annotation.annotator_id == target_annotator_obj.id,
                        TTSData.id >= last_interval.start_index,
                        TTSData.id <= last_interval.end_index
                    ).order_by(TTSData.id.desc()).limit(INITIAL_BATCH_SIZE).all()
                    initial_results.reverse()  # back to ascending id order

        log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")

        # Plain-dict items; validation status is filled in lazily per item.
        items = []
        for annotation, filename, sentence in initial_results:
            is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
            items.append({
                "annotation_id": annotation.id,
                "tts_id": annotation.tts_data_id,
                "filename": filename,
                "sentence": sentence,
                "annotated_sentence": "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence,
                "is_deleted": is_deleted,
                "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
                "validation_status": "Loading...",
                "validation_loaded": False
            })

        if not items:
            return _empty_result(f"πŸ” **Phase 2 Review Mode** - No annotations found for review.")

        # Select the first unreviewed item, or the last item when browsing a
        # fully reviewed set.
        initial_idx = 0
        if all_reviewed:
            initial_idx = len(items) - 1
        else:
            for i, item in enumerate(items):
                if item["tts_id"] == first_unreviewed_tts_id:
                    initial_idx = i
                    break

        initial_item = items[initial_idx]

        # Resolve the selected item's status now; with the lazy "Loading..."
        # placeholder the rejection-reason logic below could never trigger.
        validation_status, is_deleted = get_validation_status_for_item(
            db, initial_item["annotation_id"], user_id, initial_results[initial_idx][0]
        )
        initial_item["validation_status"] = validation_status
        initial_item["is_deleted"] = is_deleted
        initial_item["validation_loaded"] = True

        # Status looks like "Rejected (<reason>)"; take everything between
        # the first "(" and the LAST ")" so reasons may contain parentheses.
        rejection_reason_val = ""
        rejection_visible_val = validation_status.startswith("Rejected")
        if rejection_visible_val:
            start_paren = validation_status.find("(")
            end_paren = validation_status.rfind(")")
            if start_paren != -1 and end_paren > start_paren:
                rejection_reason_val = validation_status[start_paren + 1:end_paren]

        review_info_text = f"πŸ” **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."

        return (
            items,
            initial_idx,
            review_info_text,
            str(initial_item["tts_id"]),
            initial_item["filename"],
            initial_item["sentence"],
            initial_item["annotated_sentence"],
            initial_item["annotated_at"],
            initial_item["validation_status"],
            "",  # placeholder slot for the (anonymised) annotator name
            gr.update(value=None, autoplay=False),  # clear the audio player
            gr.update(visible=rejection_visible_val, value=rejection_reason_val),
            False,  # reset rejection mode
            gr.update(value="❌ Reject")  # reset reject button label
        )
def show_current_review_item_fn(items, idx, session):
    """Produce the display values for ``items[idx]``.

    If the item's validation status has not been loaded yet it is fetched
    from the database and cached on the item dict (mutated in place). A
    failure while loading is logged and shown as "Error loading status".

    Args:
        items: Loaded batch of item dicts.
        idx: Index of the item to display.
        session: Mapping read via ``.get`` for ``user_id``.

    Returns:
        An 11-tuple, in this order: tts id, filename, sentence, annotated
        sentence, annotated-at, validation status, annotator-name
        placeholder (always ``""``), audio update, rejection-reason update,
        rejection-mode flag (always ``False``), reject-button update. For
        an empty batch or out-of-range index all text fields are ``""``.
    """
    if not items or idx >= len(items) or idx < 0:
        return (
            "", "", "", "", "", "", "",
            gr.update(value=None, autoplay=False),
            gr.update(visible=False, value=""),
            False,
            gr.update(value="❌ Reject"),
        )

    current_item = items[idx]

    # Load validation status on demand, once per item.
    if not current_item.get("validation_loaded", False):
        user_id = session.get("user_id")
        if user_id:
            with get_db() as db:
                try:
                    annotation_obj = db.query(Annotation).filter_by(id=current_item["annotation_id"]).first()
                    if annotation_obj:
                        validation_status, is_deleted = get_validation_status_for_item(
                            db, current_item["annotation_id"], user_id, annotation_obj
                        )
                        current_item["validation_status"] = validation_status
                        current_item["is_deleted"] = is_deleted
                        current_item["validation_loaded"] = True
                        # The annotation may have been deleted since the batch was loaded.
                        if is_deleted:
                            current_item["annotated_sentence"] = "[DELETED ANNOTATION]"
                        log.info(f"Loaded validation status for item {idx}: {validation_status}")
                except Exception as e:
                    # Best effort: still show the item, with an error status.
                    log.error(f"Error loading validation status for item {idx}: {e}")
                    current_item["validation_status"] = "Error loading status"

    status = current_item["validation_status"]
    rejection_reason = ""
    rejection_visible = status.startswith("Rejected")
    if rejection_visible:
        # Status looks like "Rejected (<reason>)". Use the LAST ")" so a
        # reason that itself contains parentheses is not cut short.
        start_paren = status.find("(")
        end_paren = status.rfind(")")
        if start_paren != -1 and end_paren > start_paren:
            rejection_reason = status[start_paren + 1:end_paren]

    return (
        str(current_item["tts_id"]),
        current_item["filename"],
        current_item["sentence"],
        current_item["annotated_sentence"],
        current_item["annotated_at"],
        status,
        "",  # placeholder slot for the (anonymised) annotator name
        gr.update(value=None, autoplay=False),
        gr.update(visible=rejection_visible, value=rejection_reason),
        False,  # reset rejection mode
        gr.update(value="❌ Reject")  # reset reject button label
    )
def update_review_info_fn(items, total_count):
    """Return the banner text for the current batch.

    With a non-empty ``items`` the text reports ``len(items)`` out of
    ``total_count``; otherwise it states that nothing was found.
    """
    if not items:
        return f"πŸ” **Phase 2 Review Mode** - No annotations found for review."
    return f"πŸ” **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
def navigate_and_load_fn(items, current_idx, direction, session):
    """Move one step through the batch, fetching more items at the edges.

    Args:
        items: Loaded batch of item dicts.
        current_idx: Index of the item currently shown.
        direction: ``"next"`` moves forward; any other value moves back.
        session: Passed through to the loading helpers.

    Returns:
        ``(items, new_index, review_info)``. ``review_info`` is ``""``
        unless a load was attempted, in which case it is the refreshed
        banner text.
    """
    if not items:
        return items, 0, ""

    if direction == "next":
        last_idx = len(items) - 1
        new_idx = min(current_idx + 1, last_idx)

        # More items are requested only when landing on the last loaded item
        # while the batch length is a multiple of 5.
        if new_idx != last_idx or len(items) % 5 != 0:
            return items, new_idx, ""

        log.info(f"User reached end of loaded items ({new_idx}/{len(items)}), will load more items")
        updated_items, total_count = load_more_items_fn(items, session, current_batch_size=10)
        return updated_items, new_idx, update_review_info_fn(updated_items, total_count)

    # Backwards.
    new_idx = max(current_idx - 1, 0)

    # Earlier items are requested only when the user was already on the
    # first item and steps back again.
    if not (new_idx == 0 and current_idx == 0):
        return items, new_idx, ""

    log.info(f"User reached beginning of loaded items, will load previous items")
    updated_items, total_count, loaded_count = load_previous_items_fn(items, session, current_batch_size=5)
    # Items were prepended, so the current item moved right by that many slots.
    return updated_items, new_idx + loaded_count, update_review_info_fn(updated_items, total_count)
def save_validation_fn(items, idx, session, approved: bool, rejection_reason: str = ""):
    """Create or update this reviewer's validation of ``items[idx]``.

    Args:
        items: Loaded batch of item dicts; the saved item's
            ``validation_status`` is updated in place.
        idx: Index of the item being validated.
        session: Mapping read via ``.get`` for ``user_id``.
        approved: True to approve, False to reject.
        rejection_reason: Stored as the validation description when
            rejecting; ignored (stored as ``None``) when approving.

    Returns:
        ``(items, status_text, rejection_input_update)``. On invalid index
        or missing user the status text is an error message and the update
        hides the rejection input.

    Database errors are not handled here and propagate to the caller.
    """
    # A negative index would silently address an item from the end of the
    # batch, so it is rejected along with too-large ones.
    if not items or idx < 0 or idx >= len(items):
        # gr.Warning shows a toast right away; gr.Error only has an effect
        # when raised, which would drop the return values below.
        gr.Warning("Invalid item index")
        return items, "Error: Invalid item index", gr.update(visible=False)

    user_id = session.get("user_id")
    if not user_id:
        gr.Warning("User not logged in")
        return items, "Error: User not logged in", gr.update(visible=False)

    annotation_id = items[idx]["annotation_id"]
    log.info(f"Saving validation for annotation_id: {annotation_id}, validator_id: {user_id}, approved: {approved}, reason: {rejection_reason}")

    # A reason is only meaningful for rejections.
    description = None if approved else rejection_reason

    with get_db() as db:
        existing_validation = db.query(Validation).filter_by(
            annotation_id=annotation_id,
            validator_id=user_id
        ).first()

        if existing_validation:
            log.info(f"Updating existing validation for annotation_id: {annotation_id}")
            existing_validation.validated = approved
            existing_validation.description = description
            existing_validation.validated_at = datetime.datetime.utcnow()
        else:
            log.info(f"Creating new validation for annotation_id: {annotation_id}")
            db.add(Validation(
                annotation_id=annotation_id,
                validator_id=user_id,
                validated=approved,
                description=description,
                validated_at=datetime.datetime.utcnow(),
            ))
        db.commit()
        log.info(f"Validation saved successfully for annotation_id: {annotation_id}")

        # Mirror the saved decision on the in-memory item.
        if approved:
            status = "Approved"
        elif rejection_reason:
            status = f"Rejected ({rejection_reason})"
        else:
            status = "Rejected"
        items[idx]["validation_status"] = status

        # Show the reason box only for rejections; hide and clear it otherwise.
        rejection_input_update = gr.update(
            visible=not approved, value="" if approved else rejection_reason
        )
        return items, status, rejection_input_update
def handle_rejection_fn(items, idx, session, rejection_reason, rejection_mode_active):
    """Drive the two-click reject flow.

    First click (``rejection_mode_active`` falsy) reveals an empty reason
    box and relabels the button. Second click saves the rejection through
    ``save_validation_fn`` if a non-blank reason was entered; otherwise it
    warns and stays in rejection mode.

    Returns:
        ``(items, status_text, rejection_input_update, rejection_mode,
        reject_button_update)``.
    """
    if not items or idx >= len(items):
        return items, "Error: Invalid item", gr.update(visible=False), False, gr.update(value="❌ Reject")

    current_status = items[idx]["validation_status"]
    confirm_label = "⚠️ Confirm Reject"

    # Step 1: ask for a reason.
    if not rejection_mode_active:
        return (
            items,
            current_status,
            gr.update(visible=True, value=""),
            True,
            gr.update(value=confirm_label),
        )

    # Step 2 without a usable reason: warn and keep waiting.
    if not rejection_reason or rejection_reason.strip() == "":
        gr.Warning("Rejection reason cannot be empty. Please provide a reason before confirming rejection.")
        return (
            items,
            current_status,
            gr.update(visible=True, value=rejection_reason),
            True,
            gr.update(value=confirm_label),
        )

    # Step 2 with a reason: persist, then restore the idle state. The input
    # update suggested by save_validation_fn is replaced by a hidden, cleared one.
    updated_items, validation_status, _ = save_validation_fn(
        items, idx, session, approved=False, rejection_reason=rejection_reason.strip()
    )
    return (
        updated_items,
        validation_status,
        gr.update(visible=False, value=""),
        False,
        gr.update(value="❌ Reject"),
    )
def jump_by_data_id_fn(items, target_data_id, current_idx, session):
    """Replace the loaded batch with one around a requested TTS id.

    The id must belong to an annotation of the reviewer's target annotator
    inside one of that annotator's assigned intervals. On success a fresh
    batch of up to 10 items, starting up to 5 ids before the target, is
    returned with every validation status reset to "Loading...".

    Args:
        items: Currently loaded batch (returned unchanged on any failure).
        target_data_id: Requested id; a falsy value is a no-op.
        current_idx: Current index (returned unchanged on any failure).
        session: Mapping read via ``.get`` for ``user_id`` and ``username``.

    Returns:
        ``(items, index, review_info)``; ``review_info`` is ``""`` unless
        the jump succeeded.
    """
    unchanged = (items, current_idx, "")

    if not target_data_id:
        return unchanged

    user_id = session.get("user_id")
    username = session.get("username")
    if not (user_id and username):
        gr.Warning("User session not found")
        return unchanged

    # First annotator mapped to this reviewer.
    target_annotator = next(
        (annotator for annotator, reviewer in conf.REVIEW_MAPPING.items() if reviewer == username),
        None,
    )
    if not target_annotator:
        gr.Warning("Target annotator not found for user")
        return unchanged

    def _as_item(annotation, filename, sentence):
        """Convert one result row into the plain dict kept in the batch."""
        is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
        return {
            "annotation_id": annotation.id,
            "tts_id": annotation.tts_data_id,
            "filename": filename,
            "sentence": sentence,
            "annotated_sentence": "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence,
            "is_deleted": is_deleted,
            "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
            "validation_status": "Loading...",
            "validation_loaded": False
        }

    try:
        target_id = int(target_data_id)

        with get_db() as db:
            target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
            if not target_annotator_obj:
                gr.Warning("Target annotator not found in database")
                return unchanged

            assigned_intervals = db.query(AnnotationInterval).filter(
                AnnotationInterval.annotator_id == target_annotator_obj.id
            ).all()
            if not assigned_intervals:
                gr.Warning("No assigned intervals found")
                return unchanged

            # Intervals with an open bound are ignored.
            usable_intervals = [
                interval
                for interval in assigned_intervals
                if interval.start_index is not None and interval.end_index is not None
            ]

            def _annotations_in(interval, *extra_conditions):
                """Query the annotator's annotations inside one interval."""
                return db.query(Annotation).join(
                    TTSData, Annotation.tts_data_id == TTSData.id
                ).filter(
                    Annotation.annotator_id == target_annotator_obj.id,
                    *extra_conditions,
                    TTSData.id >= interval.start_index,
                    TTSData.id <= interval.end_index
                )

            # The target must exist inside one of the assigned intervals.
            for interval in usable_intervals:
                if _annotations_in(interval, TTSData.id == target_id).first():
                    break
            else:
                gr.Warning(f"Data ID {target_id} not found in assigned review range")
                return unchanged

            # Load a batch that starts a few ids before the target.
            BATCH_SIZE = 10
            WINDOW_BEFORE = BATCH_SIZE // 2
            window_start_id = max(1, target_id - WINDOW_BEFORE)

            results = db.query(
                Annotation,
                TTSData.filename,
                TTSData.sentence
            ).join(
                TTSData, Annotation.tts_data_id == TTSData.id
            ).join(
                AnnotationInterval,
                and_(
                    AnnotationInterval.annotator_id == target_annotator_obj.id,
                    TTSData.id >= AnnotationInterval.start_index,
                    TTSData.id <= AnnotationInterval.end_index
                )
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= window_start_id
            ).order_by(TTSData.id).limit(BATCH_SIZE).all()

            new_items = [
                _as_item(annotation, filename, sentence)
                for annotation, filename, sentence in results
            ]
            if not new_items:
                gr.Warning(f"No items loaded around ID {target_id}")
                return unchanged

            # Position of the target inside the new batch (last match wins,
            # 0 if it is not in the batch).
            target_idx = 0
            for i, entry in enumerate(new_items):
                if entry["tts_id"] == target_id:
                    target_idx = i

            # Total annotations in the assigned intervals, for the banner.
            total_count = sum(
                _annotations_in(interval).count() for interval in usable_intervals
            )

            review_info = f"πŸ” **Phase 2 Review Mode** - Jumped to ID {target_id}. Loaded {len(new_items)} of {total_count} total items."
            log.info(f"Successfully jumped to TTS ID {target_id}, loaded {len(new_items)} items, target at index {target_idx}")
            return new_items, target_idx, review_info
    except ValueError:
        gr.Warning(f"Invalid Data ID format: {target_data_id}")
        return unchanged
    except Exception as e:
        # Best effort: keep the current batch if anything else goes wrong.
        log.error(f"Error jumping to ID {target_data_id}: {e}")
        gr.Warning(f"Error jumping to ID {target_data_id}")
        return unchanged
def load_more_items_fn(items, session, current_batch_size=10):
    """Append the next batch of review items after those already loaded.

    The reviewer's username (from ``session``) is looked up among the values
    of ``conf.REVIEW_MAPPING``; the matching key is the name of the annotator
    whose annotations are fetched.

    Args:
        items: Already-loaded item dicts; the last one's ``"annotation_id"``
            is the pagination cursor (0 when the list is empty).
        session: Mapping providing ``"user_id"`` and ``"username"``.
        current_batch_size: Maximum number of additional rows to fetch.

    Returns:
        ``(all_items, total_count)`` where ``total_count`` is the sum of the
        annotator's annotation counts over every assigned interval that has
        both bounds set. Every early exit returns ``(items, 0)``.
    """
    reviewer_id = session.get("user_id")
    reviewer_name = session.get("username")
    if not (reviewer_id and reviewer_name):
        return items, 0  # No user session: hand back what we already have

    # First mapping entry whose reviewer is the logged-in user
    annotator_name = next(
        (name for name, reviewer in conf.REVIEW_MAPPING.items() if reviewer == reviewer_name),
        None,
    )
    if not annotator_name:
        return items, 0

    with get_db() as db:
        annotator = db.query(Annotator).filter_by(name=annotator_name).first()
        if not annotator:
            return items, 0

        intervals = db.query(AnnotationInterval).filter(
            AnnotationInterval.annotator_id == annotator.id
        ).all()
        if not intervals:
            return items, 0

        # Progress total: one COUNT per interval, ignoring half-open intervals
        total_count = sum(
            db.query(Annotation)
            .join(TTSData, Annotation.tts_data_id == TTSData.id)
            .filter(
                Annotation.annotator_id == annotator.id,
                TTSData.id >= interval.start_index,
                TTSData.id <= interval.end_index,
            )
            .count()
            for interval in intervals
            if interval.start_index is not None and interval.end_index is not None
        )

        # Continue right after the last annotation that is already loaded
        last_loaded_id = items[-1]["annotation_id"] if items else 0

        # Restrict rows to TTS ids inside one of the annotator's intervals
        inside_assigned_interval = and_(
            AnnotationInterval.annotator_id == annotator.id,
            TTSData.id >= AnnotationInterval.start_index,
            TTSData.id <= AnnotationInterval.end_index,
        )
        rows = (
            db.query(Annotation, TTSData.filename, TTSData.sentence)
            .join(TTSData, Annotation.tts_data_id == TTSData.id)
            .join(AnnotationInterval, inside_assigned_interval)
            .filter(
                Annotation.annotator_id == annotator.id,
                Annotation.id > last_loaded_id,
            )
            .order_by(Annotation.id)
            .limit(current_batch_size)
            .all()
        )

        fetched = []
        for annotation, filename, sentence in rows:
            text = annotation.annotated_sentence
            # A missing, empty or whitespace-only sentence marks a deleted annotation
            is_deleted = not (text and text.strip())
            stamp = annotation.annotated_at
            fetched.append({
                "annotation_id": annotation.id,
                "tts_id": annotation.tts_data_id,
                "filename": filename,
                "sentence": sentence,
                "annotated_sentence": "[DELETED ANNOTATION]" if is_deleted else text,
                "is_deleted": is_deleted,
                "annotated_at": stamp.isoformat() if stamp else "",
                "validation_status": "Loading...",  # Filled in later, on demand
                "validation_loaded": False,  # Flags that the status is still a placeholder
            })

        all_items = items + fetched
        log.info(f"Loaded {len(fetched)} more items after id {last_loaded_id}, total now: {len(all_items)}")
        return all_items, total_count
def load_previous_items_fn(items, session, current_batch_size=5):
    """Load the batch of review items that precedes the currently loaded ones.

    The reviewer's username (from ``session``) is looked up among the values
    of ``conf.REVIEW_MAPPING``; the matching key is the name of the annotator
    whose annotations are fetched.

    Args:
        items: Already-loaded item dicts. The first one's ``"annotation_id"``
            is the upper bound for the rows to fetch. When empty or ``None``
            no upper bound is applied and the newest rows are returned.
        session: Mapping providing ``"user_id"`` and ``"username"``.
        current_batch_size: Maximum number of earlier rows to fetch.

    Returns:
        ``(all_items, total_count, loaded_count)``: the fetched rows (in
        ascending id order) followed by the existing items, the sum of the
        annotator's annotation counts over every assigned interval with both
        bounds set, and the number of rows that were prepended. Every early
        exit returns ``(items, 0, 0)``.
    """
    user_id = session.get("user_id")
    username = session.get("username")
    if not user_id or not username:
        return items, 0, 0  # Return existing items if no user session

    # Find the annotator this reviewer is responsible for
    target_annotator = next(
        (
            annotator_name
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items()
            if reviewer_name == username
        ),
        None,
    )
    if not target_annotator:
        return items, 0, 0

    with get_db() as db:
        target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
        if not target_annotator_obj:
            return items, 0, 0

        # Get the target annotator's assigned intervals
        assigned_intervals = db.query(AnnotationInterval).filter(
            AnnotationInterval.annotator_id == target_annotator_obj.id
        ).all()
        if not assigned_intervals:
            return items, 0, 0

        # Count total annotations within assigned intervals for progress info
        total_count = 0
        for interval in assigned_intervals:
            if interval.start_index is None or interval.end_index is None:
                continue
            total_count += db.query(Annotation).join(
                TTSData, Annotation.tts_data_id == TTSData.id
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= interval.start_index,
                TTSData.id <= interval.end_index
            ).count()

        existing_items = items or []
        first_loaded_id = existing_items[0]["annotation_id"] if existing_items else None

        query = db.query(
            Annotation,
            TTSData.filename,
            TTSData.sentence
        ).join(
            TTSData, Annotation.tts_data_id == TTSData.id
        ).join(
            AnnotationInterval,
            and_(
                AnnotationInterval.annotator_id == target_annotator_obj.id,
                TTSData.id >= AnnotationInterval.start_index,
                TTSData.id <= AnnotationInterval.end_index
            )
        ).filter(
            Annotation.annotator_id == target_annotator_obj.id
        )
        # Only bound the query when there is a real id to compare against.
        # Binding float('inf') against the integer id column is not a valid
        # integer parameter (some DB drivers refuse to bind it at all);
        # omitting the filter yields the same "no upper bound" semantics.
        if first_loaded_id is not None:
            query = query.filter(Annotation.id < first_loaded_id)

        # Newest-first so LIMIT keeps the rows closest to the current batch
        results = query.order_by(Annotation.id.desc()).limit(current_batch_size).all()
        results.reverse()  # Restore ascending order

        # Process new items with minimal data - validation status loaded on-demand
        new_items = []
        for annotation, filename, sentence in results:
            # A missing, empty or whitespace-only sentence marks a deleted annotation
            is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
            annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
            new_items.append({
                "annotation_id": annotation.id,
                "tts_id": annotation.tts_data_id,
                "filename": filename,
                "sentence": sentence,
                "annotated_sentence": annotated_sentence_display,
                "is_deleted": is_deleted,
                "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
                "validation_status": "Loading...",  # Will be loaded on-demand
                "validation_loaded": False  # Track if validation status has been loaded
            })

        # Combine with existing items (new items go to the front)
        all_items = new_items + existing_items
        loaded_count = len(new_items)
        log.info(f"Loaded {loaded_count} items before id {first_loaded_id}, total now: {len(all_items)}")
        return all_items, total_count, loaded_count
# Output definitions
# Components that together render the currently selected review item.
# NOTE(review): handlers wired to this list presumably have to return one value
# per entry, in exactly this order — confirm against show_current_review_item_fn.
review_display_outputs = [
self.tts_id, self.filename, self.sentence, self.ann_sentence,
self.annotated_at,
self.current_validation_status,
self.header.welcome, # Placeholder for anonymized annotator name
self.audio,
self.rejection_reason_input, # Added rejection reason input to display outputs
self.rejection_mode_active, # Added rejection mode state
self.btn_reject # Added reject button to display outputs
]
# Trigger data loading when load_trigger changes (after successful login for a reviewer)
# The steps run strictly in sequence: each .then() starts after the previous step.
self.load_trigger.change(
# update_ui_interactive_state is defined elsewhere; presumably False disables
# the interactive elements while loading is in progress — TODO confirm.
fn=lambda: update_ui_interactive_state(False),
outputs=self.interactive_ui_elements
).then(
# One call fills the items list, the current index, the banner and every
# component in review_display_outputs.
fn=load_review_items_fn,
inputs=[session_state],
outputs=[self.items_state, self.idx_state, self.review_info] + review_display_outputs
).then(
fn=get_review_progress_fn,
inputs=[session_state],
outputs=[self.header.progress_display]
).then(
# No audio is downloaded on initial load; both the cached state and the player are reset.
fn=lambda: (None, gr.update(value=None)), # Clear audio state
outputs=[self.original_audio_state, self.audio]
).then(
fn=lambda: update_ui_interactive_state(True),
outputs=self.interactive_ui_elements
)
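# No filename.change callback is registered, to prevent slow loading during initialization.
# Audio is instead fetched by the explicit download_voice_fn steps in the event chains below
# (navigation, approve/reject, skip, jump) and by the Load Audio button.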
# Navigation buttons
# Both buttons get the same chain; only the direction string passed to
# navigate_and_load_fn differs.
for btn, direction in [(self.btn_prev, "prev"), (self.btn_next, "next")]:
btn.click(
fn=lambda: update_ui_interactive_state(False),
outputs=self.interactive_ui_elements
).then(
# dir=direction binds the loop value when the lambda is created; without the
# default argument both lambdas would read the final value of `direction`.
fn=lambda items, idx, session, dir=direction: navigate_and_load_fn(items, idx, dir, session),
inputs=[self.items_state, self.idx_state, session_state],
outputs=[self.items_state, self.idx_state, self.review_info]
).then(
# Render the item at the (possibly updated) index.
fn=show_current_review_item_fn,
inputs=[self.items_state, self.idx_state, session_state],
outputs=review_display_outputs
).then(
# Auto-load audio with autoplay for smooth navigation
# NOTE(review): self.audio is listed twice in outputs, so download_voice_fn
# must return three values — confirm what the first and third are.
fn=download_voice_fn,
inputs=[self.filename],
outputs=[self.audio, self.original_audio_state, self.audio]
).then(
# Clear the jump-to-ID input after navigating.
lambda: gr.update(value=None),
outputs=self.jump_data_id_input
).then(
fn=lambda: update_ui_interactive_state(True),
outputs=self.interactive_ui_elements
)
# Approve/Reject buttons
# Approve chain: save the validation, refresh progress, reset the rejection UI,
# then advance to the next item and load its audio.
self.btn_approve.click(
fn=lambda items, idx, session: save_validation_fn(items, idx, session, approved=True, rejection_reason=""), # Pass empty rejection_reason
inputs=[self.items_state, self.idx_state, session_state],
outputs=[self.items_state, self.current_validation_status, self.rejection_reason_input]
).then(
fn=get_review_progress_fn, # Update progress after approval
inputs=[session_state],
outputs=[self.header.progress_display]
).then(
fn=lambda: False, # Reset rejection mode
outputs=[self.rejection_mode_active]
).then(
fn=lambda: gr.update(value="❌ Reject"), # Reset reject button
outputs=[self.btn_reject]
).then(
# Always moves forward after an approval.
fn=lambda items, idx, session: navigate_and_load_fn(items, idx, "next", session),
inputs=[self.items_state, self.idx_state, session_state],
outputs=[self.items_state, self.idx_state, self.review_info]
).then(
fn=show_current_review_item_fn,
inputs=[self.items_state, self.idx_state, session_state],
outputs=review_display_outputs
).then(
# Auto-load audio with autoplay after moving to next item
fn=download_voice_fn,
inputs=[self.filename],
outputs=[self.audio, self.original_audio_state, self.audio]
)
# Reject chain. handle_rejection_fn receives and returns rejection_mode_active;
# every later step reads that flag and only advances to the next item when it
# is falsy. NOTE(review): presumably a truthy flag means the reviewer still has
# to enter a rejection reason — confirm against handle_rejection_fn.
self.btn_reject.click(
    fn=handle_rejection_fn,
    inputs=[self.items_state, self.idx_state, session_state, self.rejection_reason_input, self.rejection_mode_active],
    outputs=[self.items_state, self.current_validation_status, self.rejection_reason_input, self.rejection_mode_active, self.btn_reject]
).then(
    # Update progress only after successful rejection. While still in rejection
    # mode return gr.update() (a no-op) so the progress display keeps its
    # current content instead of being overwritten with an empty string.
    fn=lambda items, idx, session, rejection_mode: get_review_progress_fn(session) if not rejection_mode else gr.update(),
    inputs=[self.items_state, self.idx_state, session_state, self.rejection_mode_active],
    outputs=[self.header.progress_display]
).then(
    # Advance only when the rejection has completed; otherwise stay on the item.
    fn=lambda items, idx, session, rejection_mode: navigate_and_load_fn(items, idx, "next", session) if not rejection_mode else (items, idx, ""),
    inputs=[self.items_state, self.idx_state, session_state, self.rejection_mode_active],
    outputs=[self.items_state, self.idx_state, self.review_info]
).then(
    # In rejection mode the current item is re-rendered from the items state;
    # the tuple has one entry per component in review_display_outputs.
    fn=lambda items, idx, session, rejection_mode: show_current_review_item_fn(items, idx, session) if not rejection_mode else (
        str(items[idx]["tts_id"]) if items and idx < len(items) else "",
        items[idx]["filename"] if items and idx < len(items) else "",
        items[idx]["sentence"] if items and idx < len(items) else "",
        items[idx]["annotated_sentence"] if items and idx < len(items) else "",
        items[idx]["annotated_at"] if items and idx < len(items) else "",
        items[idx]["validation_status"] if items and idx < len(items) else "",
        "",  # annotator placeholder
        gr.update(value=None, autoplay=False),  # audio
        gr.update(),  # rejection_reason_input - don't change
        rejection_mode,  # keep rejection mode as is
        gr.update()  # btn_reject - don't change
    ),
    inputs=[self.items_state, self.idx_state, session_state, self.rejection_mode_active],
    outputs=review_display_outputs
).then(
    # Auto-load audio with autoplay only if we moved to next item (not in rejection mode)
    fn=lambda filename, rejection_mode: download_voice_fn(filename) if not rejection_mode else (None, None, gr.update(value=None, autoplay=False)),
    inputs=[self.filename, self.rejection_mode_active],
    outputs=[self.audio, self.original_audio_state, self.audio]
)
# Skip button (just navigate to next)
# No validation is saved; unlike the Previous/Next chain, this one has no
# update_ui_interactive_state steps around it.
self.btn_skip.click(
fn=lambda items, idx, session: navigate_and_load_fn(items, idx, "next", session),
inputs=[self.items_state, self.idx_state, session_state],
outputs=[self.items_state, self.idx_state, self.review_info]
).then(
fn=show_current_review_item_fn,
inputs=[self.items_state, self.idx_state, session_state],
outputs=review_display_outputs
).then(
# Auto-load audio with autoplay after skipping
fn=download_voice_fn,
inputs=[self.filename],
outputs=[self.audio, self.original_audio_state, self.audio]
)
# Jump button
# jump_by_data_id_fn returns (items, index, banner text); when the jump fails it
# returns the unchanged items and index, so the later steps re-render the same item.
self.btn_jump.click(
fn=jump_by_data_id_fn,
inputs=[self.items_state, self.jump_data_id_input, self.idx_state, session_state],
outputs=[self.items_state, self.idx_state, self.review_info]
).then(
fn=show_current_review_item_fn,
inputs=[self.items_state, self.idx_state, session_state],
outputs=review_display_outputs
).then(
# Auto-load audio with autoplay after jumping
fn=download_voice_fn,
inputs=[self.filename],
outputs=[self.audio, self.original_audio_state, self.audio]
).then(
# Clear the jump-to-ID input once the jump has been processed.
lambda: gr.update(value=None),
outputs=self.jump_data_id_input
)
# Load audio button
# Manual (re)load of the audio for the filename currently shown, wrapped in the
# same update_ui_interactive_state(False/True) pair as the navigation chain.
self.btn_load_voice.click(
fn=lambda: update_ui_interactive_state(False),
outputs=self.interactive_ui_elements
).then(
fn=download_voice_fn,
inputs=[self.filename],
outputs=[self.audio, self.original_audio_state, self.audio]
).then(
fn=lambda: update_ui_interactive_state(True),
outputs=self.interactive_ui_elements
)
# Hand the page's root column back to the caller once all events are wired.
return self.container