File size: 3,647 Bytes
1000353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import argparse
import sys
import os
from sqlalchemy import func, and_

# Make the parent of this file's directory importable before the project
# imports below run. The parent directory is presumably the project root that
# contains the `utils` and `data` packages -- confirm against the repo layout.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if project_root not in sys.path:
    # Insert at the front so project modules take precedence over any
    # same-named modules found later on the path.
    sys.path.insert(0, project_root)

# These imports must stay below the sys.path adjustment above; they rely on
# the project root being importable when the script is run directly.
from utils.database import get_db
from data.models import Annotator, Annotation, AnnotationInterval, TTSData
from utils.logger import Logger

# Module-level logger used for all report output and error reporting.
log = Logger()

def _count_assigned_items(db, annotator_id):
    """Return how many index positions the annotator's intervals cover."""
    # Intervals are inclusive on both ends, hence the "+ 1" per interval.
    interval_size = AnnotationInterval.end_index - AnnotationInterval.start_index + 1
    total = (
        db.query(func.sum(interval_size))
        .filter(AnnotationInterval.annotator_id == annotator_id)
        .scalar()
    )
    # SUM over zero rows yields NULL, which arrives here as None.
    return total if total is not None else 0


def _count_completed_items(db, annotator_id):
    """Return how many distinct assigned items the annotator has completed."""
    # An item is "assigned" when its TTSData.id falls inside one of the
    # annotator's [start_index, end_index] intervals.
    in_assigned_interval = and_(
        AnnotationInterval.annotator_id == annotator_id,
        TTSData.id >= AnnotationInterval.start_index,
        TTSData.id <= AnnotationInterval.end_index,
    )
    completed = (
        # Count DISTINCT items rather than joined rows: the interval join
        # yields one row per matching interval, so overlapping intervals (or
        # several annotation rows for one item) would otherwise inflate the
        # count and push progress above 100%.
        db.query(func.count(Annotation.tts_data_id.distinct()))
        .join(TTSData, Annotation.tts_data_id == TTSData.id)
        .join(AnnotationInterval, in_assigned_interval)
        .filter(
            Annotation.annotator_id == annotator_id,
            # Completed means the annotated sentence is neither NULL nor "".
            Annotation.annotated_sentence.isnot(None),
            Annotation.annotated_sentence != "",
        )
        .scalar()
    )
    return completed if completed is not None else 0


def generate_annotator_progress_report():
    """
    Generates and logs a progress report for each active annotator.

    Progress is defined as the percentage of assigned TTSData items
    that have a non-empty annotation by that annotator. "Assigned" items
    are those whose TTSData.id lies within one of the annotator's
    AnnotationInterval ranges (inclusive on both ends).

    Returns:
        None. The report is written through the module-level ``log``.

    Note:
        Any exception raised while building the report is caught, logged
        via ``log.error`` and not re-raised.
    """
    with get_db() as db:
        try:
            # "== True" is intentional: it builds a SQL comparison expression.
            annotators = db.query(Annotator).filter(Annotator.is_active == True).all()  # noqa: E712

            if not annotators:
                log.info("No active annotators found.")
                return

            log.info("--- Annotator Progress Report ---")

            for annotator in annotators:
                total_assigned = _count_assigned_items(db, annotator.id)
                completed_count = _count_completed_items(db, annotator.id)

                # Guard against division by zero for annotators without intervals.
                percentage_completed = 0
                if total_assigned > 0:
                    percentage_completed = (completed_count / total_assigned) * 100

                log.info(f"Annotator: {annotator.name} (ID: {annotator.id})")
                log.info(f"  Total Assigned Items: {total_assigned}")
                log.info(f"  Completed Items: {completed_count}")
                log.info(f"  Progress: {percentage_completed:.2f}%")
                log.info("-" * 30)

        except Exception as e:
            # Top-level script boundary: report the failure instead of crashing.
            log.error(f"Failed to generate annotator progress report: {e}")

if __name__ == "__main__":
    # The report always covers every active annotator, so the script takes
    # no command-line arguments and simply runs the report.
    generate_annotator_progress_report()