#!/usr/bin/env python3
"""
Phase 2 Review Results Report Script

This script generates a comprehensive report of Phase 2 review results,
showing approval and rejection statistics for each reviewer and overall totals.
"""

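# Example invocations (the script filename below is illustrative; substitute the
# actual path of this file in the repository):
#
#   python report_review_results.py                        # full per-reviewer report
#   python report_review_results.py --detailed             # include rejection reasons
#   python report_review_results.py --annotator-breakdown  # add per-annotator breakdown
#   python report_review_results.py --csv                  # also export per-reviewer stats to CSV
#   python report_review_results.py --quick                # one-line summary only
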
import argparse
import sys
import os
from datetime import datetime
from collections import defaultdict

# Add project root to Python path
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from utils.database import get_db
from data.models import Annotator, Annotation, Validation, TTSData
from utils.logger import Logger
from config import conf

log = Logger()

def generate_review_results_report(detailed=False, export_csv=False):
    """
    Generates and prints a review results report for Phase 2 validation.
    
    Args:
        detailed (bool): If True, shows detailed breakdown by annotator being reviewed.
        export_csv (bool): If True, exports results to CSV file.
    """
    with get_db() as db:
        try:
            # Get all reviewers (users who appear in REVIEW_MAPPING values)
            reviewers = list(conf.REVIEW_MAPPING.values())
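            # NOTE: conf.REVIEW_MAPPING maps each annotator's name to the reviewer
            # assigned to check their work, so its values are the reviewers. The
            # reverse lookup further below assumes this mapping is one-to-one
            # (each reviewer is responsible for exactly one annotator).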
            
            if not reviewers:
                print("No reviewers found in REVIEW_MAPPING configuration.")
                return

            print("=" * 80)
            print("                    PHASE 2 REVIEW RESULTS REPORT")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print()

            overall_approved = 0
            overall_rejected = 0
            overall_total = 0
            csv_data = []

            for reviewer_name in reviewers:
                # Get reviewer object
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()
                if not reviewer:
                    print(f"⚠️  Reviewer '{reviewer_name}' not found in database")
                    continue

                # Find which annotator this reviewer is assigned to review
                assigned_annotator = None
                for annotator_name, assigned_reviewer in conf.REVIEW_MAPPING.items():
                    if assigned_reviewer == reviewer_name:
                        assigned_annotator = annotator_name
                        break

                if not assigned_annotator:
                    print(f"⚠️  No annotator assignment found for reviewer '{reviewer_name}'")
                    continue

                # Get annotator being reviewed
                annotator = db.query(Annotator).filter_by(name=assigned_annotator).first()
                if not annotator:
                    print(f"⚠️  Assigned annotator '{assigned_annotator}' not found in database")
                    continue

                print(f"\n📋 REVIEWER: {reviewer_name}")
                print(f"   Reviewing work by: {assigned_annotator}")
                print("-" * 60)

                # Get all validations by this reviewer for the assigned annotator's work
                validations_query = db.query(Validation)\
                    .join(Annotation, Validation.annotation_id == Annotation.id)\
                    .filter(
                        Validation.validator_id == reviewer.id,
                        Annotation.annotator_id == annotator.id
                    )

                total_validations = validations_query.count()
                approved_validations = validations_query.filter(Validation.validated == True).count()
                rejected_validations = validations_query.filter(Validation.validated == False).count()

                # Calculate percentages
                approved_percentage = (approved_validations / total_validations * 100) if total_validations > 0 else 0
                rejected_percentage = (rejected_validations / total_validations * 100) if total_validations > 0 else 0

                print(f"   📊 Total Reviews: {total_validations:,}")
                print(f"   ✅ Approved: {approved_validations:,} ({approved_percentage:.1f}%)")
                print(f"   ❌ Rejected: {rejected_validations:,} ({rejected_percentage:.1f}%)")

                # Update overall totals
                overall_total += total_validations
                overall_approved += approved_validations
                overall_rejected += rejected_validations

                # Collect CSV data
                if export_csv:
                    csv_data.append({
                        'reviewer': reviewer_name,
                        'reviewed_annotator': assigned_annotator,
                        'total_reviews': total_validations,
                        'approved': approved_validations,
                        'rejected': rejected_validations,
                        'approval_rate': approved_percentage
                    })

                # Show detailed rejection reasons if requested
                if detailed and rejected_validations > 0:
                    print("\n   📝 Rejection Reasons:")
                    rejection_reasons = db.query(Validation.description)\
                        .join(Annotation, Validation.annotation_id == Annotation.id)\
                        .filter(
                            Validation.validator_id == reviewer.id,
                            Annotation.annotator_id == annotator.id,
                            Validation.validated == False,
                            Validation.description.isnot(None),
                            Validation.description != ""
                        ).all()
                    
                    reason_counts = {}
                    for (reason,) in rejection_reasons:
                        if reason:
                            reason_counts[reason] = reason_counts.get(reason, 0) + 1
                    
                    for reason, count in sorted(reason_counts.items(), key=lambda x: x[1], reverse=True):
                        print(f"      • {reason}: {count} times")
                    
                    if not reason_counts:
                        print("      (No reasons provided)")
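                    # The Python-side tally above keeps things simple; as a sketch, the same
                    # counts could also be pushed into the database with an aggregate query:
                    #   from sqlalchemy import func
                    #   db.query(Validation.description, func.count(Validation.description))\
                    #       .join(Annotation, Validation.annotation_id == Annotation.id)\
                    #       .filter(Validation.validator_id == reviewer.id,
                    #               Annotation.annotator_id == annotator.id,
                    #               Validation.validated == False)\
                    #       .group_by(Validation.description).all()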

                # Show annotation coverage (how much of assigned work has been reviewed)
                total_annotations_query = db.query(Annotation)\
                    .filter(
                        Annotation.annotator_id == annotator.id,
                        Annotation.annotated_sentence.isnot(None),
                        Annotation.annotated_sentence != ""
                    )
                total_annotations = total_annotations_query.count()
                
                coverage_percentage = (total_validations / total_annotations * 100) if total_annotations > 0 else 0
                print(f"   📈 Review Coverage: {total_validations:,}/{total_annotations:,} ({coverage_percentage:.1f}%)")

            # Overall summary
            print("\n" + "=" * 80)
            print("                        OVERALL SUMMARY")
            print("=" * 80)
            
            overall_approved_percentage = (overall_approved / overall_total * 100) if overall_total > 0 else 0
            overall_rejected_percentage = (overall_rejected / overall_total * 100) if overall_total > 0 else 0

            print(f"📊 Total Reviews Across All Reviewers: {overall_total:,}")
            print(f"✅ Total Approved: {overall_approved:,} ({overall_approved_percentage:.1f}%)")
            print(f"❌ Total Rejected: {overall_rejected:,} ({overall_rejected_percentage:.1f}%)")

            # Quality score (approval rate)
            if overall_total > 0:
                print(f"🎯 Overall Quality Score: {overall_approved_percentage:.1f}% approval rate")
                
                # Quality assessment
                if overall_approved_percentage >= 95:
                    quality_rating = "🌟 Excellent"
                elif overall_approved_percentage >= 85:
                    quality_rating = "👍 Good"
                elif overall_approved_percentage >= 75:
                    quality_rating = "⚠️  Fair"
                else:
                    quality_rating = "🔴 Needs Improvement"
                
                print(f"📊 Quality Rating: {quality_rating}")
            
            print("=" * 80)

            # Export to CSV if requested
            if export_csv and csv_data:
                try:
                    import pandas as pd
                    df = pd.DataFrame(csv_data)
                    filename = f"review_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                    df.to_csv(filename, index=False)
                    print(f"\n📄 Results exported to: {filename}")
                except ImportError:
                    print("\n⚠️  CSV export requires pandas. Install with: pip install pandas")
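                    # A dependency-free fallback (sketch, not wired in) could use the stdlib
                    # csv module instead of pandas, e.g.:
                    #   import csv
                    #   out = f"review_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                    #   with open(out, "w", newline="") as f:
                    #       writer = csv.DictWriter(f, fieldnames=list(csv_data[0].keys()))
                    #       writer.writeheader()
                    #       writer.writerows(csv_data)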

        except Exception as e:
            log.error(f"Failed to generate review results report: {e}")
            print(f"❌ Error generating report: {e}")

def generate_annotator_breakdown_report():
    """
    Generates a report showing how each annotator's work was reviewed.
    """
    with get_db() as db:
        try:
            print("\n" + "=" * 80)
            print("                  ANNOTATOR PERFORMANCE BREAKDOWN")
            print("=" * 80)

            # Get all annotators who have been reviewed
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
                annotator = db.query(Annotator).filter_by(name=annotator_name).first()
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()
                
                if not annotator or not reviewer:
                    continue

                print(f"\n👀 ANNOTATOR: {annotator_name}")
                print(f"   Reviewed by: {reviewer_name}")
                print("-" * 60)

                # Get validation stats for this annotator's work
                validations = db.query(Validation)\
                    .join(Annotation, Validation.annotation_id == Annotation.id)\
                    .filter(
                        Annotation.annotator_id == annotator.id,
                        Validation.validator_id == reviewer.id
                    ).all()

                if not validations:
                    print("   📊 No reviews completed yet")
                    continue

                total = len(validations)
                approved = sum(1 for v in validations if v.validated)
                rejected = total - approved

                approved_percentage = (approved / total * 100) if total > 0 else 0
                rejected_percentage = (rejected / total * 100) if total > 0 else 0

                print(f"   📊 Total Reviewed: {total:,}")
                print(f"   ✅ Approved: {approved:,} ({approved_percentage:.1f}%)")
                print(f"   ❌ Rejected: {rejected:,} ({rejected_percentage:.1f}%)")

                # Performance rating
                if approved_percentage >= 95:
                    rating = "🌟 Excellent"
                elif approved_percentage >= 85:
                    rating = "👍 Good"
                elif approved_percentage >= 75:
                    rating = "⚠️  Fair"
                elif approved_percentage >= 60:
                    rating = "🔴 Needs Improvement"
                else:
                    rating = "💥 Poor"
                
                print(f"   📈 Performance: {rating}")
                
                # Show most common rejection reasons if any
                if rejected > 0:
                    rejected_validations = [v for v in validations if not v.validated and v.description]
                    if rejected_validations:
                        print("   📝 Top Rejection Reasons:")
                        reason_counts = defaultdict(int)
                        for v in rejected_validations:
                            if v.description:
                                reason_counts[v.description.strip()] += 1
                        
                        for reason, count in sorted(reason_counts.items(), key=lambda x: x[1], reverse=True)[:3]:
                            print(f"      • {reason}: {count} times")

        except Exception as e:
            log.error(f"Failed to generate annotator breakdown report: {e}")
            print(f"❌ Error generating annotator breakdown: {e}")


def generate_quick_summary():
    """Generate a quick one-line summary of review results."""
    with get_db() as db:
        try:
            total_reviews = db.query(Validation).count()
            if total_reviews == 0:
                print("No review data found.")
                return
                
            approved_reviews = db.query(Validation).filter(Validation.validated == True).count()
            rejected_reviews = total_reviews - approved_reviews
            
            approval_rate = (approved_reviews / total_reviews) * 100
            
            print(f"📊 QUICK SUMMARY: {total_reviews:,} total reviews | {approved_reviews:,} approved ({approval_rate:.1f}%) | {rejected_reviews:,} rejected ({100-approval_rate:.1f}%)")
            
        except Exception as e:
            print(f"❌ Error generating summary: {e}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate Phase 2 review results report.")
    parser.add_argument(
        "--detailed", 
        action="store_true", 
        help="Show detailed breakdown including rejection reasons"
    )
    parser.add_argument(
        "--annotator-breakdown", 
        action="store_true", 
        help="Show performance breakdown by annotator"
    )
    parser.add_argument(
        "--csv", 
        action="store_true", 
        help="Export results to CSV file"
    )
    parser.add_argument(
        "--quick", 
        action="store_true", 
        help="Show only a quick summary line"
    )
    
    args = parser.parse_args()
    
    if args.quick:
        generate_quick_summary()
    else:
        generate_review_results_report(detailed=args.detailed, export_csv=args.csv)
        
        if args.annotator_breakdown:
            generate_annotator_breakdown_report()