File size: 5,630 Bytes
297437a
aa57e68
 
e760939
aa57e68
 
 
 
 
e760939
aa57e68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e760939
aa57e68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e760939
aa57e68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e760939
aa57e68
 
 
 
 
 
e760939
aa57e68
 
 
 
 
 
 
 
 
 
 
 
 
e760939
aa57e68
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import gradio as gr
from lettucedetect.models.inference import HallucinationDetector
import os

# Build the LettuceDetect detector once at module import; the Gradio callback
# defined in this file reads this module-level instance on every request.
_DETECTOR_SETTINGS = {
    "method": "transformer",
    "model_path": "KRLabsOrg/lettucedect-large-modernbert-en-v1",
}
detector = HallucinationDetector(**_DETECTOR_SETTINGS)

# Function to evaluate hallucination with LettuceDetect
# Status glyphs are spelled as escapes so the source stays pure ASCII and the
# symbols cannot be corrupted again by a mis-decoded save or copy.
_STATUS_CLEAN = "\U0001F7E2"  # large green circle
_STATUS_HALLUCINATED = "\U0001F534"  # large red circle
_STATUS_ERROR = "\u26AA"  # medium white circle


def _render_spans(answer, predictions):
    """Turn span predictions into highlight segments and confidence lines.

    Args:
        answer: The answer string the spans index into.
        predictions: Non-empty iterable of mappings with the keys ``start``,
            ``end``, ``confidence`` and ``text``.

    Returns:
        A ``(segments, score_lines, average_confidence)`` tuple where
        ``segments`` is a list of ``(text, label_or_None)`` pairs covering the
        answer, ``score_lines`` holds one formatted line per span, and
        ``average_confidence`` is the mean of the span confidences.
    """
    # Walk the spans left to right even if the detector returns them unordered.
    ordered = sorted(predictions, key=lambda span: span["start"])

    segments = []
    score_lines = []
    cursor = 0
    for span in ordered:
        start, end = span["start"], span["end"]
        confidence = span["confidence"]
        text = span["text"]

        # Unflagged text between the previous span and this one.
        if cursor < start:
            segments.append((answer[cursor:start], None))

        # NOTE(review): the label embeds the confidence, so every span gets its
        # own category; the UI's color_map is keyed by plain "hallucination" --
        # confirm the spans still render in the intended colour.
        segments.append((text, f"hallucination (conf: {confidence:.4f})"))
        score_lines.append(f"'{text}' - Confidence: {confidence:.4f}")

        # Never move the cursor backwards when spans overlap.
        cursor = max(cursor, end)

    # Unflagged tail after the last span.
    if cursor < len(answer):
        segments.append((answer[cursor:], None))

    average = sum(span["confidence"] for span in ordered) / len(ordered)
    return segments, score_lines, average


def evaluate_hallucination(context, question, answer):
    """Run the detector on one answer and format the result for the UI.

    Args:
        context: Source text; passed to the detector as a one-element list.
        question: Question the answer responds to.
        answer: Answer text to check against the context.

    Returns:
        A 5-tuple ``(status, explanation, highlighted_segments, span_report,
        average_report)``:

        * ``status`` -- green circle when no spans are returned, red circle
          when at least one is, white circle on error.
        * ``explanation`` -- short human-readable summary, or ``"Error: ..."``.
        * ``highlighted_segments`` -- list of ``(text, label_or_None)`` pairs.
        * ``span_report`` -- one line per flagged span with its confidence.
        * ``average_report`` -- mean confidence over the flagged spans.

        Any exception raised while predicting or formatting is caught and
        reported through the tuple instead of propagating to the caller.
    """
    try:
        # Span-level predictions for this single (context, question, answer).
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans",
        )

        if not predictions:
            return (
                _STATUS_CLEAN,
                "No hallucinations detected",
                [(answer, None)],
                "Confidence: N/A",
                "N/A",
            )

        segments, score_lines, average = _render_spans(answer, predictions)
        return (
            _STATUS_HALLUCINATED,
            "Hallucinations detected",
            segments,
            "\n".join(score_lines),
            f"Average Confidence: {average:.4f}",
        )

    except Exception as e:
        # UI boundary: surface the failure in the outputs rather than crash.
        return _STATUS_ERROR, f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"

# Gradio Blocks interface: three text inputs on the left, five result widgets
# on the right, wired to evaluate_hallucination by the Submit button.
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴",
    theme="ParityError/Anime"
) as demo:
    gr.Markdown(
        """
        # 🥬 LettuceDetect Hallucination Tester 🟢🔴
        Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!
        
        ### How to Use:
        1. Enter a **Context** (source document or info).
        2. Enter a **Question** related to the context.
        3. Enter an **Answer** to evaluate.
        4. Press **Submit** to see if the answer hallucinates!
        
        - 🟢 = No hallucinations
        - 🔴 = Hallucinations detected
        - Highlighted text shows hallucinated spans in **red** with confidence scores.
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)..."
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question..."
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate..."
            )
            submit_btn = gr.Button("Submit")

        with gr.Column(scale=3):
            # Outputs, in the same order as the tuple returned by
            # evaluate_hallucination.
            status_output = gr.Label(label="Status")
            explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=True,
                # NOTE(review): evaluate_hallucination labels spans as
                # "hallucination (conf: ...)", not plain "hallucination" --
                # confirm this mapping actually colours those spans red.
                color_map={"hallucination": "red"},
                combine_adjacent=True
            )
            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)

    # Connect inputs to outputs via the evaluation function
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[status_output, explanation_output, highlighted_answer_output, spans_output, avg_confidence_output]
    )

    # Example: the answer misstates the population given in the context.
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million."
                ]
            ],
            inputs=[context_input, question_input, answer_input]
        )

# Launch the demo
demo.launch()