Spaces:
Running
Running
File size: 5,630 Bytes
297437a aa57e68 e760939 aa57e68 e760939 aa57e68 e760939 aa57e68 e760939 aa57e68 e760939 aa57e68 e760939 aa57e68 e760939 aa57e68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
from lettucedetect.models.inference import HallucinationDetector
import os
# Initialize the LettuceDetect model
# Loaded once at import time so every request reuses the same weights.
# NOTE(review): "lettucedect" spelling appears intentional — it matches the
# published KRLabsOrg model id; confirm against the Hugging Face hub entry.
detector = HallucinationDetector(
method="transformer",
model_path="KRLabsOrg/lettucedect-large-modernbert-en-v1"
)
# Function to evaluate hallucination with LettuceDetect
def evaluate_hallucination(context, question, answer):
    """Detect hallucinated spans in *answer* given *context* and *question*.

    Uses the module-level LettuceDetect ``detector`` with span-level output.

    Returns a 5-tuple for the Gradio outputs:
        status emoji ("🟢" clean, "🔴" hallucinations, "⚪" error),
        explanation text,
        HighlightedText segments as (text, label-or-None) pairs,
        per-span confidence report (one line per span, or "N/A"),
        average-confidence summary (or "N/A").

    Never raises: any failure is caught and reported through the UI tuple.
    """
    try:
        # Span-level predictions: list of dicts with 'start', 'end',
        # 'text', 'confidence' (assumed from the access pattern below —
        # TODO confirm against the lettucedetect API docs).
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans"
        )
        if not predictions:
            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"

        highlighted_segments = []
        confidence_scores = []
        last_end = 0
        total_confidence = 0.0
        for pred in predictions:
            start, end = pred['start'], pred['end']
            confidence = pred['confidence']
            text = pred['text']
            # Emit the non-hallucinated gap before this span unlabeled.
            if last_end < start:
                highlighted_segments.append((answer[last_end:start], None))
            # Use the fixed label "hallucination" so it matches the
            # HighlightedText color_map key; per-span confidence is
            # reported separately in the spans textbox.
            highlighted_segments.append((text, "hallucination"))
            confidence_scores.append(f"'{text}' - Confidence: {confidence:.4f}")
            total_confidence += confidence
            last_end = end
        # Trailing non-hallucinated text after the last span.
        if last_end < len(answer):
            highlighted_segments.append((answer[last_end:], None))

        avg_confidence = total_confidence / len(predictions)
        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(confidence_scores),
            f"Average Confidence: {avg_confidence:.4f}"
        )
    except Exception as e:
        # Surface model/runtime failures in the UI instead of crashing the app.
        return "⚪", f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"
# Gradio Blocks interface: inputs on the left, detection results on the right.
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴",
    theme="ParityError/Anime"
) as demo:
    gr.Markdown(
        """
# 🥬 LettuceDetect Hallucination Tester 🟢🔴
Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!
### How to Use:
1. Enter a **Context** (source document or info).
2. Enter a **Question** related to the context.
3. Enter an **Answer** to evaluate.
4. Press **Submit** to see if the answer hallucinates!
- 🟢 = No hallucinations
- 🔴 = Hallucinations detected
- Highlighted text shows hallucinated spans in **red** with confidence scores.
"""
    )
    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)..."
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question..."
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate..."
            )
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=3):
            # Outputs
            status_output = gr.Label(label="Status")
            explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=True,
                # Key must match the span label emitted by evaluate_hallucination.
                color_map={"hallucination": "red"},
                combine_adjacent=True
            )
            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)

    # Wire the submit button to the evaluation function.
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[status_output, explanation_output, highlighted_answer_output, spans_output, avg_confidence_output]
    )

    # Prefilled example (answer deliberately contradicts the context's population figure).
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million."
                ]
            ],
            inputs=[context_input, question_input, answer_input]
        )

# Launch the demo
demo.launch()