# Hugging Face Space: LettuceDetect Hallucination Tester (Gradio app)
import os

import gradio as gr
from lettucedetect.models.inference import HallucinationDetector

# Load the span-level hallucination detector once at module import.
# NOTE: this downloads the checkpoint from the Hugging Face Hub on first run,
# so startup can take a while on a cold Space.
detector = HallucinationDetector(
    method="transformer",
    # Spelling "lettucedect" matches the published model id on the Hub.
    model_path="KRLabsOrg/lettucedect-large-modernbert-en-v1",
)
# Evaluate an answer for hallucinations with LettuceDetect.
def evaluate_hallucination(context, question, answer):
    """Score *answer* against *context*/*question* and format results for the UI.

    Args:
        context: Source text the answer should be grounded in.
        question: The question the answer responds to.
        answer: Answer text to scan for hallucinated spans.

    Returns:
        A 5-tuple matching the Gradio outputs:
        (status emoji — "🟢" clean / "🔴" hallucinated / "⚪" error,
         explanation string,
         list of (text, label-or-None) segments for gr.HighlightedText,
         per-span confidence report (one line per span) or "N/A",
         average-confidence summary or "N/A").
    """
    try:
        # Span-level predictions: each dict carries character offsets
        # ('start'/'end' into `answer`), the span 'text', and a 'confidence'.
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans",
        )

        # No spans flagged: return the whole answer un-highlighted.
        if not predictions:
            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"

        highlighted_segments = []
        confidence_scores = []
        last_end = 0
        total_confidence = 0.0
        # Assumes spans arrive sorted by start offset and non-overlapping
        # (offsets index into `answer`) — TODO confirm against LettuceDetect docs.
        for pred in predictions:
            start, end = pred["start"], pred["end"]
            confidence = pred["confidence"]
            text = pred["text"]
            # Un-highlighted gap before this hallucinated span.
            if last_end < start:
                highlighted_segments.append((answer[last_end:start], None))
            # Hallucinated span, labelled with its confidence score.
            highlighted_segments.append((text, f"hallucination (conf: {confidence:.4f})"))
            confidence_scores.append(f"'{text}' - Confidence: {confidence:.4f}")
            total_confidence += confidence
            last_end = end
        # Trailing un-highlighted text after the last span.
        if last_end < len(answer):
            highlighted_segments.append((answer[last_end:], None))

        # `predictions` is non-empty here, so the division is safe.
        avg_confidence = total_confidence / len(predictions)
        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(confidence_scores),
            f"Average Confidence: {avg_confidence:.4f}",
        )
    except Exception as e:
        # Surface any model/runtime failure in the UI instead of crashing the app.
        return "⚪", f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"
# Gradio Blocks interface: inputs on the left, detection results on the right.
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴",
    theme="ParityError/Anime",
) as demo:
    gr.Markdown(
        """
# 🥬 LettuceDetect Hallucination Tester 🟢🔴
Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!
### How to Use:
1. Enter a **Context** (source document or info).
2. Enter a **Question** related to the context.
3. Enter an **Answer** to evaluate.
4. Press **Submit** to see if the answer hallucinates!
- 🟢 = No hallucinations
- 🔴 = Hallucinations detected
- Highlighted text shows hallucinated spans in **red** with confidence scores.
"""
    )
    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)...",
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question...",
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate...",
            )
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=3):
            # Outputs
            status_output = gr.Label(label="Status")
            explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=True,
                # NOTE: labels include a confidence suffix, so only the base
                # "hallucination" category is mapped here.
                color_map={"hallucination": "red"},
                combine_adjacent=True,
            )
            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)
    # Connect inputs to outputs via the evaluation function.
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[status_output, explanation_output, highlighted_answer_output, spans_output, avg_confidence_output],
    )
    # Pre-filled example (answer deliberately contradicts the context's population figure).
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million.",
                ]
            ],
            inputs=[context_input, question_input, answer_input],
        )
# Launch the demo (Spaces run this file directly).
demo.launch()