""" | |
Main Streamlit application for the Fake News Detector. | |
This module implements the user interface for claim verification, | |
rendering the results and handling user interactions. It also | |
manages the application lifecycle including initialization and cleanup. | |
""" | |
import streamlit as st
import time
import os
import re
import ast
import logging
import atexit
from pathlib import Path
# Configure logging first, before other imports
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger("misinformation_detector")
# Check for critical environment variables
if not os.environ.get("OPENAI_API_KEY"):
    logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")
# Import our modules
from utils.models import initialize_models
from utils.performance import PerformanceTracker

# Import agent functionality
import agent

# Initialize performance tracker
performance_tracker = PerformanceTracker()
# Ensure data directory exists
data_dir = Path("data")
if not data_dir.exists():
    logger.info("Creating data directory")
    data_dir.mkdir(exist_ok=True)
# Set page configuration
st.set_page_config(
    page_title="AskVeracity",
    page_icon="🔍",
    layout="wide",
)
# Hide the "Press ⌘+Enter to apply" text with CSS
st.markdown("""
<style>
/* Hide the shortcut text that appears at the bottom of text areas */
.stTextArea div:has(textarea) + div {
    visibility: hidden !important;
    height: 0px !important;
    position: absolute !important;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def get_agent():
    """
    Initialize and cache the agent for reuse across requests.

    This function creates and caches the fact-checking agent to avoid
    recreating it for every request. It's decorated with st.cache_resource
    to ensure the agent is only initialized once per session.

    Returns:
        object: Initialized LangGraph agent for fact checking
    """
    logger.info("Initializing models and agent (cached)")
    initialize_models()
    return agent.setup_agent()
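# Usage sketch: the cached agent is created once per session and then reused
# across Streamlit reruns, e.g.:
#   fact_checker = get_agent()
#   result = agent.process_claim("Some claim.", fact_checker)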
def cleanup_resources():
    """
    Clean up resources when the app is closed.

    This function is registered with atexit to ensure resources
    are properly released when the application terminates.
    """
    try:
        # Clear any cached data
        st.cache_data.clear()

        # Reset performance tracker
        performance_tracker.reset()

        # Log cleanup
        logger.info("Resources cleaned up successfully")
    except Exception as e:
        logger.error(f"Error during cleanup: {e}")

# Register cleanup handler
atexit.register(cleanup_resources)
# App title and description
st.title("🔍 AskVeracity")

st.markdown("""
This AI-powered agentic fact-checking tool analyzes claims to determine
their truthfulness by gathering and analyzing evidence from various sources,
such as Wikipedia, news outlets, and academic repositories. The application
aims to support broader efforts in misinformation detection.
""")
# Sidebar with app information
with st.sidebar:
    st.header("About")
    st.info(
        "This system uses a combination of NLP techniques and LLMs to "
        "extract claims, gather evidence, and classify the truthfulness of statements.\n\n"
        "**Technical:** Built with Python, Streamlit, LangGraph, and OpenAI, leveraging spaCy for NLP and various APIs for retrieving evidence from diverse sources."
    )

    # Application information
    st.markdown("### How It Works")
    st.info(
        "1. Enter any recent news or a factual claim\n"
        "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
        "3. The system analyzes the evidence to determine truthfulness\n"
        "4. Results show the verdict with supporting evidence"
    )

    # Our Mission
    st.markdown("### Our Mission")
    st.info(
        "AskVeracity aims to combat misinformation in real time through an open-source application built with accessible tools. "
        "We believe in empowering people with factual information to make informed decisions."
    )

    # Limitations and Usage
    st.markdown("### Limitations")
    st.warning(
        "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
        "Performance is typically best with widely reported news and information published within the last 48 hours. "
        "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
        "may be judged false if circumstances have changed, and vice versa."
    )

    # Best Practices
    st.markdown("### Best Practices")
    st.success(
        "For optimal results:\n\n"
        "• Keep claims short and precise\n\n"
        "• Include key details in your claim\n\n"
        "• Phrase claims as direct statements rather than questions\n\n"
        "• Be specific about who said what"
    )

    # Example comparison
    with st.expander("📝 Examples of Effective Claims"):
        st.markdown("""
        **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."

        **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
        """)

    # Important Notes
    st.markdown("### Important Notes")
    st.info(
        "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
        "• Results can vary based on available evidence and LLM behavior\n\n"
        "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
        "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
        "• We recommend cross-verifying critical information with additional sources"
    )

    # Privacy Information
    st.markdown("### Data Privacy")
    st.info(
        "We do not collect or store any data about the claims you submit. "
        "Your interactions are processed by OpenAI's API. Please refer to "
        "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
    )

    # Feedback Section
    st.markdown("### Feedback")
    st.success(
        "AskVeracity is evolving, and we welcome your feedback to help us improve. "
        "Please reach out to us with questions, suggestions, or concerns."
    )
# Initialize session state variables
if 'processing' not in st.session_state:
    st.session_state.processing = False
if 'claim_to_process' not in st.session_state:
    st.session_state.claim_to_process = ""
if 'has_result' not in st.session_state:
    st.session_state.has_result = False
if 'result' not in st.session_state:
    st.session_state.result = None
if 'total_time' not in st.session_state:
    st.session_state.total_time = 0
if 'fresh_state' not in st.session_state:
    st.session_state.fresh_state = True

# Initialize verify button disabled state
if 'verify_btn_disabled' not in st.session_state:
    st.session_state.verify_btn_disabled = False

# Add a flag to clear the input field
if 'clear_form' not in st.session_state:
    st.session_state.clear_form = False
# Main interface
st.markdown("### Enter a claim to verify")

# Input area
claim_input = st.text_area(
    "",
    height=100,
    placeholder=(
        "Examples: The Eiffel Tower is located in Rome, Italy. "
        "Meta recently released its Llama 4 large language model. "
        "Justin Trudeau is not the Canadian Prime Minister anymore. "
        "China retaliated with 125% tariffs against U.S. imports. "
        "A recent piece of news."
    ),
    key="claim_input_area",
    value="" if st.session_state.clear_form else None,  # Clear if flag is set
    label_visibility="collapsed",
    max_chars=None,
)

# Reset the clear_form flag after using it
if st.session_state.clear_form:
    st.session_state.clear_form = False
# Information about result variability
st.caption("""
💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:

- Incorporate the most recent evidence available
- Benefit from the AI's ability to consider multiple perspectives
- Adapt to evolving information landscapes
""")

st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")
# Create a clean interface based on state
if st.session_state.fresh_state:
    # Only show the verify button in fresh state
    verify_button = st.button(
        "Verify Claim",
        type="primary",
        key="verify_btn"
    )

    # When the button is clicked and we are not already processing
    if verify_button and not st.session_state.processing:
        if not claim_input:
            st.error("Please enter a claim to verify.")
        else:
            # Store the claim and set processing state
            st.session_state.claim_to_process = claim_input
            st.session_state.processing = True
            st.session_state.fresh_state = False
            # Force a rerun to refresh the UI
            st.rerun()
else:
    # This is either during processing or showing results
    # Create a container for processing and results
    analysis_container = st.container()

    with analysis_container:
        # If we're processing, show the processing UI
        if st.session_state.processing:
            st.subheader("🔄 Processing...")
            status = st.empty()
            status.text("Verifying claim... (this may take a while)")
            progress_bar = st.progress(0)

            # Initialize models and agent if needed
            if not hasattr(st.session_state, 'agent_initialized'):
                with st.spinner("Initializing system..."):
                    st.session_state.agent = get_agent()
                    st.session_state.agent_initialized = True
            try:
                # Use the stored claim for processing
                claim_to_process = st.session_state.claim_to_process

                # Process the claim with the agent
                start_time = time.time()
                result = agent.process_claim(claim_to_process, st.session_state.agent)
                total_time = time.time() - start_time

                # Update progress as claim processing completes
                progress_bar.progress(100)

                # Check for None result
                if result is None:
                    st.error("Failed to process the claim. Please try again.")
                    st.session_state.processing = False
                    st.session_state.fresh_state = True
                else:
                    # If the result exists but key values are missing, provide defaults
                    if "classification" not in result or result["classification"] is None:
                        result["classification"] = "Uncertain"
                    if "confidence" not in result or result["confidence"] is None:
                        result["confidence"] = 0.6  # Default to 0.6 instead of 0.0
                    if "explanation" not in result or result["explanation"] is None:
                        result["explanation"] = "Insufficient evidence was found to determine the truthfulness of this claim."

                    # Update result with timing information
                    if "processing_times" not in result:
                        result["processing_times"] = {"total": total_time}

                    # Store the result and timing information
                    st.session_state.result = result
                    st.session_state.total_time = total_time
                    st.session_state.has_result = True
                    st.session_state.processing = False

                    # Clear processing indicators before showing results
                    status.empty()
                    progress_bar.empty()

                    # Force rerun to display results
                    st.rerun()
            except Exception as e:
                # Handle any exceptions and reset processing state
                logger.error(f"Error during claim processing: {str(e)}")
                st.error(f"An error occurred: {str(e)}")
                st.session_state.processing = False
                st.session_state.fresh_state = True
                # Force rerun to re-enable the button
                st.rerun()
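        # Note: st.rerun() is called after each state transition so the next
        # script run renders the matching branch (results or fresh form) cleanly.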
        # Display results if available
        elif st.session_state.has_result and st.session_state.result:
            result = st.session_state.result
            total_time = st.session_state.total_time
            claim_to_process = st.session_state.claim_to_process

            st.subheader("📊 Verification Results")

            result_col1, result_col2 = st.columns([2, 1])
            with result_col1:
                # Display both the original and processed claim if they differ
                if "claim" in result and result["claim"] and result["claim"] != claim_to_process:
                    st.markdown(f"**Original Claim:** {claim_to_process}")
                    st.markdown(f"**Processed Claim:** {result['claim']}")
                else:
                    st.markdown(f"**Claim:** {claim_to_process}")

                # Make the verdict colorful based on classification
                truth_label = result.get('classification', 'Uncertain')
                if truth_label and "True" in truth_label:
                    verdict_color = "green"
                elif truth_label and "False" in truth_label:
                    verdict_color = "red"
                else:
                    verdict_color = "gray"

                st.markdown(f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{truth_label}</span>", unsafe_allow_html=True)
                # Ensure the confidence value is usable
                if "confidence" in result and result["confidence"] is not None:
                    # Make sure confidence is a numeric value between 0 and 1
                    try:
                        confidence_value = max(0.0, min(1.0, float(result["confidence"])))
                    except (ValueError, TypeError):
                        confidence_value = 0.6  # Fall back to a reasonable default
                else:
                    confidence_value = 0.6  # Default confidence

                # Display the confidence
                st.markdown(f"**Confidence:** {confidence_value:.2%}")

                st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")

                # Add disclaimer about cross-verification
                st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")
            with result_col2:
                st.markdown("**Processing Time**")
                times = result.get("processing_times", {"total": total_time})
                st.markdown(f"- **Total:** {times.get('total', total_time):.2f}s")

                # Show agent thoughts
                if "thoughts" in result and result["thoughts"]:
                    st.markdown("**AI Reasoning Process**")
                    thoughts = result.get("thoughts", [])
                    for i, thought in enumerate(thoughts[:5]):  # Show the first 5 thoughts
                        st.markdown(f"{i+1}. {thought}")
                    if len(thoughts) > 5:
                        with st.expander("Show all reasoning steps"):
                            for i, thought in enumerate(thoughts):
                                st.markdown(f"{i+1}. {thought}")
            # Display evidence
            st.subheader("📋 Evidence")
            evidence_count = result.get("evidence_count", 0)
            evidence = result.get("evidence", [])

            # Ensure evidence is a list
            if not isinstance(evidence, list):
                if isinstance(evidence, str):
                    # Try to parse the string as a list
                    try:
                        parsed_evidence = ast.literal_eval(evidence)
                        if isinstance(parsed_evidence, list):
                            evidence = parsed_evidence
                        else:
                            evidence = [evidence]
                    except (ValueError, SyntaxError):
                        evidence = [evidence]
                else:
                    evidence = [str(evidence)] if evidence else []

            # Update the evidence count based on the actual evidence list
            evidence_count = len(evidence)

            # Check for empty evidence
            if evidence_count == 0 or not any(evidence):
                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
            else:
                st.markdown(f"Retrieved {evidence_count} pieces of evidence")
            # Get classification results
            classification_results = result.get("classification_results", [])

            # Only show evidence tabs if we have evidence
            if evidence and any(evidence):
                # Create tabs for different evidence categories
                evidence_tabs = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])

                with evidence_tabs[0]:
                    for i, ev in enumerate(evidence):
                        if ev and isinstance(ev, str) and ev.strip():  # Only show non-empty evidence
                            with st.expander(f"Evidence {i+1}", expanded=i == 0):
                                st.text(ev)

                with evidence_tabs[1]:
                    if classification_results:
                        # Check that classification_results items have the expected format
                        valid_results = []
                        for res in classification_results:
                            if isinstance(res, dict) and "confidence" in res and "evidence" in res and "label" in res:
                                if res.get("evidence"):  # Only include results with actual evidence
                                    valid_results.append(res)

                        if valid_results:
                            sorted_results = sorted(valid_results, key=lambda x: x.get("confidence", 0), reverse=True)
                            top_results = sorted_results[:3]
                            for i, res in enumerate(top_results):
                                with st.expander(f"Top Evidence {i+1} (Confidence: {res.get('confidence', 0):.2%})", expanded=i == 0):
                                    st.text(res.get("evidence", "No evidence text available"))
                                    st.markdown(f"**Classification:** {res.get('label', 'unknown')}")
                        else:
                            # If there are no valid results, just show the evidence
                            shown = False
                            for i, ev in enumerate(evidence[:3]):
                                if ev and isinstance(ev, str) and ev.strip():
                                    with st.expander(f"Evidence {i+1}", expanded=i == 0):
                                        st.text(ev)
                                    shown = True
                            if not shown:
                                st.info("No detailed classification results available.")
                    else:
                        # Just show regular evidence if there are no classification details
                        shown = False
                        for i, ev in enumerate(evidence[:3]):
                            if ev and isinstance(ev, str) and ev.strip():
                                with st.expander(f"Evidence {i+1}", expanded=i == 0):
                                    st.text(ev)
                                shown = True
                        if not shown:
                            st.info("No detailed classification results available.")
                with evidence_tabs[2]:
                    evidence_sources = {}
                    for ev in evidence:
                        if not ev or not isinstance(ev, str):
                            continue

                        source = "Unknown"
                        # Extract source info from the evidence text
                        if "URL:" in ev:
                            url_match = re.search(r'URL: https?://(?:www\.)?([^/]+)', ev)
                            if url_match:
                                source = url_match.group(1)

                        evidence_sources[source] = evidence_sources.get(source, 0) + 1

                    # Display the evidence source distribution
                    if evidence_sources:
                        st.markdown("**Evidence Source Distribution**")
                        for source, count in evidence_sources.items():
                            st.markdown(f"- {source}: {count} item(s)")
                    else:
                        st.info("No source information available in the evidence.")
            else:
                st.warning("No evidence was retrieved for this claim.")
            # Button to start a new verification
            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
                st.session_state.fresh_state = True
                st.session_state.has_result = False
                st.session_state.result = None
                st.session_state.clear_form = True  # Set flag to clear the form on the next rerun
                st.rerun()
# Footer with additional information
st.markdown("---")
st.caption("""
**AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
""")