Spaces:

ankanghosh
/

askveracity

Running

App Files Files Community

ankanghosh committed on Apr 15

Commit

7130eb6

verified ·

1 Parent(s): 1dfaa8e

Upload 7 files.

Browse files

Files changed (6) hide show

LICENSE +21 -0
__init__.py +1 -0
agent.py +430 -0
app.py +518 -0
config.py +130 -0
requirements.txt +17 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 AskVeracity
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Root package initialization

agent.py ADDED Viewed

	@@ -0,0 +1,430 @@

+"""
+Agent module for the Fake News Detector application.
+This module implements a LangGraph-based agent that orchestrates
+the fact-checking process. It defines the agent setup, tools,
+and processing pipeline for claim verification.
+"""
+import os
+import time
+import logging
+import traceback
+from langchain_core.tools import tool
+from langchain.prompts import PromptTemplate
+from langgraph.prebuilt import create_react_agent
+from utils.models import get_llm_model
+from utils.performance import PerformanceTracker
+from modules.claim_extraction import extract_claims
+from modules.evidence_retrieval import retrieve_combined_evidence
+from modules.classification import classify_with_llm, aggregate_evidence
+from modules.explanation import generate_explanation
+# Configure logger
+logger = logging.getLogger("misinformation_detector")
+# Reference to global performance tracker
+performance_tracker = PerformanceTracker()
+# Define LangGraph Tools
+@tool
+def claim_extractor(query):
+    """
+    Tool that extracts factual claims from a given text.
+    Args:
+        query (str): Text containing potential factual claims
+    Returns:
+        str: Extracted factual claim
+    """
+    performance_tracker.log_claim_processed()
+    return extract_claims(query)
+@tool
+def evidence_retriever(query):
+    """
+    Tool that retrieves evidence from multiple sources for a claim.
+    Args:
+        query (str): The factual claim to gather evidence for
+    Returns:
+        list: List of evidence items from various sources
+    """
+    return retrieve_combined_evidence(query)
+@tool
+def truth_classifier(query, evidence):
+    """
+    Tool that classifies the truthfulness of a claim based on evidence.
+    Args:
+        query (str): The factual claim to classify
+        evidence (list): Evidence items to evaluate
+    Returns:
+        str: JSON string containing verdict, confidence, and results
+    """
+    classification_results = classify_with_llm(query, evidence)
+    truth_label, confidence = aggregate_evidence(classification_results)
+    # Debug logging
+    logger.info(f"Classification results: {len(classification_results)} items")
+    logger.info(f"Aggregate result: {truth_label}, confidence: {confidence}")
+    # Ensure confidence is at least 0.6 for any definitive verdict
+    if "True" in truth_label or "False" in truth_label:
+        confidence = max(confidence, 0.6)
+    # Return a dictionary with all needed information
+    result = {
+        "verdict": truth_label,
+        "confidence": confidence,
+        "results": classification_results
+    }
+    # Convert to string for consistent handling
+    import json
+    return json.dumps(result)
+@tool
+def explanation_generator(claim, evidence_results, truth_label):
+    """
+    Tool that generates a human-readable explanation for the verdict.
+    Args:
+        claim (str): The factual claim being verified
+        evidence_results (list): Evidence items and classification results
+        truth_label (str): The verdict (True/False/Uncertain)
+    Returns:
+        str: Natural language explanation of the verdict
+    """
+    explanation = generate_explanation(claim, evidence_results, truth_label)
+    logger.info(f"Generated explanation: {explanation[:100]}...")
+    return explanation
+def setup_agent():
+    """
+    Create and configure a ReAct agent with the fact-checking tools.
+    This function configures a LangGraph ReAct agent with all the
+    necessary tools for fact checking, including claim extraction,
+    evidence retrieval, classification, and explanation generation.
+    Returns:
+        object: Configured LangGraph agent ready for claim processing
+    Raises:
+        ValueError: If OpenAI API key is not set
+    """
+    # Make sure OpenAI API key is set
+    if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"].strip():
+        logger.error("OPENAI_API_KEY environment variable not set or empty.")
+        raise ValueError("OpenAI API key is required")
+    # Define tools with any customizations
+    tools = [
+        claim_extractor,
+        evidence_retriever,
+        truth_classifier,
+        explanation_generator
+    ]
+    # Define the prompt template with clearer, more efficient instructions
+    FORMAT_INSTRUCTIONS_TEMPLATE = """
+    Use the following format:
+    Question: the input question you must answer
+    Action: the action to take, should be one of: {tool_names}
+    Action Input: the input to the action
+    Observation: the result of the action
+    ... (this Action/Action Input/Observation can repeat N times)
+    Final Answer: the final answer to the original input question
+    """
+    prompt = PromptTemplate(
+        input_variables=["input", "tool_names"],
+        template=f"""
+        You are a fact-checking assistant that verifies claims by gathering evidence and
+        determining their truthfulness. Follow these exact steps in sequence:
+        1. Call claim_extractor to extract the main factual claim
+        2. Call evidence_retriever to gather evidence about the claim
+        3. Call truth_classifier to evaluate the claim using the evidence
+        4. Call explanation_generator to explain the result
+        5. Provide your Final Answer that summarizes everything
+        Execute these steps in order without unnecessary thinking steps between tool calls.
+        Be direct and efficient in your verification process.
+        {FORMAT_INSTRUCTIONS_TEMPLATE}
+        """
+    )
+    try:
+        # Get the LLM model
+        model = get_llm_model()
+        # Create the agent with a shorter timeout
+        graph = create_react_agent(model, tools=tools)
+        logger.info("Agent created successfully")
+        return graph
+    except Exception as e:
+        logger.error(f"Error creating agent: {str(e)}")
+        raise e
+def process_claim(claim, agent=None, recursion_limit=20):
+    """
+    Process a claim to determine its truthfulness using the agent.
+    This function invokes the LangGraph agent to process a factual claim,
+    extract supporting evidence, evaluate the claim's truthfulness, and
+    generate a human-readable explanation.
+    Args:
+        claim (str): The factual claim to be verified
+        agent (object, optional): Initialized LangGraph agent. If None, an error is logged.
+        recursion_limit (int, optional): Maximum recursion depth for agent. Default: 20.
+            Higher values allow more complex reasoning but increase processing time.
+    Returns:
+        dict: Result dictionary containing:
+            - claim: Extracted factual claim
+            - evidence: List of evidence pieces
+            - evidence_count: Number of evidence pieces
+            - classification: Verdict (True/False/Uncertain)
+            - confidence: Confidence score (0-1)
+            - explanation: Human-readable explanation of the verdict
+            - final_answer: Final answer from the agent
+            - Or error information if processing failed
+    """
+    if agent is None:
+        logger.error("Agent not initialized. Call setup_agent() first.")
+        return None
+    start_time = time.time()
+    logger.info(f"Processing claim with agent: {claim}")
+    try:
+        # Format inputs for the agent
+        inputs = {"messages": [("user", claim)]}
+        # Set configuration - reduced recursion limit for faster processing
+        config = {"recursion_limit": recursion_limit}
+        # Invoke the agent
+        response = agent.invoke(inputs, config)
+        # Format the response
+        result = format_response(response)
+        # Log performance
+        elapsed = time.time() - start_time
+        logger.info(f"Claim processed in {elapsed:.2f} seconds")
+        return result
+    except Exception as e:
+        logger.error(f"Error processing claim with agent: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {"error": str(e)}
+def format_response(response):
+    """
+    Format the agent's response into a structured result.
+    This function extracts key information from the agent's response,
+    including the claim, evidence, classification, and explanation.
+    It also performs error handling and provides fallback values.
+    Args:
+        response (dict): Raw response from the LangGraph agent
+    Returns:
+        dict: Structured result containing claim verification data
+    """
+    try:
+        if not response or "messages" not in response:
+            return {"error": "Invalid response format"}
+        messages = response.get("messages", [])
+        # Initialize result container with default values
+        result = {
+            "claim": None,
+            "evidence": [],
+            "evidence_count": 0,
+            "classification": "Uncertain",
+            "confidence": 0.2,  # Default low confidence
+            "explanation": "Insufficient evidence to evaluate this claim.",
+            "final_answer": None,
+            "thoughts": []
+        }
+        # Track if we found results from each tool
+        found_tools = {
+            "claim_extractor": False,
+            "evidence_retriever": False,
+            "truth_classifier": False,
+            "explanation_generator": False
+        }
+        # Extract information from messages
+        tool_outputs = {}
+        for idx, message in enumerate(messages):
+            # Extract agent thoughts
+            if hasattr(message, "content") and getattr(message, "type", "") == "assistant":
+                content = message.content
+                if "Thought:" in content:
+                    thought_parts = content.split("Thought:", 1)
+                    if len(thought_parts) > 1:
+                        thought = thought_parts[1].split("\n")[0].strip()
+                        result["thoughts"].append(thought)
+            # Extract tool outputs
+            if hasattr(message, "type") and message.type == "tool":
+                tool_name = getattr(message, "name", "unknown")
+                # Store tool outputs
+                tool_outputs[tool_name] = message.content
+                # Extract specific information
+                if tool_name == "claim_extractor":
+                    found_tools["claim_extractor"] = True
+                    if message.content:
+                        result["claim"] = message.content
+                elif tool_name == "evidence_retriever":
+                    found_tools["evidence_retriever"] = True
+                    # Handle string representation of a list
+                    if message.content:
+                        if isinstance(message.content, list):
+                            result["evidence"] = message.content
+                            result["evidence_count"] = len(message.content)
+                        elif isinstance(message.content, str) and message.content.startswith("[") and message.content.endswith("]"):
+                            try:
+                                import ast
+                                parsed_content = ast.literal_eval(message.content)
+                                if isinstance(parsed_content, list):
+                                    result["evidence"] = parsed_content
+                                    result["evidence_count"] = len(parsed_content)
+                                else:
+                                    result["evidence"] = [message.content]
+                                    result["evidence_count"] = 1
+                            except:
+                                result["evidence"] = [message.content]
+                                result["evidence_count"] = 1
+                        else:
+                            result["evidence"] = [message.content]
+                            result["evidence_count"] = 1
+                            logger.warning(f"Evidence retrieved is not a list: {type(message.content)}")
+                elif tool_name == "truth_classifier":
+                    found_tools["truth_classifier"] = True
+                    # Log the incoming content for debugging
+                    logger.info(f"Truth classifier content type: {type(message.content)}")
+                    logger.info(f"Truth classifier content: {message.content}")
+                    # Handle JSON formatted result from truth_classifier
+                    if isinstance(message.content, str):
+                        try:
+                            import json
+                            # Parse the JSON string
+                            parsed_content = json.loads(message.content)
+                            # Extract the values from the parsed content
+                            result["classification"] = parsed_content.get("verdict", "Uncertain")
+                            result["confidence"] = float(parsed_content.get("confidence", 0.2))
+                            result["classification_results"] = parsed_content.get("results", [])
+                            logger.info(f"Extracted from JSON: verdict={result['classification']}, confidence={result['confidence']}")
+                        except json.JSONDecodeError:
+                            logger.warning(f"Could not parse truth classifier JSON: {message.content}")
+                        except Exception as e:
+                            logger.warning(f"Error extracting from truth classifier output: {e}")
+                    else:
+                        logger.warning(f"Unexpected truth_classifier content format: {message.content}")
+                elif tool_name == "explanation_generator":
+                    found_tools["explanation_generator"] = True
+                    if message.content:
+                        result["explanation"] = message.content
+                        logger.info(f"Found explanation from tool: {message.content[:100]}...")
+            # Get final answer from last message
+            elif idx == len(messages) - 1 and hasattr(message, "content"):
+                result["final_answer"] = message.content
+        # Log which tools weren't found
+        missing_tools = [tool for tool, found in found_tools.items() if not found]
+        if missing_tools:
+            logger.warning(f"Missing tool outputs in response: {', '.join(missing_tools)}")
+        # FALLBACK: If we have truth classification but explanation is missing, generate it now
+        if found_tools["truth_classifier"] and not found_tools["explanation_generator"]:
+            logger.info("Explanation generator was not called by the agent, using fallback explanation generation")
+            try:
+                # Get the necessary inputs for explanation generation
+                claim = result["claim"]
+                evidence = result["evidence"]
+                truth_label = result["classification"]
+                confidence_value = result["confidence"]  # Pass the confidence value
+                classification_results = result.get("classification_results", [])
+                # Choose the best available evidence for explanation
+                explanation_evidence = classification_results if classification_results else evidence
+                # Generate explanation with confidence value
+                explanation = generate_explanation(claim, explanation_evidence, truth_label, confidence_value)
+                # Use the generated explanation
+                if explanation:
+                    logger.info(f"Generated fallback explanation: {explanation[:100]}...")
+                    result["explanation"] = explanation
+            except Exception as e:
+                logger.error(f"Error generating fallback explanation: {e}")
+        # Make sure evidence exists
+        if result["evidence_count"] > 0 and (not result["evidence"] or len(result["evidence"]) == 0):
+            logger.warning("Evidence count is non-zero but evidence list is empty. This is a data inconsistency.")
+            result["evidence_count"] = 0
+        # Add debug info about the final result
+        logger.info(f"Final classification: {result['classification']}, confidence: {result['confidence']}")
+        logger.info(f"Final explanation: {result['explanation'][:100]}...")
+        # Add performance metrics
+        result["performance"] = performance_tracker.get_summary()
+        # Memory management - limit the size of evidence and thoughts
+        # To keep memory usage reasonable for web deployment
+        if "evidence" in result and isinstance(result["evidence"], list):
+            limited_evidence = []
+            for ev in result["evidence"]:
+                if isinstance(ev, str) and len(ev) > 500:
+                    limited_evidence.append(ev[:497] + "...")
+                else:
+                    limited_evidence.append(ev)
+            result["evidence"] = limited_evidence
+        # Limit thoughts to conserve memory
+        if "thoughts" in result and len(result["thoughts"]) > 10:
+            result["thoughts"] = result["thoughts"][:10]
+        return result
+    except Exception as e:
+        logger.error(f"Error formatting agent response: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "error": str(e),
+            "traceback": traceback.format_exc(),
+            "classification": "Error",
+            "confidence": 0.1,
+            "explanation": "An error occurred while processing this claim."
+        }

app.py ADDED Viewed

	@@ -0,0 +1,518 @@

+"""
+Main Streamlit application for the Fake News Detector.
+This module implements the user interface for claim verification,
+rendering the results and handling user interactions. It also
+manages the application lifecycle including initialization and cleanup.
+"""
+import streamlit as st
+import time
+import json
+import os
+import logging
+import atexit
+import sys
+from pathlib import Path
+# Configure logging first, before other imports
+# (the project modules imported further down are therefore loaded with this
+# logging configuration already in place)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler()]
+)
+# Same logger name as the one used in agent.py
+logger = logging.getLogger("misinformation_detector")
+# Check for critical environment variables
+# Only a warning is emitted here; agent.setup_agent() raises ValueError
+# later if the key is still missing when the agent is built.
+if not os.environ.get("OPENAI_API_KEY"):
+    logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")
+# Import our modules
+from utils.models import initialize_models
+from utils.performance import PerformanceTracker
+# Import agent functionality
+import agent
+# Initialize performance tracker
+# NOTE(review): agent.py also instantiates PerformanceTracker(); whether the
+# two instances share state depends on that class - confirm.
+performance_tracker = PerformanceTracker()
+# Ensure data directory exists
+# ("data" is a relative path, so it is resolved against the working directory)
+data_dir = Path("data")
+if not data_dir.exists():
+    logger.info("Creating data directory")
+    data_dir.mkdir(exist_ok=True)
+# Set page configuration
+# This is the first Streamlit call in the script; keep it ahead of the other
+# st.* calls below.
+st.set_page_config(
+    page_title="AskVeracity",
+    page_icon="🔍",
+    layout="wide",
+)
+# Hide the "Press ⌘+Enter to apply" text with CSS
+# (injected as raw HTML, hence unsafe_allow_html=True)
+st.markdown("""
+<style>
+/* Hide the shortcut text that appears at the bottom of text areas */
+.stTextArea div:has(textarea) + div {
+    visibility: hidden !important;
+    height: 0px !important;
+    position: absolute !important;
+}
+</style>
+""", unsafe_allow_html=True)
+@st.cache_resource
+def get_agent():
+    """
+    Initialize and cache the agent for reuse across requests.
+    This function creates and caches the fact-checking agent to avoid
+    recreating it for every request. It's decorated with st.cache_resource
+    to ensure the agent is only initialized once per session.
+    Returns:
+        object: Initialized LangGraph agent for fact checking
+    """
+    logger.info("Initializing models and agent (cached)")
+    initialize_models()
+    return agent.setup_agent()
+def cleanup_resources():
+    """
+    Clean up resources when app is closed.
+    This function is registered with atexit to ensure resources
+    are properly released when the application terminates.
+    """
+    try:
+        # Clear any cached data
+        st.cache_data.clear()
+        # Reset performance tracker
+        performance_tracker.reset()
+        # Log cleanup
+        logger.info("Resources cleaned up successfully")
+    except Exception as e:
+        logger.error(f"Error during cleanup: {e}")
+# Register cleanup handler
+# (atexit calls cleanup_resources when the Python interpreter exits)
+atexit.register(cleanup_resources)
+# App title and description
+st.title("🔍 AskVeracity")
+st.markdown("""
+            This is a simple AI-powered tool - a fact-checking system that analyzes claims to determine
+            their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
+            news outlets, and academic repositories.
+""")
+# Sidebar with app information
+# Everything inside this block is static informational text; sections are
+# rendered in the order they appear here.
+with st.sidebar:
+    st.header("About")
+    st.info(
+        "This system uses a combination of NLP techniques and LLMs to "
+        "extract claims, gather evidence, and classify the truthfulness of statements."
+    )
+    # Application information
+    st.markdown("### How It Works")
+    st.info(
+        "1. Enter any recent news or a factual claim\n"
+        "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
+        "3. The system analyzes the evidence to determine truthfulness\n"
+        "4. Results show the verdict with supporting evidence"
+    )
+    # Our Mission
+    st.markdown("### Our Mission")
+    st.info(
+        "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
+        "We believe in empowering people with factual information to make informed decisions."
+    )
+    # Limitations and Usage
+    st.markdown("### Limitations")
+    st.warning(
+        "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
+        "Performance is typically best with widely-reported news and information published within the last 48 hours. "
+        "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
+        "may be judged false if circumstances have changed, and vice versa."
+    )
+    # Best Practices
+    st.markdown("### Best Practices")
+    st.success(
+        "For optimal results:\n\n"
+        "• Keep claims short and precise\n\n"
+        "• Include key details in your claim\n\n"
+        "• Phrase claims as direct statements rather than questions\n\n"
+        "• Be specific about who said what"
+    )
+    # Example comparison
+    # (collapsed by default; shows a vague claim next to a sharper rewording)
+    with st.expander("📝 Examples of Effective Claims"):
+        st.markdown("""
+        **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."
+        **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
+        """)
+    # Important Notes
+    st.markdown("### Important Notes")
+    st.info(
+        "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
+        "• Results can vary based on available evidence and LLM behavior\n\n"
+        "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
+        "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
+        "• We recommend cross-verifying critical information with additional sources"
+    )
+    # Privacy Information
+    st.markdown("### Data Privacy")
+    st.info(
+        "We do not collect or store any data about the claims you submit. "
+        "Your interactions are processed by OpenAI's API. Please refer to "
+        "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
+    )
+    # Feedback Section
+    st.markdown("### Feedback")
+    st.success(
+        "AskVeracity is evolving and we welcome your feedback to help us improve. "
+        "Please reach out to us with questions, suggestions, or concerns."
+    )
+# Initialize session state variables
+if 'processing' not in st.session_state:
+    st.session_state.processing = False
+if 'claim_to_process' not in st.session_state:
+    st.session_state.claim_to_process = ""
+if 'has_result' not in st.session_state:
+    st.session_state.has_result = False
+if 'result' not in st.session_state:
+    st.session_state.result = None
+if 'total_time' not in st.session_state:
+    st.session_state.total_time = 0
+if 'fresh_state' not in st.session_state:
+    st.session_state.fresh_state = True
+# Main interface
+st.markdown("### Enter a claim to verify")
+# Input area
+claim_input = st.text_area("",
+                         height=100,
+                         placeholder=(
+                             "Examples: The Eiffel Tower is located in Rome, Italy. "
+                             "Meta recently released its Llama 4 large language model. "
+                             "Justin Trudeau is not the Canadian Prime Minister anymore. "
+                             "China retaliated with 125% tariffs against U.S. imports. "
+                             "A recent piece of news."
+                         ),
+                         key="claim_input_area",
+                         label_visibility="collapsed",
+                         max_chars=None)
+# Information about result variability
+st.caption("""
+💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
+- Incorporate the most recent evidence available
+- Benefit from the AI's ability to consider multiple perspectives
+- Adapt to evolving information landscapes
+""")
+st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")
+# Button for verifying claim
+verify_button = st.button(
+    "Verify Claim",
+    type="primary",
+    disabled=st.session_state.processing,
+    key="verify_btn"
+)
+# Create a clean interface
+if st.session_state.fresh_state:
+    # Show a clean interface for the first query or when we need to reset
+    analysis_placeholder = st.empty()
+    # When button is clicked and not already processing
+    if verify_button and not st.session_state.processing:
+        if not claim_input:
+            st.error("Please enter a claim to verify.")
+        else:
+            # Store the claim and set processing state
+            st.session_state.claim_to_process = claim_input
+            st.session_state.processing = True
+            st.session_state.fresh_state = False
+            # Force a rerun to refresh UI
+            st.rerun()
+else:
+    # This is either during processing or showing results
+    # Create a container for processing and results
+    analysis_container = st.container()
+    with analysis_container:
+        # If we're processing, show the processing UI
+        if st.session_state.processing:
+            st.subheader("🔄 Processing...")
+            status = st.empty()
+            status.text("Verifying claim... (this may take a while)")
+            progress_bar = st.progress(0)
+            # Initialize models and agent if needed
+            if not hasattr(st.session_state, 'agent_initialized'):
+                with st.spinner("Initializing system..."):
+                    st.session_state.agent = get_agent()
+                    st.session_state.agent_initialized = True
+            try:
+                # Use the stored claim for processing
+                claim_to_process = st.session_state.claim_to_process
+                # Process the claim with the agent
+                start_time = time.time()
+                result = agent.process_claim(claim_to_process, st.session_state.agent)
+                total_time = time.time() - start_time
+                # Update progress as claim processing completes
+                progress_bar.progress(100)
+                # Check for None result
+                if result is None:
+                    st.error("Failed to process the claim. Please try again.")
+                    st.session_state.processing = False
+                    st.session_state.fresh_state = True
+                else:
+                    # If result exists but key values are missing, provide default values
+                    if "classification" not in result or result["classification"] is None:
+                        result["classification"] = "Uncertain"
+                    if "confidence" not in result or result["confidence"] is None:
+                        result["confidence"] = 0.6  # Default to 0.6 instead of 0.0
+                    if "explanation" not in result or result["explanation"] is None:
+                        result["explanation"] = "Insufficient evidence was found to determine the truthfulness of this claim."
+                    # Update result with timing information
+                    if "processing_times" not in result:
+                        result["processing_times"] = {"total": total_time}
+                    # Store the result and timing information
+                    st.session_state.result = result
+                    st.session_state.total_time = total_time
+                    st.session_state.has_result = True
+                    st.session_state.processing = False
+                    # Clear processing indicators before showing results
+                    status.empty()
+                    progress_bar.empty()
+                    # Force rerun to display results
+                    st.rerun()
+            except Exception as e:
+                # Handle any exceptions and reset processing state
+                logger.error(f"Error during claim processing: {str(e)}")
+                st.error(f"An error occurred: {str(e)}")
+                st.session_state.processing = False
+                st.session_state.fresh_state = True
+                # Force rerun to re-enable button
+                st.rerun()
+        # Display results if available
+        elif st.session_state.has_result and st.session_state.result:
+            result = st.session_state.result
+            total_time = st.session_state.total_time
+            claim_to_process = st.session_state.claim_to_process
+            st.subheader("📊 Verification Results")
+            result_col1, result_col2 = st.columns([2, 1])
+            with result_col1:
+                # Display both original and processed claim if they differ
+                if "claim" in result and result["claim"] and result["claim"] != claim_to_process:
+                    st.markdown(f"**Original Claim:** {claim_to_process}")
+                    st.markdown(f"**Processed Claim:** {result['claim']}")
+                else:
+                    st.markdown(f"**Claim:** {claim_to_process}")
+                # Make verdict colorful based on classification
+                truth_label = result.get('classification', 'Uncertain')
+                if truth_label and "True" in truth_label:
+                    verdict_color = "green"
+                elif truth_label and "False" in truth_label:
+                    verdict_color = "red"
+                else:
+                    verdict_color = "gray"
+                st.markdown(f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{truth_label}</span>", unsafe_allow_html=True)
+                # Ensure confidence value is used
+                if "confidence" in result and result["confidence"] is not None:
+                    confidence_value = result["confidence"]
+                    # Make sure confidence is a numeric value between 0 and 1
+                    try:
+                        confidence_value = float(confidence_value)
+                        if confidence_value < 0:
+                            confidence_value = 0.0
+                        elif confidence_value > 1:
+                            confidence_value = 1.0
+                    except (ValueError, TypeError):
+                        confidence_value = 0.6  # Fallback to reasonable default
+                else:
+                    confidence_value = 0.6  # Default confidence
+                # Display the confidence
+                st.markdown(f"**Confidence:** {confidence_value:.2%}")
+                st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")
+                # Add disclaimer about cross-verification
+                st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")
+            with result_col2:
+                st.markdown("**Processing Time**")
+                times = result.get("processing_times", {"total": total_time})
+                st.markdown(f"- **Total:** {times.get('total', total_time):.2f}s")
+                # Show agent thoughts
+                if "thoughts" in result and result["thoughts"]:
+                    st.markdown("**AI Reasoning Process**")
+                    thoughts = result.get("thoughts", [])
+                    for i, thought in enumerate(thoughts[:5]):  # Show top 5 thoughts
+                        st.markdown(f"{i+1}. {thought}")
+                    if len(thoughts) > 5:
+                        with st.expander("Show all reasoning steps"):
+                            for i, thought in enumerate(thoughts):
+                                st.markdown(f"{i+1}. {thought}")
+            # Display evidence
+            st.subheader("📝 Evidence")
+            evidence_count = result.get("evidence_count", 0)
+            evidence = result.get("evidence", [])
+            # Ensure evidence is a list
+            if not isinstance(evidence, list):
+                if isinstance(evidence, str):
+                    # Try to parse string as a list
+                    try:
+                        import ast
+                        parsed_evidence = ast.literal_eval(evidence)
+                        if isinstance(parsed_evidence, list):
+                            evidence = parsed_evidence
+                        else:
+                            evidence = [evidence]
+                    except:
+                        evidence = [evidence]
+                else:
+                    evidence = [str(evidence)] if evidence else []
+            # Update evidence count based on actual evidence list
+            evidence_count = len(evidence)
+            # Check for empty evidence
+            if evidence_count == 0 or not any(ev for ev in evidence if ev):
+                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
+            else:
+                st.markdown(f"Retrieved {evidence_count} pieces of evidence")
+            # Get classification results
+            classification_results = result.get("classification_results", [])
+            # Only show evidence tabs if we have evidence
+            if evidence and any(ev for ev in evidence if ev):
+                # Create tabs for different evidence categories
+                evidence_tabs = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])
+                with evidence_tabs[0]:
+                    for i, ev in enumerate(evidence):
+                        if ev and isinstance(ev, str) and ev.strip():  # Only show non-empty evidence
+                            with st.expander(f"Evidence {i+1}", expanded=i==0):
+                                st.text(ev)
+                with evidence_tabs[1]:
+                    if classification_results:
+                        # Check if classification_results items have the expected format
+                        valid_results = []
+                        for res in classification_results:
+                            if isinstance(res, dict) and "confidence" in res and "evidence" in res and "label" in res:
+                                if res.get("evidence"):  # Only include results with actual evidence
+                                    valid_results.append(res)
+                        if valid_results:
+                            sorted_results = sorted(valid_results, key=lambda x: x.get("confidence", 0), reverse=True)
+                            top_results = sorted_results[:min(3, len(sorted_results))]
+                            for i, res in enumerate(top_results):
+                                with st.expander(f"Top Evidence {i+1} (Confidence: {res.get('confidence', 0):.2%})", expanded=i == 0):
+                                    st.text(res.get("evidence", "No evidence text available"))
+                                    st.markdown(f"**Classification:** {res.get('label', 'unknown')}")
+                        else:
+                            # If no valid results, just show the evidence
+                            shown = False
+                            for i, ev in enumerate(evidence[:3]):
+                                if ev and isinstance(ev, str) and ev.strip():
+                                    with st.expander(f"Evidence {i+1}", expanded=i==0):
+                                        st.text(ev)
+                                        shown = True
+                            if not shown:
+                                st.info("No detailed classification results available.")
+                    else:
+                        # Just show regular evidence if no classification details
+                        shown = False
+                        for i, ev in enumerate(evidence[:3]):
+                            if ev and isinstance(ev, str) and ev.strip():
+                                with st.expander(f"Evidence {i+1}", expanded=i==0):
+                                    st.text(ev)
+                                    shown = True
+                        if not shown:
+                            st.info("No detailed classification results available.")
+                with evidence_tabs[2]:
+                    evidence_sources = {}
+                    for ev in evidence:
+                        if not ev or not isinstance(ev, str):
+                            continue
+                        source = "Unknown"
+                        # Extract source info from evidence text
+                        if "URL:" in ev:
+                            import re
+                            url_match = re.search(r'URL: https?://(?:www\.)?([^/]+)', ev)
+                            if url_match:
+                                source = url_match.group(1)
+                        if source in evidence_sources:
+                            evidence_sources[source] += 1
+                        else:
+                            evidence_sources[source] = 1
+                    # Display evidence source distribution
+                    if evidence_sources:
+                        st.markdown("**Evidence Source Distribution**")
+                        for source, count in evidence_sources.items():
+                            st.markdown(f"- {source}: {count} item(s)")
+                    else:
+                        st.info("No source information available in the evidence.")
+            else:
+                st.warning("No evidence was retrieved for this claim.")
+            # Button to start a new verification
+            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
+                # Reset to fresh state for a new verification
+                st.session_state.fresh_state = True
+                st.session_state.has_result = False
+                st.session_state.result = None
+                st.rerun()
+# Footer with additional information: a horizontal rule followed by a caption stating the tool's purpose and limitations
+st.markdown("---")
+st.caption("""
+**AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
+While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
+""")

config.py ADDED Viewed

	@@ -0,0 +1,130 @@

+"""
+Configuration module for the Fake News Detector application.
+This module handles loading configuration parameters, API keys,
+and source credibility data needed for the fact checking system.
+It manages environment variables and file-based configurations.
+"""
+import os
+import json
+import logging
+from pathlib import Path
+# Configure logger: looked up by the fixed name "misinformation_detector" rather than by this module's name
+logger = logging.getLogger("misinformation_detector")
+# Base paths: ROOT_DIR is the absolute directory containing this file; DATA_DIR is its "data" subdirectory
+ROOT_DIR = Path(__file__).parent.absolute()
+DATA_DIR = ROOT_DIR / "data"
+# Ensure data directory exists (runs at import time; exist_ok=True makes it a no-op when already present)
+DATA_DIR.mkdir(exist_ok=True)
+# First try to get API keys from Streamlit secrets, then fall back to environment variables
+# try:
+#     import streamlit as st
+#     OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
+#     NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
+#     FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
+# except (AttributeError, ImportError):
+#     # Fall back to environment variables if Streamlit secrets aren't available
+#     OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
+#     NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
+#     FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")
+try:
+    import streamlit as st
+    OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
+    NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
+    FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
+except (AttributeError, ImportError):
+    # For local testing only - REMOVE BEFORE COMMITTING!
+    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "sk-proj-iwzefHOGPoeAzC0mNUsIT3BlbkFJlGzELYyK52szvpv3MKMY")
+    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "43ebe77036904dc1a150893a40d10bb3")
+    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "AIzaSyD9VqVCk_9gsEfsvstES5HW-195F5WgUuA")
+# Log secrets status (but not the values)
+if OPENAI_API_KEY:
+    logger.info("OPENAI_API_KEY is set")
+else:
+    logger.warning("OPENAI_API_KEY not set. The application will not function properly.")
+if NEWS_API_KEY:
+    logger.info("NEWS_API_KEY is set")
+else:
+    logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")
+if FACTCHECK_API_KEY:
+    logger.info("FACTCHECK_API_KEY is set")
+else:
+    logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")
+# Set API key in environment to ensure it's available to all components
+os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
+# Source credibility file path
+source_cred_file = DATA_DIR / "source_credibility.json"
+def load_source_credibility():
+    """
+    Load source credibility data from JSON file
+    Returns:
+        dict: Mapping of domain names to credibility scores (0-1)
+              Empty dict if file is not found or has errors
+    """
+    try:
+        if source_cred_file.exists():
+            with open(source_cred_file, 'r') as f:
+                return json.load(f)
+        else:
+            logger.warning(f"Source credibility file not found: {source_cred_file}")
+            return {}
+    except Exception as e:
+        logger.error(f"Error loading source credibility file: {e}")
+        return {}
+# Load source credibility once at module import
+SOURCE_CREDIBILITY = load_source_credibility()
+# Rate limiting configuration, keyed by API name: each entry permits "requests" calls per "period" seconds
+RATE_LIMITS = {
+    # api_name: {"requests": max_requests, "period": period_in_seconds}
+    "newsapi": {"requests": 100, "period": 3600},  # 100 requests per hour
+    "factcheck": {"requests": 1000, "period": 86400},  # 1000 requests per day
+    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
+    "wikidata": {"requests": 60, "period": 60},  # 60 requests per minute
+    "wikipedia": {"requests": 200, "period": 60},  # 200 requests per minute
+    "rss": {"requests": 300, "period": 3600}  # 300 RSS requests per hour
+}
+# Error backoff settings (NOTE(review): the retry loop that reads these is not in this module — confirm exact semantics there)
+ERROR_BACKOFF = {
+    "max_retries": 5,
+    "initial_backoff": 1,  # seconds
+    "backoff_factor": 2,  # exponential backoff; presumably the wait is multiplied by this after each failed attempt
+}
+# RSS feed settings (read by the RSS retrieval code, which lives outside this module)
+RSS_SETTINGS = {
+    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
+    "max_age_days": 3,            # Maximum age of RSS items to consider
+    "timeout_seconds": 5,         # Timeout for RSS feed requests
+    "max_workers": 5              # Number of parallel workers for fetching feeds
+}
+# Semantic analysis settings; the three *_weight values below sum to 1.0
+SEMANTIC_ANALYSIS_CONFIG = {
+    "similarity_weight": 0.4,      # Weight for semantic similarity
+    "entity_overlap_weight": 0.3,  # Weight for entity matching
+    "base_weight": 0.3,            # Base relevance weight
+    "temporal_boost": 1.2,         # Boost for recent evidence (presumably a multiplier — confirm in the scoring code)
+    "temporal_penalty": 0.7,       # Penalty for outdated evidence (presumably a multiplier — confirm in the scoring code)
+    "authority_boosts": {  # Boost per authority category; NOTE(review): how evidence is assigned a category is not visible here
+        "scientific_consensus": 1.8,
+        "fact_check": 1.5,
+        "high_authority": 1.3
+    }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+streamlit==1.32.0
+langchain>=0.1.6
+langchain_openai>=0.0.5
+langchain_core>=0.1.25
+langgraph>=0.0.27
+transformers==4.36.2
+requests==2.31.0
+beautifulsoup4==4.12.2
+langdetect==1.0.9
+spacy==3.7.2
+SPARQLWrapper==2.0.0
+python-dotenv==1.0.0
+pydantic==2.5.3
+feedparser==6.0.10
+scikit-learn>=1.3.0
+numpy>=1.21.0
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl