Spaces:
Running
Running
Upload 7 files.
Browse files- LICENSE +21 -0
- __init__.py +1 -0
- agent.py +430 -0
- app.py +518 -0
- config.py +130 -0
- requirements.txt +17 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2025 AskVeracity
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Root package initialization
|
agent.py
ADDED
@@ -0,0 +1,430 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Agent module for the Fake News Detector application.
|
3 |
+
|
4 |
+
This module implements a LangGraph-based agent that orchestrates
|
5 |
+
the fact-checking process. It defines the agent setup, tools,
|
6 |
+
and processing pipeline for claim verification.
|
7 |
+
"""
|
8 |
+
|
9 |
+
import os
|
10 |
+
import time
|
11 |
+
import logging
|
12 |
+
import traceback
|
13 |
+
from langchain_core.tools import tool
|
14 |
+
from langchain.prompts import PromptTemplate
|
15 |
+
from langgraph.prebuilt import create_react_agent
|
16 |
+
|
17 |
+
from utils.models import get_llm_model
|
18 |
+
from utils.performance import PerformanceTracker
|
19 |
+
from modules.claim_extraction import extract_claims
|
20 |
+
from modules.evidence_retrieval import retrieve_combined_evidence
|
21 |
+
from modules.classification import classify_with_llm, aggregate_evidence
|
22 |
+
from modules.explanation import generate_explanation
|
23 |
+
|
24 |
+
# Configure logger
|
25 |
+
logger = logging.getLogger("misinformation_detector")
|
26 |
+
|
27 |
+
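# Module-level performance tracker used by the tools below.
# NOTE(review): this constructs a new PerformanceTracker(); it is only the same
# object as the tracker created in app.py if PerformanceTracker is a singleton - confirm.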
|
28 |
+
performance_tracker = PerformanceTracker()
|
29 |
+
|
30 |
+
# Define LangGraph Tools
|
31 |
+
@tool
def claim_extractor(query):
    """
    Tool that extracts factual claims from a given text.

    Args:
        query (str): Text containing potential factual claims

    Returns:
        str: Extracted factual claim
    """
    # NOTE: the docstring above is intentionally left untouched; langchain's
    # @tool decorator uses it as the tool description, so its wording is
    # runtime behavior rather than plain documentation.

    # Record the claim in the module-level tracker before delegating.
    performance_tracker.log_claim_processed()
    extracted = extract_claims(query)
    return extracted
|
44 |
+
|
45 |
+
@tool
def evidence_retriever(query):
    """
    Tool that retrieves evidence from multiple sources for a claim.

    Args:
        query (str): The factual claim to gather evidence for

    Returns:
        list: List of evidence items from various sources
    """
    # NOTE: docstring kept verbatim; langchain's @tool decorator uses it as
    # the tool description.

    # Thin wrapper: all retrieval work is delegated to retrieve_combined_evidence().
    gathered = retrieve_combined_evidence(query)
    return gathered
|
57 |
+
|
58 |
+
@tool
def truth_classifier(query, evidence):
    """
    Tool that classifies the truthfulness of a claim based on evidence.

    Args:
        query (str): The factual claim to classify
        evidence (list): Evidence items to evaluate

    Returns:
        str: JSON string containing verdict, confidence, and results
    """
    # NOTE: docstring kept verbatim; langchain's @tool decorator uses it as
    # the tool description.

    # json is not imported at module level in this file, so it is imported
    # inside the function, as the original did.
    import json

    # Per-evidence classification first, then one aggregated verdict.
    per_item = classify_with_llm(query, evidence)
    verdict, score = aggregate_evidence(per_item)

    logger.info(f"Classification results: {len(per_item)} items")
    logger.info(f"Aggregate result: {verdict}, confidence: {score}")

    # Any verdict whose label mentions "True" or "False" is treated as
    # definitive and gets a confidence floor of 0.6.
    is_definitive = "True" in verdict or "False" in verdict
    if is_definitive:
        score = max(score, 0.6)

    # Serialize to a JSON string so the payload survives the trip through the
    # agent's tool message; format_response() parses it back.
    return json.dumps(
        {
            "verdict": verdict,
            "confidence": score,
            "results": per_item,
        }
    )
|
91 |
+
|
92 |
+
@tool
def explanation_generator(claim, evidence_results, truth_label):
    """
    Tool that generates a human-readable explanation for the verdict.

    Args:
        claim (str): The factual claim being verified
        evidence_results (list): Evidence items and classification results
        truth_label (str): The verdict (True/False/Uncertain)

    Returns:
        str: Natural language explanation of the verdict
    """
    # NOTE: docstring kept verbatim; langchain's @tool decorator uses it as
    # the tool description.

    text = generate_explanation(claim, evidence_results, truth_label)

    # Only the first 100 characters are logged to keep log lines short.
    preview = text[:100]
    logger.info(f"Generated explanation: {preview}...")
    return text
|
108 |
+
|
109 |
+
def setup_agent():
    """
    Create and configure a ReAct agent with the fact-checking tools.

    Builds a LangGraph prebuilt ReAct agent around the model returned by
    get_llm_model() and the four tools defined in this module
    (claim_extractor, evidence_retriever, truth_classifier,
    explanation_generator).

    Returns:
        object: Configured LangGraph agent ready for claim processing

    Raises:
        ValueError: If the OPENAI_API_KEY environment variable is missing
            or contains only whitespace
        Exception: Any error raised while loading the model or building the
            agent is logged and re-raised unchanged
    """
    # Fail fast when the key is missing or blank; one lookup with a default
    # covers both cases.
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty.")
        raise ValueError("OpenAI API key is required")

    tools = [
        claim_extractor,
        evidence_retriever,
        truth_classifier,
        explanation_generator,
    ]

    # NOTE(review): an earlier revision built a PromptTemplate here describing
    # the intended order (claim_extractor -> evidence_retriever ->
    # truth_classifier -> explanation_generator -> final answer) plus a
    # text-style ReAct format, but never passed it to create_react_agent(), so
    # it had no effect on the agent. It was removed as dead code. If a system
    # prompt is wanted, it has to be handed to create_react_agent() explicitly;
    # the keyword name depends on the installed langgraph version - confirm
    # before wiring it in.

    try:
        model = get_llm_model()

        # Prebuilt ReAct graph; no timeout is configured here (the per-call
        # recursion limit is applied by process_claim()).
        graph = create_react_agent(model, tools=tools)
        logger.info("Agent created successfully")
        return graph
    except Exception as e:
        logger.error(f"Error creating agent: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise
|
177 |
+
|
178 |
+
def process_claim(claim, agent=None, recursion_limit=20):
    """
    Run a claim through the fact-checking agent and return a structured result.

    The claim is sent to the agent as a single user message; the raw agent
    response is then converted by format_response().

    Args:
        claim (str): The factual claim to be verified
        agent (object, optional): Initialized LangGraph agent (see
            setup_agent()). When None, an error is logged and None is
            returned.
        recursion_limit (int, optional): Value passed to the agent as the
            "recursion_limit" config entry. Default: 20.

    Returns:
        dict | None: The dictionary produced by format_response() on success
            (claim, evidence, evidence_count, classification, confidence,
            explanation, final_answer, ...); {"error": <message>} if invoking
            or formatting raised; None if no agent was supplied.
    """
    # Guard clause: nothing can be done without an agent.
    if agent is None:
        logger.error("Agent not initialized. Call setup_agent() first.")
        return None

    started_at = time.time()
    logger.info(f"Processing claim with agent: {claim}")

    try:
        # The agent takes a message list plus a config dict carrying the
        # recursion limit.
        raw_response = agent.invoke(
            {"messages": [("user", claim)]},
            {"recursion_limit": recursion_limit},
        )

        formatted = format_response(raw_response)

        duration = time.time() - started_at
        logger.info(f"Claim processed in {duration:.2f} seconds")
        return formatted
    except Exception as exc:
        # Best-effort boundary: log the full traceback and hand the caller an
        # error dict instead of propagating.
        logger.error(f"Error processing claim with agent: {str(exc)}")
        logger.error(traceback.format_exc())
        return {"error": str(exc)}
|
233 |
+
|
234 |
+
def _evidence_from_content(content):
    """Turn an evidence_retriever tool payload into a list of evidence items."""
    if isinstance(content, list):
        return content

    # Tool output may arrive as the string representation of a list.
    if isinstance(content, str) and content.startswith("[") and content.endswith("]"):
        import ast

        try:
            parsed = ast.literal_eval(content)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid Python literal: keep the raw text as one item.
            return [content]
        return parsed if isinstance(parsed, list) else [content]

    logger.warning(f"Evidence retrieved is not a list: {type(content)}")
    return [content]


def _apply_classifier_output(result, content):
    """Copy verdict, confidence and per-item results from the truth_classifier JSON into result."""
    import json

    logger.info(f"Truth classifier content type: {type(content)}")
    logger.info(f"Truth classifier content: {content}")

    if not isinstance(content, str):
        logger.warning(f"Unexpected truth_classifier content format: {content}")
        return

    try:
        parsed = json.loads(content)

        result["classification"] = parsed.get("verdict", "Uncertain")
        result["confidence"] = float(parsed.get("confidence", 0.2))
        result["classification_results"] = parsed.get("results", [])

        logger.info(f"Extracted from JSON: verdict={result['classification']}, confidence={result['confidence']}")
    except json.JSONDecodeError:
        logger.warning(f"Could not parse truth classifier JSON: {content}")
    except Exception as e:
        # e.g. payload is valid JSON but not an object, or confidence is not numeric
        logger.warning(f"Error extracting from truth classifier output: {e}")


def format_response(response):
    """
    Format the agent's response into a structured result.

    Walks the messages in the agent response, collects the output of each
    fact-checking tool (claim, evidence, classification, explanation), the
    agent's final answer and any "Thought:" lines, and fills in defaults for
    whatever is missing. If the classifier ran but the explanation tool did
    not, an explanation is generated here as a fallback.

    Args:
        response (dict): Raw response from the LangGraph agent; expected to
            contain a "messages" list of message objects

    Returns:
        dict: Structured result with the keys claim, evidence,
            evidence_count, classification, confidence, explanation,
            final_answer, thoughts and performance (plus
            classification_results when the classifier output was parsed).
            Returns {"error": "Invalid response format"} when response is
            empty or has no "messages" key, and an error dictionary
            (error, traceback, classification="Error", confidence,
            explanation) if formatting itself raises.
    """
    try:
        if not response or "messages" not in response:
            return {"error": "Invalid response format"}

        messages = response.get("messages", [])

        # Result container with conservative defaults
        result = {
            "claim": None,
            "evidence": [],
            "evidence_count": 0,
            "classification": "Uncertain",
            "confidence": 0.2,  # Default low confidence
            "explanation": "Insufficient evidence to evaluate this claim.",
            "final_answer": None,
            "thoughts": [],
        }

        # Track which tools produced output
        found_tools = {
            "claim_extractor": False,
            "evidence_retriever": False,
            "truth_classifier": False,
            "explanation_generator": False,
        }

        # LangChain AI messages report type "ai"; "assistant" is kept for
        # backward compatibility with the previous check.
        assistant_types = ("ai", "assistant")
        last_index = len(messages) - 1

        for idx, message in enumerate(messages):
            msg_type = getattr(message, "type", "")

            # Extract agent thoughts (first line after "Thought:")
            if hasattr(message, "content") and msg_type in assistant_types:
                content = message.content
                # content may be a list of blocks rather than text; only parse strings
                if isinstance(content, str) and "Thought:" in content:
                    thought = content.split("Thought:", 1)[1].split("\n")[0].strip()
                    result["thoughts"].append(thought)

            # Extract tool outputs
            if msg_type == "tool":
                tool_name = getattr(message, "name", "unknown")
                content = message.content

                if tool_name == "claim_extractor":
                    found_tools["claim_extractor"] = True
                    if content:
                        result["claim"] = content

                elif tool_name == "evidence_retriever":
                    found_tools["evidence_retriever"] = True
                    if content:
                        evidence_items = _evidence_from_content(content)
                        result["evidence"] = evidence_items
                        result["evidence_count"] = len(evidence_items)

                elif tool_name == "truth_classifier":
                    found_tools["truth_classifier"] = True
                    _apply_classifier_output(result, content)

                elif tool_name == "explanation_generator":
                    found_tools["explanation_generator"] = True
                    if content:
                        result["explanation"] = content
                        logger.info(f"Found explanation from tool: {content[:100]}...")

            # Get final answer from last message (only when it is not a tool message)
            elif idx == last_index and hasattr(message, "content"):
                result["final_answer"] = message.content

        # Log which tools weren't found
        missing_tools = [name for name, found in found_tools.items() if not found]
        if missing_tools:
            logger.warning(f"Missing tool outputs in response: {', '.join(missing_tools)}")

        # FALLBACK: classification present but no explanation -> generate one now
        if found_tools["truth_classifier"] and not found_tools["explanation_generator"]:
            logger.info("Explanation generator was not called by the agent, using fallback explanation generation")

            try:
                classification_results = result.get("classification_results", [])

                # Prefer the per-item classification results; fall back to raw evidence
                explanation_evidence = classification_results if classification_results else result["evidence"]

                explanation = generate_explanation(
                    result["claim"],
                    explanation_evidence,
                    result["classification"],
                    result["confidence"],
                )

                if explanation:
                    logger.info(f"Generated fallback explanation: {explanation[:100]}...")
                    result["explanation"] = explanation
            except Exception as e:
                # Best effort: keep the default explanation on failure
                logger.error(f"Error generating fallback explanation: {e}")

        # Keep evidence_count consistent with the evidence list
        if result["evidence_count"] > 0 and not result["evidence"]:
            logger.warning("Evidence count is non-zero but evidence list is empty. This is a data inconsistency.")
            result["evidence_count"] = 0

        # Debug info about the final result
        logger.info(f"Final classification: {result['classification']}, confidence: {result['confidence']}")
        logger.info(f"Final explanation: {result['explanation'][:100]}...")

        # Add performance metrics
        result["performance"] = performance_tracker.get_summary()

        # Memory management for web deployment: cap each string evidence item
        # at 500 characters (497 + "...") and keep at most 10 thoughts.
        if isinstance(result["evidence"], list):
            result["evidence"] = [
                ev[:497] + "..." if isinstance(ev, str) and len(ev) > 500 else ev
                for ev in result["evidence"]
            ]

        if len(result["thoughts"]) > 10:
            result["thoughts"] = result["thoughts"][:10]

        return result

    except Exception as e:
        trace_text = traceback.format_exc()
        logger.error(f"Error formatting agent response: {str(e)}")
        logger.error(trace_text)
        return {
            "error": str(e),
            "traceback": trace_text,
            "classification": "Error",
            "confidence": 0.1,
            "explanation": "An error occurred while processing this claim.",
        }
|
app.py
ADDED
@@ -0,0 +1,518 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Main Streamlit application for the Fake News Detector.
|
3 |
+
|
4 |
+
This module implements the user interface for claim verification,
|
5 |
+
rendering the results and handling user interactions. It also
|
6 |
+
manages the application lifecycle including initialization and cleanup.
|
7 |
+
"""
|
8 |
+
|
9 |
+
import streamlit as st
|
10 |
+
import time
|
11 |
+
import json
|
12 |
+
import os
|
13 |
+
import logging
|
14 |
+
import atexit
|
15 |
+
import sys
|
16 |
+
from pathlib import Path
|
17 |
+
|
18 |
+
# Configure logging first, before other imports
|
19 |
+
logging.basicConfig(
|
20 |
+
level=logging.INFO,
|
21 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
22 |
+
handlers=[logging.StreamHandler()]
|
23 |
+
)
|
24 |
+
logger = logging.getLogger("misinformation_detector")
|
25 |
+
|
26 |
+
# Check for critical environment variables
|
27 |
+
if not os.environ.get("OPENAI_API_KEY"):
|
28 |
+
logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")
|
29 |
+
|
30 |
+
# Import our modules
|
31 |
+
from utils.models import initialize_models
|
32 |
+
from utils.performance import PerformanceTracker
|
33 |
+
|
34 |
+
# Import agent functionality
|
35 |
+
import agent
|
36 |
+
|
37 |
+
# Initialize performance tracker
|
38 |
+
performance_tracker = PerformanceTracker()
|
39 |
+
|
40 |
+
# Ensure data directory exists
|
41 |
+
data_dir = Path("data")
|
42 |
+
if not data_dir.exists():
|
43 |
+
logger.info("Creating data directory")
|
44 |
+
data_dir.mkdir(exist_ok=True)
|
45 |
+
|
46 |
+
# Set page configuration
|
47 |
+
st.set_page_config(
|
48 |
+
page_title="AskVeracity",
|
49 |
+
page_icon="🔍",
|
50 |
+
layout="wide",
|
51 |
+
)
|
52 |
+
|
53 |
+
# Hide the "Press ⌘+Enter to apply" text with CSS
|
54 |
+
st.markdown("""
|
55 |
+
<style>
|
56 |
+
/* Hide the shortcut text that appears at the bottom of text areas */
|
57 |
+
.stTextArea div:has(textarea) + div {
|
58 |
+
visibility: hidden !important;
|
59 |
+
height: 0px !important;
|
60 |
+
position: absolute !important;
|
61 |
+
}
|
62 |
+
</style>
|
63 |
+
""", unsafe_allow_html=True)
|
64 |
+
|
65 |
+
@st.cache_resource
def get_agent():
    """
    Build the fact-checking agent and cache it for reuse.

    Logs the initialization, calls initialize_models(), and returns the
    agent produced by agent.setup_agent(). Because of the
    st.cache_resource decorator the body runs only on the first call;
    Streamlit documents this cache as shared across reruns and sessions,
    so later calls receive the same agent object.

    Returns:
        object: Initialized LangGraph agent for fact checking
    """
    logger.info("Initializing models and agent (cached)")

    # Models are initialized before the agent is built.
    initialize_models()
    fact_checking_agent = agent.setup_agent()
    return fact_checking_agent
|
80 |
+
|
81 |
+
def cleanup_resources():
    """
    Release cached state when the interpreter shuts down.

    Registered with ``atexit`` right below this definition. Clears both
    Streamlit caches and resets the performance tracker. Any failure is logged
    instead of raised so that shutdown is never interrupted by cleanup.
    """
    try:
        # Clear any cached data
        st.cache_data.clear()

        # get_agent() is cached with st.cache_resource, not st.cache_data, so
        # the resource cache has to be cleared as well to drop the agent
        st.cache_resource.clear()

        # Reset performance tracker
        performance_tracker.reset()

        # Log cleanup
        logger.info("Resources cleaned up successfully")
    except Exception as e:
        logger.error(f"Error during cleanup: {e}")


# Register cleanup handler
atexit.register(cleanup_resources)
|
102 |
+
|
103 |
+
# App title and description
st.title("🔍 AskVeracity")
APP_INTRO = """
This is a simple AI-powered tool - a fact-checking system that analyzes claims to determine
their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
news outlets, and academic repositories.
"""
st.markdown(APP_INTRO)

# Sidebar with app information; each section's text is named first, then rendered
with st.sidebar:
    st.header("About")
    about_text = (
        "This system uses a combination of NLP techniques and LLMs to "
        "extract claims, gather evidence, and classify the truthfulness of statements."
    )
    st.info(about_text)

    # Application information
    st.markdown("### How It Works")
    how_it_works_text = (
        "1. Enter any recent news or a factual claim\n"
        "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
        "3. The system analyzes the evidence to determine truthfulness\n"
        "4. Results show the verdict with supporting evidence"
    )
    st.info(how_it_works_text)

    # Our Mission
    st.markdown("### Our Mission")
    mission_text = (
        "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
        "We believe in empowering people with factual information to make informed decisions."
    )
    st.info(mission_text)

    # Limitations and Usage
    st.markdown("### Limitations")
    limitations_text = (
        "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
        "Performance is typically best with widely-reported news and information published within the last 48 hours. "
        "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
        "may be judged false if circumstances have changed, and vice versa."
    )
    st.warning(limitations_text)

    # Best Practices
    st.markdown("### Best Practices")
    best_practices_text = (
        "For optimal results:\n\n"
        "• Keep claims short and precise\n\n"
        "• Include key details in your claim\n\n"
        "• Phrase claims as direct statements rather than questions\n\n"
        "• Be specific about who said what"
    )
    st.success(best_practices_text)

    # Example comparison
    with st.expander("📝 Examples of Effective Claims"):
        st.markdown("""
        **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."

        **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
        """)

    # Important Notes
    st.markdown("### Important Notes")
    important_notes_text = (
        "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
        "• Results can vary based on available evidence and LLM behavior\n\n"
        "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
        "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
        "• We recommend cross-verifying critical information with additional sources"
    )
    st.info(important_notes_text)

    # Privacy Information
    st.markdown("### Data Privacy")
    privacy_text = (
        "We do not collect or store any data about the claims you submit. "
        "Your interactions are processed by OpenAI's API. Please refer to "
        "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
    )
    st.info(privacy_text)

    # Feedback Section
    st.markdown("### Feedback")
    feedback_text = (
        "AskVeracity is evolving and we welcome your feedback to help us improve. "
        "Please reach out to us with questions, suggestions, or concerns."
    )
    st.success(feedback_text)
|
186 |
+
|
187 |
+
# Seed session state: each key gets its default only if it is not already set,
# so values survive Streamlit reruns
SESSION_DEFAULTS = {
    'processing': False,
    'claim_to_process': "",
    'has_result': False,
    'result': None,
    'total_time': 0,
    'fresh_state': True,
}
for state_key, default_value in SESSION_DEFAULTS.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value
|
200 |
+
|
201 |
+
# Main interface
st.markdown("### Enter a claim to verify")

# Input area. The label is visually hidden via label_visibility="collapsed",
# but it is still given real text: Streamlit logs a warning for empty labels
# because the label is what assistive technology reads out.
claim_input = st.text_area(
    "Claim to verify",
    height=100,
    placeholder=(
        "Examples: The Eiffel Tower is located in Rome, Italy. "
        "Meta recently released its Llama 4 large language model. "
        "Justin Trudeau is not the Canadian Prime Minister anymore. "
        "China retaliated with 125% tariffs against U.S. imports. "
        "A recent piece of news."
    ),
    key="claim_input_area",
    label_visibility="collapsed",
    max_chars=None,
)

# Information about result variability
st.caption("""
💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
- Incorporate the most recent evidence available
- Benefit from the AI's ability to consider multiple perspectives
- Adapt to evolving information landscapes
""")

st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")

# Button for verifying claim; disabled while a claim is being processed so the
# same request cannot be submitted twice
verify_button = st.button(
    "Verify Claim",
    type="primary",
    disabled=st.session_state.processing,
    key="verify_btn",
)
|
235 |
+
|
236 |
+
# ---------------------------------------------------------------------------
# Helpers for the processing / results view
# ---------------------------------------------------------------------------

def _clamp_confidence(raw_confidence, default=0.6):
    """Return raw_confidence as a float limited to [0, 1]; default if missing or non-numeric."""
    if raw_confidence is None:
        return default
    try:
        value = float(raw_confidence)
    except (ValueError, TypeError):
        return default
    return min(max(value, 0.0), 1.0)


def _coerce_evidence(raw_evidence):
    """Return the evidence as a list, parsing a stringified list when possible."""
    if isinstance(raw_evidence, list):
        return raw_evidence
    if isinstance(raw_evidence, str):
        import ast
        try:
            parsed = ast.literal_eval(raw_evidence)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a Python literal: treat the whole string as one evidence item
            return [raw_evidence]
        return parsed if isinstance(parsed, list) else [raw_evidence]
    return [str(raw_evidence)] if raw_evidence else []


def _is_displayable(ev):
    """Return True when ev is a string with non-whitespace content."""
    return isinstance(ev, str) and bool(ev.strip())


def _show_evidence_preview(items, limit=3):
    """Render up to `limit` leading evidence items, or a notice when none can be shown."""
    shown = False
    for i, ev in enumerate(items[:limit]):
        if _is_displayable(ev):
            with st.expander(f"Evidence {i+1}", expanded=i == 0):
                st.text(ev)
            shown = True
    if not shown:
        st.info("No detailed classification results available.")


def _count_sources(items):
    """Count evidence items per source host, taken from a 'URL: http(s)://host/...' marker."""
    import re
    # \s is excluded from the host so that a URL without a path cannot swallow
    # the following lines of the evidence text up to the next "/"
    url_pattern = re.compile(r'URL: https?://(?:www\.)?([^/\s]+)')
    counts = {}
    for ev in items:
        if not ev or not isinstance(ev, str):
            continue
        url_match = url_pattern.search(ev)
        source = url_match.group(1) if url_match else "Unknown"
        counts[source] = counts.get(source, 0) + 1
    return counts


def _verdict_markup(label):
    """Build the colored verdict line; the label is HTML-escaped because it is rendered as raw HTML."""
    import html
    if "True" in label:
        verdict_color = "green"
    elif "False" in label:
        verdict_color = "red"
    else:
        verdict_color = "gray"
    return (
        f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>"
        f"{html.escape(label)}</span>"
    )


# Create a clean interface
if st.session_state.fresh_state:
    # A failure in the previous run stored its message here: showing it with
    # st.error() right before st.rerun() would wipe it before anyone could read it
    if "error_message" in st.session_state:
        st.error(st.session_state.error_message)
        del st.session_state.error_message

    # When button is clicked and not already processing
    if verify_button and not st.session_state.processing:
        if not claim_input or not claim_input.strip():
            st.error("Please enter a claim to verify.")
        else:
            # Store the claim and set processing state
            st.session_state.claim_to_process = claim_input
            st.session_state.processing = True
            st.session_state.fresh_state = False
            # Force a rerun to refresh UI (this also disables the verify button)
            st.rerun()

else:
    # This is either during processing or showing results
    with st.container():
        # If we're processing, show the processing UI
        if st.session_state.processing:
            st.subheader("🔄 Processing...")
            status = st.empty()
            status.text("Verifying claim... (this may take a while)")
            progress_bar = st.progress(0)

            try:
                # Initialize models and agent if needed. Kept inside the try so
                # that an initialization failure also resets the processing
                # state instead of leaving the verify button disabled.
                if "agent_initialized" not in st.session_state:
                    with st.spinner("Initializing system..."):
                        st.session_state.agent = get_agent()
                        st.session_state.agent_initialized = True

                # Use the stored claim for processing
                claim_to_process = st.session_state.claim_to_process

                # Process the claim with the agent
                start_time = time.time()
                result = agent.process_claim(claim_to_process, st.session_state.agent)
                total_time = time.time() - start_time

                # Update progress as claim processing completes
                progress_bar.progress(100)

                if result is None:
                    st.session_state.error_message = "Failed to process the claim. Please try again."
                    st.session_state.fresh_state = True
                else:
                    # If result exists but key values are missing, provide default values
                    fallback_values = {
                        "classification": "Uncertain",
                        "confidence": 0.6,  # Default to 0.6 instead of 0.0
                        "explanation": "Insufficient evidence was found to determine the truthfulness of this claim.",
                    }
                    for field, fallback in fallback_values.items():
                        if field not in result or result[field] is None:
                            result[field] = fallback

                    # Update result with timing information
                    if "processing_times" not in result:
                        result["processing_times"] = {"total": total_time}

                    # Store the result and timing information
                    st.session_state.result = result
                    st.session_state.total_time = total_time
                    st.session_state.has_result = True

            except Exception as e:
                # Top-level boundary for the agent call: log with traceback and
                # hand the message to the next run for display
                logger.error(f"Error during claim processing: {str(e)}", exc_info=True)
                st.session_state.error_message = f"An error occurred: {str(e)}"
                st.session_state.fresh_state = True

            # Every path leaves the processing state, clears the indicators and
            # reruns, so the button is re-enabled and the result/error is shown.
            # st.rerun() is deliberately outside the try block.
            st.session_state.processing = False
            status.empty()
            progress_bar.empty()
            st.rerun()

        # Display results if available
        elif st.session_state.has_result and st.session_state.result:
            result = st.session_state.result
            total_time = st.session_state.total_time
            claim_to_process = st.session_state.claim_to_process

            st.subheader("📊 Verification Results")

            result_col1, result_col2 = st.columns([2, 1])

            with result_col1:
                # Display both original and processed claim if they differ
                processed_claim = result.get("claim")
                if processed_claim and processed_claim != claim_to_process:
                    st.markdown(f"**Original Claim:** {claim_to_process}")
                    st.markdown(f"**Processed Claim:** {processed_claim}")
                else:
                    st.markdown(f"**Claim:** {claim_to_process}")

                # Make verdict colorful based on classification
                truth_label = str(result.get('classification') or 'Uncertain')
                st.markdown(_verdict_markup(truth_label), unsafe_allow_html=True)

                # Display the confidence (coerced to a number between 0 and 1)
                confidence_value = _clamp_confidence(result.get("confidence"))
                st.markdown(f"**Confidence:** {confidence_value:.2%}")
                st.markdown(f"**Explanation:** {result.get('explanation') or 'No explanation available.'}")

                # Add disclaimer about cross-verification
                st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")

            with result_col2:
                st.markdown("**Processing Time**")
                times = result.get("processing_times")
                reported_total = times.get('total', total_time) if isinstance(times, dict) else total_time
                st.markdown(f"- **Total:** {reported_total:.2f}s")

                # Show agent thoughts
                thoughts = result.get("thoughts") or []
                if thoughts:
                    st.markdown("**AI Reasoning Process**")
                    for i, thought in enumerate(thoughts[:5]):  # Show top 5 thoughts
                        st.markdown(f"{i+1}. {thought}")
                    if len(thoughts) > 5:
                        with st.expander("Show all reasoning steps"):
                            for i, thought in enumerate(thoughts):
                                st.markdown(f"{i+1}. {thought}")

            # Display evidence
            st.subheader("📝 Evidence")
            evidence = _coerce_evidence(result.get("evidence", []))

            # The count reflects the actual evidence list
            evidence_count = len(evidence)

            if not any(evidence):
                # Single notice for the empty case (nothing below depends on evidence)
                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
            else:
                st.markdown(f"Retrieved {evidence_count} pieces of evidence")

                # Get classification results
                classification_results = result.get("classification_results") or []

                # Create tabs for different evidence categories
                evidence_tabs = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])

                with evidence_tabs[0]:
                    for i, ev in enumerate(evidence):
                        if _is_displayable(ev):  # Only show non-empty evidence
                            with st.expander(f"Evidence {i+1}", expanded=i == 0):
                                st.text(ev)

                with evidence_tabs[1]:
                    # Keep only results in the expected format that carry evidence text
                    valid_results = [
                        res for res in classification_results
                        if isinstance(res, dict)
                        and "confidence" in res and "evidence" in res and "label" in res
                        and res.get("evidence")
                    ]

                    if valid_results:
                        # Confidence is normalized so that None or non-numeric
                        # values cannot break sorting or percent formatting
                        top_results = sorted(
                            valid_results,
                            key=lambda res: _clamp_confidence(res.get("confidence"), default=0.0),
                            reverse=True,
                        )[:3]

                        for i, res in enumerate(top_results):
                            res_confidence = _clamp_confidence(res.get("confidence"), default=0.0)
                            with st.expander(f"Top Evidence {i+1} (Confidence: {res_confidence:.2%})", expanded=i == 0):
                                st.text(res.get("evidence", "No evidence text available"))
                                st.markdown(f"**Classification:** {res.get('label', 'unknown')}")
                    else:
                        # No usable classification details: just show the first evidence items
                        _show_evidence_preview(evidence)

                with evidence_tabs[2]:
                    evidence_sources = _count_sources(evidence)

                    # Display evidence source distribution
                    if evidence_sources:
                        st.markdown("**Evidence Source Distribution**")
                        for source, count in evidence_sources.items():
                            st.markdown(f"- {source}: {count} item(s)")
                    else:
                        st.info("No source information available in the evidence.")

            # Button to start a new verification
            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
                # Reset to fresh state for a new verification
                st.session_state.fresh_state = True
                st.session_state.has_result = False
                st.session_state.result = None
                st.rerun()

# Footer with additional information
st.markdown("---")
st.caption("""
**AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
""")
|
config.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Configuration module for the Fake News Detector application.
|
3 |
+
|
4 |
+
This module handles loading configuration parameters, API keys,
|
5 |
+
and source credibility data needed for the fact checking system.
|
6 |
+
It manages environment variables and file-based configurations.
|
7 |
+
"""
|
8 |
+
|
9 |
+
import os
|
10 |
+
import json
|
11 |
+
import logging
|
12 |
+
from pathlib import Path
|
13 |
+
|
14 |
+
# Configure logger
|
15 |
+
logger = logging.getLogger("misinformation_detector")
|
16 |
+
|
17 |
+
# Base paths: the directory holding this module, and its "data" subdirectory
ROOT_DIR = Path(__file__).absolute().parent
DATA_DIR = ROOT_DIR.joinpath("data")

# Create the data directory on import if it is not there yet
DATA_DIR.mkdir(exist_ok=True)
|
23 |
+
|
24 |
+
# API keys come from Streamlit secrets first, then from environment variables.
#
# SECURITY: this module must never contain literal credentials. Earlier
# revisions shipped real-looking keys as fallback defaults; any key that was
# ever committed has to be treated as compromised and rotated with its provider.
def _read_api_key(name):
    """
    Look up the API key called `name`.

    Streamlit secrets take precedence; the environment variable of the same
    name is the fallback. Returns an empty string when the key is configured
    in neither place.
    """
    env_value = os.environ.get(name, "")
    try:
        import streamlit as st
        return st.secrets.get(name, env_value)
    except (AttributeError, ImportError, FileNotFoundError):
        # ImportError: Streamlit is not installed (e.g. plain scripts or tests).
        # FileNotFoundError: the pinned Streamlit release raises it from
        # st.secrets when no secrets.toml exists, which is the normal case on
        # hosts that inject secrets as environment variables.
        return env_value


OPENAI_API_KEY = _read_api_key("OPENAI_API_KEY")
NEWS_API_KEY = _read_api_key("NEWS_API_KEY")
FACTCHECK_API_KEY = _read_api_key("FACTCHECK_API_KEY")
|
47 |
+
|
48 |
+
# Report which keys are configured, never their values.
# Each row: (key name, current value, warning to log when the value is empty)
_KEY_STATUS = (
    ("OPENAI_API_KEY", OPENAI_API_KEY,
     "OPENAI_API_KEY not set. The application will not function properly."),
    ("NEWS_API_KEY", NEWS_API_KEY,
     "NEWS_API_KEY not set. News evidence retrieval will be limited."),
    ("FACTCHECK_API_KEY", FACTCHECK_API_KEY,
     "FACTCHECK_API_KEY not set. Fact-checking evidence will be limited."),
)
for key_name, key_value, missing_message in _KEY_STATUS:
    if key_value:
        logger.info(f"{key_name} is set")
    else:
        logger.warning(missing_message)

# Export the OpenAI key through the environment so code that reads
# os.environ directly sees the same value
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
66 |
+
|
67 |
+
# Source credibility file path
|
68 |
+
source_cred_file = DATA_DIR / "source_credibility.json"
|
69 |
+
|
70 |
+
def load_source_credibility():
    """
    Load source credibility data from the JSON file at ``source_cred_file``.

    Returns:
        dict: The parsed JSON object (intended to map domain names to
              credibility scores in the 0-1 range).
              Empty dict if the file is missing, cannot be read, is not valid
              JSON, or does not contain a JSON object at the top level.
    """
    try:
        # Explicit UTF-8: do not depend on the platform's default encoding
        with open(source_cred_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning(f"Source credibility file not found: {source_cred_file}")
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        logger.error(f"Error loading source credibility file: {e}")
        return {}

    if not isinstance(data, dict):
        # Callers are promised a dict; reject lists, strings, numbers, null
        logger.error(
            "Error loading source credibility file: "
            f"expected a JSON object, got {type(data).__name__}"
        )
        return {}
    return data
|
88 |
+
|
89 |
+
# Load source credibility once at module import
|
90 |
+
SOURCE_CREDIBILITY = load_source_credibility()
|
91 |
+
|
92 |
+
# Rate limiting configuration.
# Each row is (api_name, max_requests, period_in_seconds).
RATE_LIMITS = {
    api_name: {"requests": max_requests, "period": period_seconds}
    for api_name, max_requests, period_seconds in (
        ("newsapi", 100, 3600),           # 100 requests per hour
        ("factcheck", 1000, 86400),       # 1000 requests per day
        ("semantic_scholar", 10, 300),    # 10 requests per 5 minutes
        ("wikidata", 60, 60),             # 60 requests per minute
        ("wikipedia", 200, 60),           # 200 requests per minute
        ("rss", 300, 3600),               # 300 RSS requests per hour
    )
}

# Error backoff settings
ERROR_BACKOFF = dict(
    max_retries=5,
    initial_backoff=1,   # seconds
    backoff_factor=2,    # exponential backoff
)

# RSS feed settings
RSS_SETTINGS = dict(
    max_feeds_per_request=10,   # Maximum number of feeds to try per request
    max_age_days=3,             # Maximum age of RSS items to consider
    timeout_seconds=5,          # Timeout for RSS feed requests
    max_workers=5,              # Number of parallel workers for fetching feeds
)

# Semantic analysis settings
SEMANTIC_ANALYSIS_CONFIG = dict(
    similarity_weight=0.4,       # Weight for semantic similarity
    entity_overlap_weight=0.3,   # Weight for entity matching
    base_weight=0.3,             # Base relevance weight
    temporal_boost=1.2,          # Boost for recent evidence
    temporal_penalty=0.7,        # Penalty for outdated evidence
    authority_boosts=dict(
        scientific_consensus=1.8,
        fact_check=1.5,
        high_authority=1.3,
    ),
)
|
requirements.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.32.0
|
2 |
+
langchain>=0.1.6
|
3 |
+
langchain_openai>=0.0.5
|
4 |
+
langchain_core>=0.1.25
|
5 |
+
langgraph>=0.0.27
|
6 |
+
transformers==4.36.2
|
7 |
+
requests==2.31.0
|
8 |
+
beautifulsoup4==4.12.2
|
9 |
+
langdetect==1.0.9
|
10 |
+
spacy==3.7.2
|
11 |
+
SPARQLWrapper==2.0.0
|
12 |
+
python-dotenv==1.0.0
|
13 |
+
pydantic==2.5.3
|
14 |
+
feedparser==6.0.10
|
15 |
+
scikit-learn>=1.3.0
|
16 |
+
numpy>=1.21.0
|
17 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl
|