ankanghosh committed on
Commit
7130eb6
·
verified ·
1 Parent(s): 1dfaa8e

Upload 7 files.

Browse files
Files changed (6) hide show
  1. LICENSE +21 -0
  2. __init__.py +1 -0
  3. agent.py +430 -0
  4. app.py +518 -0
  5. config.py +130 -0
  6. requirements.txt +17 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 AskVeracity
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Root package initialization
agent.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent module for the Fake News Detector application.
3
+
4
+ This module implements a LangGraph-based agent that orchestrates
5
+ the fact-checking process. It defines the agent setup, tools,
6
+ and processing pipeline for claim verification.
7
+ """
8
+
9
+ import os
10
+ import time
11
+ import logging
12
+ import traceback
13
+ from langchain_core.tools import tool
14
+ from langchain.prompts import PromptTemplate
15
+ from langgraph.prebuilt import create_react_agent
16
+
17
+ from utils.models import get_llm_model
18
+ from utils.performance import PerformanceTracker
19
+ from modules.claim_extraction import extract_claims
20
+ from modules.evidence_retrieval import retrieve_combined_evidence
21
+ from modules.classification import classify_with_llm, aggregate_evidence
22
+ from modules.explanation import generate_explanation
23
+
24
+ # Configure logger
25
+ logger = logging.getLogger("misinformation_detector")
26
+
27
+ # Module-level performance tracker instance used by claim_extractor and format_response
28
+ performance_tracker = PerformanceTracker()
29
+
30
+ # Define LangGraph Tools
31
@tool
def claim_extractor(query):
    """
    Tool that extracts factual claims from a given text.

    Args:
        query (str): Text containing potential factual claims

    Returns:
        str: Extracted factual claim
    """
    # The docstring above is what langchain's @tool exposes to the model as
    # the tool description, so its wording is deliberately left untouched.
    # The processed-claim counter is bumped before extraction runs.
    performance_tracker.log_claim_processed()
    extracted_claim = extract_claims(query)
    return extracted_claim
44
+
45
@tool
def evidence_retriever(query):
    """
    Tool that retrieves evidence from multiple sources for a claim.

    Args:
        query (str): The factual claim to gather evidence for

    Returns:
        list: List of evidence items from various sources
    """
    # Thin wrapper: all retrieval work is delegated to
    # modules.evidence_retrieval.retrieve_combined_evidence.
    gathered_evidence = retrieve_combined_evidence(query)
    return gathered_evidence
57
+
58
@tool
def truth_classifier(query, evidence):
    """
    Tool that classifies the truthfulness of a claim based on evidence.

    Args:
        query (str): The factual claim to classify
        evidence (list): Evidence items to evaluate

    Returns:
        str: JSON string containing verdict, confidence, and results
    """
    import json

    # Per-evidence classification first, then a single aggregated verdict.
    per_item_results = classify_with_llm(query, evidence)
    verdict, score = aggregate_evidence(per_item_results)

    logger.info(f"Classification results: {len(per_item_results)} items")
    logger.info(f"Aggregate result: {verdict}, confidence: {score}")

    # Any verdict mentioning "True" or "False" is treated as definitive and
    # its confidence is floored at 0.6.
    is_definitive = "True" in verdict or "False" in verdict
    if is_definitive:
        score = max(score, 0.6)

    # Serialised to a string so the tool message content has one consistent
    # shape for format_response to parse.
    payload = {
        "verdict": verdict,
        "confidence": score,
        "results": per_item_results,
    }
    return json.dumps(payload)
91
+
92
@tool
def explanation_generator(claim, evidence_results, truth_label):
    """
    Tool that generates a human-readable explanation for the verdict.

    Args:
        claim (str): The factual claim being verified
        evidence_results (list): Evidence items and classification results
        truth_label (str): The verdict (True/False/Uncertain)

    Returns:
        str: Natural language explanation of the verdict
    """
    generated = generate_explanation(claim, evidence_results, truth_label)
    # Only the first 100 characters are logged to keep log lines short.
    preview = generated[:100]
    logger.info(f"Generated explanation: {preview}...")
    return generated
108
+
109
def setup_agent():
    """
    Create and configure a ReAct agent with the fact-checking tools.

    The agent is built with LangGraph's ``create_react_agent`` from the model
    returned by ``get_llm_model()`` and the four tools defined in this module:
    claim extraction, evidence retrieval, classification, and explanation
    generation.

    Returns:
        object: Configured LangGraph agent ready for claim processing

    Raises:
        ValueError: If the OPENAI_API_KEY environment variable is missing
            or contains only whitespace
        Exception: Any error raised while loading the model or building the
            agent is logged and re-raised unchanged
    """
    # Fail fast with a clear message instead of a later, less obvious API error.
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty.")
        raise ValueError("OpenAI API key is required")

    tools = [
        claim_extractor,
        evidence_retriever,
        truth_classifier,
        explanation_generator,
    ]

    # NOTE(review): a PromptTemplate spelling out the extract -> retrieve ->
    # classify -> explain sequence was previously constructed here but never
    # handed to create_react_agent, so it had no effect on the agent. It is
    # removed as dead code. If that step order must be enforced, pass the
    # instructions through the agent's prompt hook (confirm the parameter name
    # for the installed langgraph version).

    try:
        model = get_llm_model()
        # No timeout is configured here; run length is bounded only by the
        # recursion_limit that process_claim passes at invoke time.
        graph = create_react_agent(model, tools=tools)
    except Exception as e:
        logger.error(f"Error creating agent: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise

    logger.info("Agent created successfully")
    return graph
177
+
178
def process_claim(claim, agent=None, recursion_limit=20):
    """
    Run a claim through the fact-checking agent and return a structured result.

    The claim is sent to the agent as a single user message; the raw agent
    response is then converted by ``format_response``.

    Args:
        claim (str): The factual claim to be verified
        agent (object, optional): Initialized LangGraph agent. When None, an
            error is logged and None is returned.
        recursion_limit (int, optional): Value passed to the agent as its
            ``recursion_limit`` config entry. Default: 20.

    Returns:
        dict | None: The dictionary produced by ``format_response`` on
            success, ``{"error": <message>}`` if invoking or formatting
            raised, or None when no agent was supplied.
    """
    # Guard clause: nothing can be done without an agent.
    if agent is None:
        logger.error("Agent not initialized. Call setup_agent() first.")
        return None

    started_at = time.time()
    logger.info(f"Processing claim with agent: {claim}")

    try:
        agent_inputs = {"messages": [("user", claim)]}
        run_config = {"recursion_limit": recursion_limit}

        raw_response = agent.invoke(agent_inputs, run_config)
        formatted = format_response(raw_response)

        duration = time.time() - started_at
        logger.info(f"Claim processed in {duration:.2f} seconds")
        return formatted

    except Exception as e:
        # Failures are reported to the caller as data rather than raised.
        logger.error(f"Error processing claim with agent: {str(e)}")
        logger.error(traceback.format_exc())
        return {"error": str(e)}
233
+
234
def _parse_evidence_payload(content):
    """Convert an evidence_retriever tool payload into a list of evidence items.

    A list is returned unchanged. A string that looks like a serialized list
    is decoded, first as JSON and then as a Python literal. Anything that
    cannot be decoded into a list is wrapped in a single-item list.
    """
    import ast
    import json

    if isinstance(content, list):
        return content

    if isinstance(content, str) and content.startswith("[") and content.endswith("]"):
        # Tool output may have been stringified with json.dumps or with str();
        # try both decoders before giving up.
        for decode in (json.loads, ast.literal_eval):
            try:
                decoded = decode(content)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(decoded, list):
                return decoded
        return [content]

    logger.warning(f"Evidence retrieved is not a list: {type(content)}")
    return [content]


def format_response(response):
    """
    Format the agent's response into a structured result.

    Walks the message list returned by the agent, collecting the output of
    each fact-checking tool, any "Thought:" lines from AI messages, and the
    content of the last non-tool message as the final answer. If the
    classifier ran but the explanation tool did not, an explanation is
    generated directly as a fallback.

    Args:
        response (dict): Raw response from the LangGraph agent; expected to
            contain a "messages" list

    Returns:
        dict: One of
            - ``{"error": "Invalid response format"}`` when ``response`` is
              empty or has no "messages" key
            - a result with keys claim, evidence, evidence_count,
              classification, confidence, explanation, final_answer,
              thoughts, performance (and classification_results when the
              classifier output was parsed)
            - an error result with keys error, traceback, classification
              ("Error"), confidence and explanation if formatting itself
              raised
    """
    import json

    try:
        if not response or "messages" not in response:
            return {"error": "Invalid response format"}

        messages = response.get("messages", [])

        # Defaults describe an unverified claim; tool outputs overwrite them.
        result = {
            "claim": None,
            "evidence": [],
            "evidence_count": 0,
            "classification": "Uncertain",
            "confidence": 0.2,  # Default low confidence
            "explanation": "Insufficient evidence to evaluate this claim.",
            "final_answer": None,
            "thoughts": []
        }

        # Tracks which tools actually produced a message in this run.
        found_tools = {
            "claim_extractor": False,
            "evidence_retriever": False,
            "truth_classifier": False,
            "explanation_generator": False
        }

        last_index = len(messages) - 1

        for idx, message in enumerate(messages):
            message_type = getattr(message, "type", "")

            # LangChain AI messages report type "ai"; "assistant" is still
            # accepted for compatibility with the previous check.
            if message_type in ("ai", "assistant") and hasattr(message, "content"):
                content = message.content
                # Content can be a list of blocks; only plain text is scanned.
                if isinstance(content, str) and "Thought:" in content:
                    thought = content.split("Thought:", 1)[1].split("\n")[0].strip()
                    result["thoughts"].append(thought)

            if message_type == "tool":
                tool_name = getattr(message, "name", "unknown")
                content = message.content

                if tool_name == "claim_extractor":
                    found_tools["claim_extractor"] = True
                    if content:
                        result["claim"] = content

                elif tool_name == "evidence_retriever":
                    found_tools["evidence_retriever"] = True
                    if content:
                        evidence_items = _parse_evidence_payload(content)
                        result["evidence"] = evidence_items
                        result["evidence_count"] = len(evidence_items)

                elif tool_name == "truth_classifier":
                    found_tools["truth_classifier"] = True

                    # Log the incoming content for debugging
                    logger.info(f"Truth classifier content type: {type(content)}")
                    logger.info(f"Truth classifier content: {content}")

                    # truth_classifier returns a JSON string
                    if isinstance(content, str):
                        try:
                            parsed_content = json.loads(content)

                            result["classification"] = parsed_content.get("verdict", "Uncertain")
                            result["confidence"] = float(parsed_content.get("confidence", 0.2))
                            result["classification_results"] = parsed_content.get("results", [])

                            logger.info(f"Extracted from JSON: verdict={result['classification']}, confidence={result['confidence']}")
                        except json.JSONDecodeError:
                            logger.warning(f"Could not parse truth classifier JSON: {content}")
                        except Exception as e:
                            logger.warning(f"Error extracting from truth classifier output: {e}")
                    else:
                        logger.warning(f"Unexpected truth_classifier content format: {content}")

                elif tool_name == "explanation_generator":
                    found_tools["explanation_generator"] = True
                    if content:
                        result["explanation"] = content
                        logger.info(f"Found explanation from tool: {content[:100]}...")

            # The last message, when it is not a tool message, is the final answer.
            elif idx == last_index and hasattr(message, "content"):
                result["final_answer"] = message.content

        # Log which tools weren't found
        missing_tools = [name for name, found in found_tools.items() if not found]
        if missing_tools:
            logger.warning(f"Missing tool outputs in response: {', '.join(missing_tools)}")

        # FALLBACK: classifier ran but the agent never asked for an explanation.
        if found_tools["truth_classifier"] and not found_tools["explanation_generator"]:
            logger.info("Explanation generator was not called by the agent, using fallback explanation generation")

            try:
                classification_results = result.get("classification_results", [])

                # Prefer the per-evidence classification results; fall back
                # to the raw evidence when none were parsed.
                explanation_evidence = classification_results if classification_results else result["evidence"]

                explanation = generate_explanation(
                    result["claim"],
                    explanation_evidence,
                    result["classification"],
                    result["confidence"],
                )

                if explanation:
                    logger.info(f"Generated fallback explanation: {explanation[:100]}...")
                    result["explanation"] = explanation
            except Exception as e:
                # Best effort: the default explanation stays in place.
                logger.error(f"Error generating fallback explanation: {e}")

        # Keep the count consistent with the evidence actually held.
        if result["evidence_count"] > 0 and not result["evidence"]:
            logger.warning("Evidence count is non-zero but evidence list is empty. This is a data inconsistency.")
            result["evidence_count"] = 0

        # Add debug info about the final result
        logger.info(f"Final classification: {result['classification']}, confidence: {result['confidence']}")
        logger.info(f"Final explanation: {result['explanation'][:100]}...")

        # Add performance metrics
        result["performance"] = performance_tracker.get_summary()

        # Memory management: string evidence longer than 500 characters is
        # cut to 497 characters plus an ellipsis; other items pass through.
        if isinstance(result.get("evidence"), list):
            result["evidence"] = [
                ev[:497] + "..." if isinstance(ev, str) and len(ev) > 500 else ev
                for ev in result["evidence"]
            ]

        # Keep at most the first 10 thoughts.
        if len(result["thoughts"]) > 10:
            result["thoughts"] = result["thoughts"][:10]

        return result

    except Exception as e:
        logger.error(f"Error formatting agent response: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "classification": "Error",
            "confidence": 0.1,
            "explanation": "An error occurred while processing this claim."
        }
app.py ADDED
@@ -0,0 +1,518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main Streamlit application for the Fake News Detector.
3
+
4
+ This module implements the user interface for claim verification,
5
+ rendering the results and handling user interactions. It also
6
+ manages the application lifecycle including initialization and cleanup.
7
+ """
8
+
9
+ import streamlit as st
10
+ import time
11
+ import json
12
+ import os
13
+ import logging
14
+ import atexit
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ # Configure logging first, before other imports
19
+ logging.basicConfig(
20
+ level=logging.INFO,
21
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
22
+ handlers=[logging.StreamHandler()]
23
+ )
24
+ logger = logging.getLogger("misinformation_detector")
25
+
26
+ # Check for critical environment variables
27
+ if not os.environ.get("OPENAI_API_KEY"):
28
+ logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")
29
+
30
+ # Import our modules
31
+ from utils.models import initialize_models
32
+ from utils.performance import PerformanceTracker
33
+
34
+ # Import agent functionality
35
+ import agent
36
+
37
+ # Initialize performance tracker
38
+ performance_tracker = PerformanceTracker()
39
+
40
+ # Ensure data directory exists
41
+ data_dir = Path("data")
42
+ if not data_dir.exists():
43
+ logger.info("Creating data directory")
44
+ data_dir.mkdir(exist_ok=True)
45
+
46
+ # Set page configuration
47
+ st.set_page_config(
48
+ page_title="AskVeracity",
49
+ page_icon="🔍",
50
+ layout="wide",
51
+ )
52
+
53
+ # Hide the "Press ⌘+Enter to apply" text with CSS
54
+ st.markdown("""
55
+ <style>
56
+ /* Hide the shortcut text that appears at the bottom of text areas */
57
+ .stTextArea div:has(textarea) + div {
58
+ visibility: hidden !important;
59
+ height: 0px !important;
60
+ position: absolute !important;
61
+ }
62
+ </style>
63
+ """, unsafe_allow_html=True)
64
+
65
@st.cache_resource
def get_agent():
    """
    Build the fact-checking agent once and hand back the cached instance.

    Initializes the models and then creates the agent via
    ``agent.setup_agent()``. Because the function is wrapped in
    ``st.cache_resource``, Streamlit reuses the returned object on later
    calls instead of re-running this body.

    NOTE(review): Streamlit documents ``cache_resource`` objects as shared
    across all sessions rather than per session; confirm that sharing one
    agent between users is intended.

    Returns:
        object: Initialized LangGraph agent for fact checking
    """
    logger.info("Initializing models and agent (cached)")
    # Models must be ready before the agent is assembled.
    initialize_models()
    fact_checker = agent.setup_agent()
    return fact_checker
80
+
81
def cleanup_resources():
    """
    Release cached data and reset metrics when the app shuts down.

    Clears Streamlit's ``st.cache_data`` cache and resets the module-level
    performance tracker. Any exception is logged rather than propagated, so
    a failing cleanup never interrupts shutdown.
    """
    try:
        st.cache_data.clear()          # drop cached data
        performance_tracker.reset()    # reset collected metrics
        logger.info("Resources cleaned up successfully")
    except Exception as exc:
        logger.error(f"Error during cleanup: {exc}")
99
+
100
+ # Register cleanup handler
101
+ atexit.register(cleanup_resources)
102
+
103
+ # App title and description
104
+ st.title("🔍 AskVeracity")
105
+ st.markdown("""
106
+ This is a simple AI-powered tool - a fact-checking system that analyzes claims to determine
107
+ their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
108
+ news outlets, and academic repositories.
109
+ """)
110
+
111
+ # Sidebar with app information
112
+ with st.sidebar:
113
+ st.header("About")
114
+ st.info(
115
+ "This system uses a combination of NLP techniques and LLMs to "
116
+ "extract claims, gather evidence, and classify the truthfulness of statements."
117
+ )
118
+
119
+ # Application information
120
+ st.markdown("### How It Works")
121
+ st.info(
122
+ "1. Enter any recent news or a factual claim\n"
123
+ "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
124
+ "3. The system analyzes the evidence to determine truthfulness\n"
125
+ "4. Results show the verdict with supporting evidence"
126
+ )
127
+
128
+ # Our Mission
129
+ st.markdown("### Our Mission")
130
+ st.info(
131
+ "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
132
+ "We believe in empowering people with factual information to make informed decisions."
133
+ )
134
+
135
+ # Limitations and Usage
136
+ st.markdown("### Limitations")
137
+ st.warning(
138
+ "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
139
+ "Performance is typically best with widely-reported news and information published within the last 48 hours. "
140
+ "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
141
+ "may be judged false if circumstances have changed, and vice versa."
142
+ )
143
+
144
+ # Best Practices
145
+ st.markdown("### Best Practices")
146
+ st.success(
147
+ "For optimal results:\n\n"
148
+ "• Keep claims short and precise\n\n"
149
+ "• Include key details in your claim\n\n"
150
+ "• Phrase claims as direct statements rather than questions\n\n"
151
+ "• Be specific about who said what"
152
+ )
153
+
154
+ # Example comparison
155
+ with st.expander("📝 Examples of Effective Claims"):
156
+ st.markdown("""
157
+ **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."
158
+
159
+ **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
160
+ """)
161
+
162
+ # Important Notes
163
+ st.markdown("### Important Notes")
164
+ st.info(
165
+ "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
166
+ "• Results can vary based on available evidence and LLM behavior\n\n"
167
+ "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
168
+ "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
169
+ "• We recommend cross-verifying critical information with additional sources"
170
+ )
171
+
172
+ # Privacy Information
173
+ st.markdown("### Data Privacy")
174
+ st.info(
175
+ "We do not collect or store any data about the claims you submit. "
176
+ "Your interactions are processed by OpenAI's API. Please refer to "
177
+ "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
178
+ )
179
+
180
+ # Feedback Section
181
+ st.markdown("### Feedback")
182
+ st.success(
183
+ "AskVeracity is evolving and we welcome your feedback to help us improve. "
184
+ "Please reach out to us with questions, suggestions, or concerns."
185
+ )
186
+
187
+ # Initialize session state variables
188
+ if 'processing' not in st.session_state:
189
+ st.session_state.processing = False
190
+ if 'claim_to_process' not in st.session_state:
191
+ st.session_state.claim_to_process = ""
192
+ if 'has_result' not in st.session_state:
193
+ st.session_state.has_result = False
194
+ if 'result' not in st.session_state:
195
+ st.session_state.result = None
196
+ if 'total_time' not in st.session_state:
197
+ st.session_state.total_time = 0
198
+ if 'fresh_state' not in st.session_state:
199
+ st.session_state.fresh_state = True
200
+
201
+ # Main interface
202
+ st.markdown("### Enter a claim to verify")
203
+
204
+ # Input area
205
+ claim_input = st.text_area("",
206
+ height=100,
207
+ placeholder=(
208
+ "Examples: The Eiffel Tower is located in Rome, Italy. "
209
+ "Meta recently released its Llama 4 large language model. "
210
+ "Justin Trudeau is not the Canadian Prime Minister anymore. "
211
+ "China retaliated with 125% tariffs against U.S. imports. "
212
+ "A recent piece of news."
213
+ ),
214
+ key="claim_input_area",
215
+ label_visibility="collapsed",
216
+ max_chars=None)
217
+
218
+ # Information about result variability
219
+ st.caption("""
220
+ 💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
221
+ - Incorporate the most recent evidence available
222
+ - Benefit from the AI's ability to consider multiple perspectives
223
+ - Adapt to evolving information landscapes
224
+ """)
225
+
226
+ st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")
227
+
228
+ # Button for verifying claim
229
+ verify_button = st.button(
230
+ "Verify Claim",
231
+ type="primary",
232
+ disabled=st.session_state.processing,
233
+ key="verify_btn"
234
+ )
235
+
236
+ # Create a clean interface
237
+ if st.session_state.fresh_state:
238
+ # Show a clean interface for the first query or when we need to reset
239
+ analysis_placeholder = st.empty()
240
+
241
+ # When button is clicked and not already processing
242
+ if verify_button and not st.session_state.processing:
243
+ if not claim_input:
244
+ st.error("Please enter a claim to verify.")
245
+ else:
246
+ # Store the claim and set processing state
247
+ st.session_state.claim_to_process = claim_input
248
+ st.session_state.processing = True
249
+ st.session_state.fresh_state = False
250
+ # Force a rerun to refresh UI
251
+ st.rerun()
252
+
253
+ else:
254
+ # This is either during processing or showing results
255
+
256
+ # Create a container for processing and results
257
+ analysis_container = st.container()
258
+
259
+ with analysis_container:
260
+ # If we're processing, show the processing UI
261
+ if st.session_state.processing:
262
+ st.subheader("🔄 Processing...")
263
+ status = st.empty()
264
+ status.text("Verifying claim... (this may take a while)")
265
+ progress_bar = st.progress(0)
266
+
267
+ # Initialize models and agent if needed
268
+ if not hasattr(st.session_state, 'agent_initialized'):
269
+ with st.spinner("Initializing system..."):
270
+ st.session_state.agent = get_agent()
271
+ st.session_state.agent_initialized = True
272
+
273
+ try:
274
+ # Use the stored claim for processing
275
+ claim_to_process = st.session_state.claim_to_process
276
+
277
+ # Process the claim with the agent
278
+ start_time = time.time()
279
+ result = agent.process_claim(claim_to_process, st.session_state.agent)
280
+ total_time = time.time() - start_time
281
+
282
+ # Update progress as claim processing completes
283
+ progress_bar.progress(100)
284
+
285
+ # Check for None result
286
+ if result is None:
287
+ st.error("Failed to process the claim. Please try again.")
288
+ st.session_state.processing = False
289
+ st.session_state.fresh_state = True
290
+ else:
291
+ # If result exists but key values are missing, provide default values
292
+ if "classification" not in result or result["classification"] is None:
293
+ result["classification"] = "Uncertain"
294
+
295
+ if "confidence" not in result or result["confidence"] is None:
296
+ result["confidence"] = 0.6 # Default to 0.6 instead of 0.0
297
+
298
+ if "explanation" not in result or result["explanation"] is None:
299
+ result["explanation"] = "Insufficient evidence was found to determine the truthfulness of this claim."
300
+
301
+ # Update result with timing information
302
+ if "processing_times" not in result:
303
+ result["processing_times"] = {"total": total_time}
304
+
305
+ # Store the result and timing information
306
+ st.session_state.result = result
307
+ st.session_state.total_time = total_time
308
+ st.session_state.has_result = True
309
+ st.session_state.processing = False
310
+
311
+ # Clear processing indicators before showing results
312
+ status.empty()
313
+ progress_bar.empty()
314
+
315
+ # Force rerun to display results
316
+ st.rerun()
317
+
318
+ except Exception as e:
319
+ # Handle any exceptions and reset processing state
320
+ logger.error(f"Error during claim processing: {str(e)}")
321
+ st.error(f"An error occurred: {str(e)}")
322
+ st.session_state.processing = False
323
+ st.session_state.fresh_state = True
324
+ # Force rerun to re-enable button
325
+ st.rerun()
326
+
327
+ # Display results if available
328
+ elif st.session_state.has_result and st.session_state.result:
329
+ result = st.session_state.result
330
+ total_time = st.session_state.total_time
331
+ claim_to_process = st.session_state.claim_to_process
332
+
333
+ st.subheader("📊 Verification Results")
334
+
335
+ result_col1, result_col2 = st.columns([2, 1])
336
+
337
+ with result_col1:
338
+ # Display both original and processed claim if they differ
339
+ if "claim" in result and result["claim"] and result["claim"] != claim_to_process:
340
+ st.markdown(f"**Original Claim:** {claim_to_process}")
341
+ st.markdown(f"**Processed Claim:** {result['claim']}")
342
+ else:
343
+ st.markdown(f"**Claim:** {claim_to_process}")
344
+
345
+ # Make verdict colorful based on classification
346
+ truth_label = result.get('classification', 'Uncertain')
347
+ if truth_label and "True" in truth_label:
348
+ verdict_color = "green"
349
+ elif truth_label and "False" in truth_label:
350
+ verdict_color = "red"
351
+ else:
352
+ verdict_color = "gray"
353
+
354
+ st.markdown(f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{truth_label}</span>", unsafe_allow_html=True)
355
+
356
+ # Ensure confidence value is used
357
+ if "confidence" in result and result["confidence"] is not None:
358
+ confidence_value = result["confidence"]
359
+ # Make sure confidence is a numeric value between 0 and 1
360
+ try:
361
+ confidence_value = float(confidence_value)
362
+ if confidence_value < 0:
363
+ confidence_value = 0.0
364
+ elif confidence_value > 1:
365
+ confidence_value = 1.0
366
+ except (ValueError, TypeError):
367
+ confidence_value = 0.6 # Fallback to reasonable default
368
+ else:
369
+ confidence_value = 0.6 # Default confidence
370
+
371
+ # Display the confidence
372
+ st.markdown(f"**Confidence:** {confidence_value:.2%}")
373
+ st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")
374
+
375
+ # Add disclaimer about cross-verification
376
+ st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")
377
+
378
+ with result_col2:
379
+ st.markdown("**Processing Time**")
380
+ times = result.get("processing_times", {"total": total_time})
381
+ st.markdown(f"- **Total:** {times.get('total', total_time):.2f}s")
382
+
383
+ # Show agent thoughts
384
+ if "thoughts" in result and result["thoughts"]:
385
+ st.markdown("**AI Reasoning Process**")
386
+ thoughts = result.get("thoughts", [])
387
+ for i, thought in enumerate(thoughts[:5]): # Show top 5 thoughts
388
+ st.markdown(f"{i+1}. {thought}")
389
+ if len(thoughts) > 5:
390
+ with st.expander("Show all reasoning steps"):
391
+ for i, thought in enumerate(thoughts):
392
+ st.markdown(f"{i+1}. {thought}")
393
+
394
+ # Display evidence
395
+ st.subheader("📝 Evidence")
396
+ evidence_count = result.get("evidence_count", 0)
397
+ evidence = result.get("evidence", [])
398
+
399
+ # Ensure evidence is a list
400
+ if not isinstance(evidence, list):
401
+ if isinstance(evidence, str):
402
+ # Try to parse string as a list
403
+ try:
404
+ import ast
405
+ parsed_evidence = ast.literal_eval(evidence)
406
+ if isinstance(parsed_evidence, list):
407
+ evidence = parsed_evidence
408
+ else:
409
+ evidence = [evidence]
410
+ except:
411
+ evidence = [evidence]
412
+ else:
413
+ evidence = [str(evidence)] if evidence else []
414
+
415
+ # Update evidence count based on actual evidence list
416
+ evidence_count = len(evidence)
417
+
418
+ # Check for empty evidence
419
+ if evidence_count == 0 or not any(ev for ev in evidence if ev):
420
+ st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
421
+ else:
422
+ st.markdown(f"Retrieved {evidence_count} pieces of evidence")
423
+
424
+ # Get classification results
425
+ classification_results = result.get("classification_results", [])
426
+
427
+ # Only show evidence tabs if we have evidence
428
+ if evidence and any(ev for ev in evidence if ev):
429
+ # Create tabs for different evidence categories
430
+ evidence_tabs = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])
431
+
432
+ with evidence_tabs[0]:
433
+ for i, ev in enumerate(evidence):
434
+ if ev and isinstance(ev, str) and ev.strip(): # Only show non-empty evidence
435
+ with st.expander(f"Evidence {i+1}", expanded=i==0):
436
+ st.text(ev)
437
+
438
+ with evidence_tabs[1]:
439
+ if classification_results:
440
+ # Check if classification_results items have the expected format
441
+ valid_results = []
442
+ for res in classification_results:
443
+ if isinstance(res, dict) and "confidence" in res and "evidence" in res and "label" in res:
444
+ if res.get("evidence"): # Only include results with actual evidence
445
+ valid_results.append(res)
446
+
447
+ if valid_results:
448
+ sorted_results = sorted(valid_results, key=lambda x: x.get("confidence", 0), reverse=True)
449
+ top_results = sorted_results[:min(3, len(sorted_results))]
450
+
451
+ for i, res in enumerate(top_results):
452
+ with st.expander(f"Top Evidence {i+1} (Confidence: {res.get('confidence', 0):.2%})", expanded=i == 0):
453
+ st.text(res.get("evidence", "No evidence text available"))
454
+ st.markdown(f"**Classification:** {res.get('label', 'unknown')}")
455
+ else:
456
+ # If no valid results, just show the evidence
457
+ shown = False
458
+ for i, ev in enumerate(evidence[:3]):
459
+ if ev and isinstance(ev, str) and ev.strip():
460
+ with st.expander(f"Evidence {i+1}", expanded=i==0):
461
+ st.text(ev)
462
+ shown = True
463
+ if not shown:
464
+ st.info("No detailed classification results available.")
465
+ else:
466
+ # Just show regular evidence if no classification details
467
+ shown = False
468
+ for i, ev in enumerate(evidence[:3]):
469
+ if ev and isinstance(ev, str) and ev.strip():
470
+ with st.expander(f"Evidence {i+1}", expanded=i==0):
471
+ st.text(ev)
472
+ shown = True
473
+ if not shown:
474
+ st.info("No detailed classification results available.")
475
+
476
+ with evidence_tabs[2]:
477
+ evidence_sources = {}
478
+ for ev in evidence:
479
+ if not ev or not isinstance(ev, str):
480
+ continue
481
+
482
+ source = "Unknown"
483
+ # Extract source info from evidence text
484
+ if "URL:" in ev:
485
+ import re
486
+ url_match = re.search(r'URL: https?://(?:www\.)?([^/]+)', ev)
487
+ if url_match:
488
+ source = url_match.group(1)
489
+
490
+ if source in evidence_sources:
491
+ evidence_sources[source] += 1
492
+ else:
493
+ evidence_sources[source] = 1
494
+
495
+ # Display evidence source distribution
496
+ if evidence_sources:
497
+ st.markdown("**Evidence Source Distribution**")
498
+ for source, count in evidence_sources.items():
499
+ st.markdown(f"- {source}: {count} item(s)")
500
+ else:
501
+ st.info("No source information available in the evidence.")
502
+ else:
503
+ st.warning("No evidence was retrieved for this claim.")
504
+
505
+ # Button to start a new verification
506
+ if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
507
+ # Reset to fresh state for a new verification
508
+ st.session_state.fresh_state = True
509
+ st.session_state.has_result = False
510
+ st.session_state.result = None
511
+ st.rerun()
512
+
513
+ # Footer with additional information
514
+ st.markdown("---")
515
+ st.caption("""
516
+ **AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
517
+ While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
518
+ """)
config.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration module for the Fake News Detector application.
3
+
4
+ This module handles loading configuration parameters, API keys,
5
+ and source credibility data needed for the fact checking system.
6
+ It manages environment variables and file-based configurations.
7
+ """
8
+
9
+ import os
10
+ import json
11
+ import logging
12
+ from pathlib import Path
13
+
14
# Configure logger
# The logger is looked up by a fixed name rather than __name__, so any module
# that requests "misinformation_detector" receives this same logger instance.
logger = logging.getLogger("misinformation_detector")

# Base paths
# ROOT_DIR is the directory containing this file; DATA_DIR is its "data" subfolder.
ROOT_DIR = Path(__file__).parent.absolute()
DATA_DIR = ROOT_DIR / "data"

# Ensure data directory exists
# NOTE: this runs at import time; exist_ok=True keeps repeated imports harmless.
DATA_DIR.mkdir(exist_ok=True)
23
+
24
# Resolve API keys from Streamlit secrets first, then fall back to environment
# variables. Key material must never be embedded in source: a key that cannot
# be resolved becomes "" so the status checks that follow can report it.
try:
    import streamlit as st
    OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
    NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
    FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError, FileNotFoundError):
    # ImportError: Streamlit is not installed (e.g. running outside the app).
    # FileNotFoundError: st.secrets found no secrets.toml to read.
    # In either case use environment variables only, with no built-in defaults.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")
47
+
48
# Report which keys were resolved. Only presence is logged, never the value.
if not OPENAI_API_KEY:
    logger.warning("OPENAI_API_KEY not set. The application will not function properly.")
else:
    logger.info("OPENAI_API_KEY is set")

if not NEWS_API_KEY:
    logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")
else:
    logger.info("NEWS_API_KEY is set")

if not FACTCHECK_API_KEY:
    logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")
else:
    logger.info("FACTCHECK_API_KEY is set")

# Export the OpenAI key into the process environment so that components which
# read it from os.environ see the same value that was resolved above.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
66
+
67
+ # Source credibility file path
68
+ source_cred_file = DATA_DIR / "source_credibility.json"
69
+
70
def load_source_credibility():
    """
    Load source credibility data from the JSON file at ``source_cred_file``.

    Returns:
        dict: The parsed JSON object (intended to map domain names to
              credibility scores in 0-1; the values are not validated here).
              Empty dict if the file is missing, cannot be read or parsed,
              or does not contain a JSON object.
    """
    try:
        if not source_cred_file.exists():
            logger.warning(f"Source credibility file not found: {source_cred_file}")
            return {}

        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's default locale encoding.
        with open(source_cred_file, 'r', encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Deliberately broad: this runs at module import, and a bad data file
        # should degrade to "no credibility data" rather than break startup.
        logger.error(f"Error loading source credibility file: {e}")
        return {}

    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list); honour the dict contract.
        logger.error(
            f"Error loading source credibility file: expected a JSON object, got {type(data).__name__}"
        )
        return {}

    return data
88
+
89
+ # Load source credibility once at module import
90
+ SOURCE_CREDIBILITY = load_source_credibility()
91
+
92
# Rate limiting configuration.
# Keyed by API name; "requests" is the maximum number of requests allowed
# per "period", where the period is expressed in seconds.
RATE_LIMITS = {
    "newsapi": dict(requests=100, period=3600),          # 100 requests per hour
    "factcheck": dict(requests=1000, period=86400),      # 1000 requests per day
    "semantic_scholar": dict(requests=10, period=300),   # 10 requests per 5 minutes
    "wikidata": dict(requests=60, period=60),            # 60 requests per minute
    "wikipedia": dict(requests=200, period=60),          # 200 requests per minute
    "rss": dict(requests=300, period=3600),              # 300 RSS requests per hour
}

# Error backoff settings.
ERROR_BACKOFF = dict(
    max_retries=5,
    initial_backoff=1,   # seconds
    backoff_factor=2,    # exponential backoff
)

# RSS feed settings.
RSS_SETTINGS = dict(
    max_feeds_per_request=10,  # Maximum number of feeds to try per request
    max_age_days=3,            # Maximum age of RSS items to consider
    timeout_seconds=5,         # Timeout for RSS feed requests
    max_workers=5,             # Number of parallel workers for fetching feeds
)

# Semantic analysis settings.
SEMANTIC_ANALYSIS_CONFIG = dict(
    similarity_weight=0.4,       # Weight for semantic similarity
    entity_overlap_weight=0.3,   # Weight for entity matching
    base_weight=0.3,             # Base relevance weight
    temporal_boost=1.2,          # Boost for recent evidence
    temporal_penalty=0.7,        # Penalty for outdated evidence
    authority_boosts=dict(
        scientific_consensus=1.8,
        fact_check=1.5,
        high_authority=1.3,
    ),
)
130
+ }
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.32.0
2
+ langchain>=0.1.6
3
+ langchain_openai>=0.0.5
4
+ langchain_core>=0.1.25
5
+ langgraph>=0.0.27
6
+ transformers==4.36.2
7
+ requests==2.31.0
8
+ beautifulsoup4==4.12.2
9
+ langdetect==1.0.9
10
+ spacy==3.7.2
11
+ SPARQLWrapper==2.0.0
12
+ python-dotenv==1.0.0
13
+ pydantic==2.5.3
14
+ feedparser==6.0.10
15
+ scikit-learn>=1.3.0
16
+ numpy>=1.21.0
17
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl