# askveracity/config.py
"""
Configuration module for the Fake News Detector application.
This module handles loading configuration parameters, API keys,
and source credibility data needed for the fact-checking system.
It manages environment variables and file-based configurations.
"""
import os
import json
import logging
from pathlib import Path
# Configure logger
logger = logging.getLogger("misinformation_detector")
# Base paths
ROOT_DIR = Path(__file__).parent.absolute()
DATA_DIR = ROOT_DIR / "data"
# Ensure data directory exists
DATA_DIR.mkdir(exist_ok=True)
# First try to get API keys from Streamlit secrets, then fall back to environment variables
try:
import streamlit as st
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError):
    # Fall back to environment variables if Streamlit secrets aren't available.
    # Never hard-code real API keys here; any key committed to a repository
    # must be treated as compromised and revoked.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")
# Log secrets status (but not the values)
if OPENAI_API_KEY:
logger.info("OPENAI_API_KEY is set")
else:
logger.warning("OPENAI_API_KEY not set. The application will not function properly.")
if NEWS_API_KEY:
logger.info("NEWS_API_KEY is set")
else:
logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")
if FACTCHECK_API_KEY:
logger.info("FACTCHECK_API_KEY is set")
else:
logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")
# Make the OpenAI key available to all components via the environment,
# but don't overwrite the variable with an empty string if no key was found
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# Source credibility file path
source_cred_file = DATA_DIR / "source_credibility.json"
def load_source_credibility():
    """
    Load source credibility data from the JSON file.

    Returns:
        dict: Mapping of domain names to credibility scores (0-1).
              Empty dict if the file is not found or has errors.
    """
try:
if source_cred_file.exists():
with open(source_cred_file, 'r') as f:
return json.load(f)
else:
logger.warning(f"Source credibility file not found: {source_cred_file}")
return {}
except Exception as e:
logger.error(f"Error loading source credibility file: {e}")
return {}
# Load source credibility once at module import
SOURCE_CREDIBILITY = load_source_credibility()
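
# Illustrative helper (a sketch, not part of the original module): how callers
# might look up SOURCE_CREDIBILITY. The 0.5 default for unknown domains is an
# assumption here, not a value taken from this codebase.
def get_domain_credibility(domain, default=0.5):
    """Return the credibility score (0-1) for a domain, or `default` if unknown."""
    key = domain.lower()
    if key.startswith("www."):
        key = key[4:]  # Normalize away a leading "www." so lookups match bare domains
    return SOURCE_CREDIBILITY.get(key, default)
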
# Rate limiting configuration
RATE_LIMITS = {
# api_name: {"requests": max_requests, "period": period_in_seconds}
"newsapi": {"requests": 100, "period": 3600}, # 100 requests per hour
"factcheck": {"requests": 1000, "period": 86400}, # 1000 requests per day
"semantic_scholar": {"requests": 10, "period": 300}, # 10 requests per 5 minutes
"wikidata": {"requests": 60, "period": 60}, # 60 requests per minute
"wikipedia": {"requests": 200, "period": 60}, # 200 requests per minute
"rss": {"requests": 300, "period": 3600} # 300 RSS requests per hour
}
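
# Illustrative sketch (not part of the original module): one way a caller could
# enforce RATE_LIMITS with a simple sliding window. The application's real
# limiter may be implemented differently.
import time
from collections import deque

_request_log = {name: deque() for name in RATE_LIMITS}

def allow_request(api_name):
    """Return True if a request to `api_name` fits within its configured window."""
    limit = RATE_LIMITS[api_name]
    window = _request_log[api_name]
    now = time.time()
    # Discard timestamps that have aged out of the rate-limit period
    while window and now - window[0] > limit["period"]:
        window.popleft()
    if len(window) < limit["requests"]:
        window.append(now)
        return True
    return False
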
# Error backoff settings
ERROR_BACKOFF = {
"max_retries": 5,
"initial_backoff": 1, # seconds
"backoff_factor": 2, # exponential backoff
}
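
# Worked example (a sketch, not part of the original module): how these settings
# translate into retry delays. With the values above, successive retries wait
# 1s, 2s, 4s, 8s, and 16s before giving up after max_retries attempts.
def backoff_delay(attempt):
    """Return the delay in seconds before retry number `attempt` (0-indexed)."""
    return ERROR_BACKOFF["initial_backoff"] * ERROR_BACKOFF["backoff_factor"] ** attempt
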
# RSS feed settings
RSS_SETTINGS = {
"max_feeds_per_request": 10, # Maximum number of feeds to try per request
"max_age_days": 3, # Maximum age of RSS items to consider
"timeout_seconds": 5, # Timeout for RSS feed requests
"max_workers": 5 # Number of parallel workers for fetching feeds
}
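
# Illustrative sketch (not part of the original module): how RSS_SETTINGS might
# drive parallel feed fetching. `fetch_feed` is a hypothetical callable taking
# a feed URL and a timeout in seconds; the real retrieval code may differ.
from concurrent.futures import ThreadPoolExecutor

def fetch_feeds(feed_urls, fetch_feed):
    """Fetch up to max_feeds_per_request feeds in parallel, honoring the timeout."""
    urls = feed_urls[:RSS_SETTINGS["max_feeds_per_request"]]
    with ThreadPoolExecutor(max_workers=RSS_SETTINGS["max_workers"]) as pool:
        futures = [pool.submit(fetch_feed, url, RSS_SETTINGS["timeout_seconds"])
                   for url in urls]
        # result() re-raises any fetch error; a production version would handle these
        return [f.result() for f in futures]
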
# Semantic analysis settings
SEMANTIC_ANALYSIS_CONFIG = {
"similarity_weight": 0.4, # Weight for semantic similarity
"entity_overlap_weight": 0.3, # Weight for entity matching
"base_weight": 0.3, # Base relevance weight
"temporal_boost": 1.2, # Boost for recent evidence
"temporal_penalty": 0.7, # Penalty for outdated evidence
"authority_boosts": {
"scientific_consensus": 1.8,
"fact_check": 1.5,
"high_authority": 1.3
}
}
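
# Illustrative sketch (not part of the original module): one plausible way the
# weights above could combine into a single relevance score. The actual scoring
# function lives elsewhere in the application and may differ.
def example_relevance(similarity, entity_overlap, is_recent=None, authority=None):
    """Combine normalized (0-1) similarity and entity-overlap signals into a score."""
    cfg = SEMANTIC_ANALYSIS_CONFIG
    score = (cfg["base_weight"]
             + cfg["similarity_weight"] * similarity
             + cfg["entity_overlap_weight"] * entity_overlap)
    if is_recent is True:
        score *= cfg["temporal_boost"]    # recent evidence gets a 1.2x boost
    elif is_recent is False:
        score *= cfg["temporal_penalty"]  # outdated evidence is penalized to 0.7x
    if authority in cfg["authority_boosts"]:
        score *= cfg["authority_boosts"][authority]
    return score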