	| """ | |
| Configuration module for the Fake News Detector application. | |
| This module handles loading configuration parameters, API keys, | |
| and source credibility data needed for the fact checking system. | |
| It manages environment variables and file-based configurations. | |
| """ | |
| import os | |
| import json | |
| import logging | |
| from pathlib import Path | |
| # Configure logger | |
| logger = logging.getLogger("misinformation_detector") | |
| # Base paths | |
| ROOT_DIR = Path(__file__).parent.absolute() | |
| DATA_DIR = ROOT_DIR / "data" | |
| # Ensure data directory exists | |
| DATA_DIR.mkdir(exist_ok=True) | |
# First try to get API keys from Streamlit secrets, then fall back to environment variables.
# API keys must never be hardcoded here; provide them via Streamlit secrets or the environment.
try:
    import streamlit as st
    OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
    NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
    FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError):
    # Fall back to environment variables if Streamlit secrets aren't available
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")

# Log secrets status (but not the values)
if OPENAI_API_KEY:
    logger.info("OPENAI_API_KEY is set")
else:
    logger.warning("OPENAI_API_KEY not set. The application will not function properly.")

if NEWS_API_KEY:
    logger.info("NEWS_API_KEY is set")
else:
    logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")

if FACTCHECK_API_KEY:
    logger.info("FACTCHECK_API_KEY is set")
else:
    logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")

# Set API key in environment to ensure it's available to all components
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Source credibility file path
source_cred_file = DATA_DIR / "source_credibility.json"

def load_source_credibility():
    """
    Load source credibility data from JSON file.

    Returns:
        dict: Mapping of domain names to credibility scores (0-1).
              Empty dict if the file is not found or has errors.
    """
    try:
        if source_cred_file.exists():
            with open(source_cred_file, 'r') as f:
                return json.load(f)
        else:
            logger.warning(f"Source credibility file not found: {source_cred_file}")
            return {}
    except Exception as e:
        logger.error(f"Error loading source credibility file: {e}")
        return {}

# Load source credibility once at module import
SOURCE_CREDIBILITY = load_source_credibility()

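# Illustrative usage sketch (not part of the original module): a hypothetical
# helper showing how callers might look up a domain's credibility score.
# The name `get_source_credibility` and the neutral 0.5 default are assumptions.
def get_source_credibility(domain, default=0.5):
    """Return the credibility score (0-1) for a domain, or `default` if unknown."""
    return SOURCE_CREDIBILITY.get(domain.lower().strip(), default)
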
# Rate limiting configuration
RATE_LIMITS = {
    # api_name: {"requests": max_requests, "period": period_in_seconds}
    "newsapi": {"requests": 100, "period": 3600},         # 100 requests per hour
    "factcheck": {"requests": 1000, "period": 86400},     # 1000 requests per day
    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
    "wikidata": {"requests": 60, "period": 60},           # 60 requests per minute
    "wikipedia": {"requests": 200, "period": 60},         # 200 requests per minute
    "rss": {"requests": 300, "period": 3600}              # 300 RSS requests per hour
}

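# Illustrative sketch (an assumption, not existing project code): a minimal
# sliding-window limiter driven by RATE_LIMITS. `wait_for_slot` is a
# hypothetical helper that blocks until the named API is under its limit.
import time
from collections import deque

_request_times = {name: deque() for name in RATE_LIMITS}

def wait_for_slot(api_name):
    """Block until one more request to `api_name` would stay within RATE_LIMITS."""
    limit = RATE_LIMITS[api_name]
    window = _request_times[api_name]
    now = time.time()
    # Discard timestamps that have aged out of the rate window
    while window and now - window[0] > limit["period"]:
        window.popleft()
    if len(window) >= limit["requests"]:
        # Sleep until the oldest recorded request falls outside the window
        time.sleep(limit["period"] - (now - window[0]))
    window.append(time.time())
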
# Error backoff settings
ERROR_BACKOFF = {
    "max_retries": 5,
    "initial_backoff": 1,  # seconds
    "backoff_factor": 2,   # exponential backoff multiplier
}

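# Illustrative sketch (an assumption, not existing project code): an
# exponential-backoff retry loop driven by ERROR_BACKOFF. `call` is any
# zero-argument callable; the hypothetical helper re-raises the last error
# once max_retries attempts are exhausted.
import time

def retry_with_backoff(call):
    """Run `call()` and retry failures according to ERROR_BACKOFF."""
    delay = ERROR_BACKOFF["initial_backoff"]
    last_error = None
    for attempt in range(1, ERROR_BACKOFF["max_retries"] + 1):
        try:
            return call()
        except Exception as e:
            last_error = e
            if attempt == ERROR_BACKOFF["max_retries"]:
                break
            logger.warning(f"Attempt {attempt} failed: {e}; retrying in {delay}s")
            time.sleep(delay)
            delay *= ERROR_BACKOFF["backoff_factor"]
    raise last_error
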
# RSS feed settings
RSS_SETTINGS = {
    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
    "max_age_days": 3,            # Maximum age of RSS items to consider
    "timeout_seconds": 5,         # Timeout for RSS feed requests
    "max_workers": 5              # Number of parallel workers for fetching feeds
}

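# Illustrative sketch (an assumption, not existing project code): fetching a
# capped number of feeds in parallel using the RSS_SETTINGS above.
# `fetch_one` is a hypothetical per-feed callable supplied by the caller; the
# real retrieval logic lives in the evidence-gathering modules.
from concurrent.futures import ThreadPoolExecutor

def fetch_feeds(feed_urls, fetch_one):
    """Fetch up to max_feeds_per_request feeds concurrently."""
    urls = list(feed_urls)[:RSS_SETTINGS["max_feeds_per_request"]]
    with ThreadPoolExecutor(max_workers=RSS_SETTINGS["max_workers"]) as pool:
        # timeout_seconds would be applied inside fetch_one (e.g. as the HTTP
        # timeout for each individual feed request).
        return list(pool.map(fetch_one, urls))
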
# Semantic analysis settings
SEMANTIC_ANALYSIS_CONFIG = {
    "similarity_weight": 0.4,      # Weight for semantic similarity
    "entity_overlap_weight": 0.3,  # Weight for entity matching
    "base_weight": 0.3,            # Base relevance weight
    "temporal_boost": 1.2,         # Boost for recent evidence
    "temporal_penalty": 0.7,       # Penalty for outdated evidence
    "authority_boosts": {
        "scientific_consensus": 1.8,
        "fact_check": 1.5,
        "high_authority": 1.3
    }
}

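# Illustrative sketch (an assumption, not existing project code): one way the
# weights above could combine into a single relevance score. `similarity`,
# `entity_overlap`, and `base_score` are hypothetical inputs in [0, 1]; the
# real scoring logic lives in the semantic analysis module.
def combined_relevance(similarity, entity_overlap, base_score, is_recent=True, authority=None):
    """Blend the weighted components, then apply temporal and authority factors."""
    cfg = SEMANTIC_ANALYSIS_CONFIG
    score = (
        cfg["similarity_weight"] * similarity
        + cfg["entity_overlap_weight"] * entity_overlap
        + cfg["base_weight"] * base_score
    )
    score *= cfg["temporal_boost"] if is_recent else cfg["temporal_penalty"]
    if authority in cfg["authority_boosts"]:
        score *= cfg["authority_boosts"][authority]
    return score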
