# askveracity/config.py
"""
Configuration module for the Fake News Detector application.
This module handles loading configuration parameters, API keys,
and source credibility data needed for the fact-checking system.
It manages environment variables and file-based configurations.
"""
import os
import json
import logging
from pathlib import Path
# Configure logger
logger = logging.getLogger("misinformation_detector")
# Base paths
ROOT_DIR = Path(__file__).parent.absolute()
DATA_DIR = ROOT_DIR / "data"
# Ensure data directory exists
DATA_DIR.mkdir(exist_ok=True)
# First try to get API keys from Streamlit secrets, then fall back to environment variables
try:
import streamlit as st
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError):
    # Fall back to environment variables if Streamlit secrets aren't available.
    # Never hard-code real API keys here; any key committed to a repository
    # must be treated as compromised and revoked.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")
# Log secrets status (but not the values)
if OPENAI_API_KEY:
logger.info("OPENAI_API_KEY is set")
else:
logger.warning("OPENAI_API_KEY not set. The application will not function properly.")
if NEWS_API_KEY:
logger.info("NEWS_API_KEY is set")
else:
logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")
if FACTCHECK_API_KEY:
logger.info("FACTCHECK_API_KEY is set")
else:
logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")
# Make the OpenAI key available to all components via the environment,
# but don't overwrite the variable with an empty string if no key was found
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# Source credibility file path
source_cred_file = DATA_DIR / "source_credibility.json"
def load_source_credibility():
    """
    Load source credibility data from the JSON file.

    Returns:
        dict: Mapping of domain names to credibility scores (0-1).
              Empty dict if the file is not found or has errors.
    """
try:
if source_cred_file.exists():
with open(source_cred_file, 'r') as f:
return json.load(f)
else:
logger.warning(f"Source credibility file not found: {source_cred_file}")
return {}
except Exception as e:
logger.error(f"Error loading source credibility file: {e}")
return {}
# Load source credibility once at module import
SOURCE_CREDIBILITY = load_source_credibility()
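
# Illustrative helper (a sketch, not part of the original module): how callers
# might look up SOURCE_CREDIBILITY. The 0.5 default for unknown domains is an
# assumption here, not a value taken from this codebase.
def get_domain_credibility(domain, default=0.5):
    """Return the credibility score (0-1) for a domain, or `default` if unknown."""
    key = domain.lower()
    if key.startswith("www."):
        key = key[4:]  # Normalize away a leading "www." so lookups match bare domains
    return SOURCE_CREDIBILITY.get(key, default)
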
# Rate limiting configuration
RATE_LIMITS = {
# api_name: {"requests": max_requests, "period": period_in_seconds}
"newsapi": {"requests": 100, "period": 3600}, # 100 requests per hour
"factcheck": {"requests": 1000, "period": 86400}, # 1000 requests per day
"semantic_scholar": {"requests": 10, "period": 300}, # 10 requests per 5 minutes
"wikidata": {"requests": 60, "period": 60}, # 60 requests per minute
"wikipedia": {"requests": 200, "period": 60}, # 200 requests per minute
"rss": {"requests": 300, "period": 3600} # 300 RSS requests per hour
}
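
# Illustrative sketch (not part of the original module): one way a caller could
# enforce RATE_LIMITS with a simple sliding window. The application's real
# limiter may be implemented differently.
import time
from collections import deque

_request_log = {name: deque() for name in RATE_LIMITS}

def allow_request(api_name):
    """Return True if a request to `api_name` fits within its configured window."""
    limit = RATE_LIMITS[api_name]
    window = _request_log[api_name]
    now = time.time()
    # Discard timestamps that have aged out of the rate-limit period
    while window and now - window[0] > limit["period"]:
        window.popleft()
    if len(window) < limit["requests"]:
        window.append(now)
        return True
    return False
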
# Error backoff settings
ERROR_BACKOFF = {
"max_retries": 5,
"initial_backoff": 1, # seconds
"backoff_factor": 2, # exponential backoff
}
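
# Worked example (a sketch, not part of the original module): how these settings
# translate into retry delays. With the values above, successive retries wait
# 1s, 2s, 4s, 8s, and 16s before giving up after max_retries attempts.
def backoff_delay(attempt):
    """Return the delay in seconds before retry number `attempt` (0-indexed)."""
    return ERROR_BACKOFF["initial_backoff"] * ERROR_BACKOFF["backoff_factor"] ** attempt
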
# RSS feed settings
RSS_SETTINGS = {
"max_feeds_per_request": 10, # Maximum number of feeds to try per request
"max_age_days": 3, # Maximum age of RSS items to consider
"timeout_seconds": 5, # Timeout for RSS feed requests
"max_workers": 5 # Number of parallel workers for fetching feeds
}
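
# Illustrative sketch (not part of the original module): how RSS_SETTINGS might
# drive parallel feed fetching. `fetch_feed` is a hypothetical callable taking
# a feed URL and a timeout in seconds; the real retrieval code may differ.
from concurrent.futures import ThreadPoolExecutor

def fetch_feeds(feed_urls, fetch_feed):
    """Fetch up to max_feeds_per_request feeds in parallel, honoring the timeout."""
    urls = feed_urls[:RSS_SETTINGS["max_feeds_per_request"]]
    with ThreadPoolExecutor(max_workers=RSS_SETTINGS["max_workers"]) as pool:
        futures = [pool.submit(fetch_feed, url, RSS_SETTINGS["timeout_seconds"])
                   for url in urls]
        # result() re-raises any fetch error; a production version would handle these
        return [f.result() for f in futures]
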
# Semantic analysis settings
SEMANTIC_ANALYSIS_CONFIG = {
"similarity_weight": 0.4, # Weight for semantic similarity
"entity_overlap_weight": 0.3, # Weight for entity matching
"base_weight": 0.3, # Base relevance weight
"temporal_boost": 1.2, # Boost for recent evidence
"temporal_penalty": 0.7, # Penalty for outdated evidence
"authority_boosts": {
"scientific_consensus": 1.8,
"fact_check": 1.5,
"high_authority": 1.3
}
}
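
# Illustrative sketch (not part of the original module): one plausible way the
# weights above could combine into a single relevance score. The actual scoring
# function lives elsewhere in the application and may differ.
def example_relevance(similarity, entity_overlap, is_recent=None, authority=None):
    """Combine normalized (0-1) similarity and entity-overlap signals into a score."""
    cfg = SEMANTIC_ANALYSIS_CONFIG
    score = (cfg["base_weight"]
             + cfg["similarity_weight"] * similarity
             + cfg["entity_overlap_weight"] * entity_overlap)
    if is_recent is True:
        score *= cfg["temporal_boost"]    # recent evidence gets a 1.2x boost
    elif is_recent is False:
        score *= cfg["temporal_penalty"]  # outdated evidence is penalized to 0.7x
    if authority in cfg["authority_boosts"]:
        score *= cfg["authority_boosts"][authority]
    return score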