Spaces:
Running
Running
Delete config.py
Browse files
config.py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Configuration module for the Fake News Detector application.
|
3 |
-
|
4 |
-
This module handles loading configuration parameters, API keys,
|
5 |
-
and source credibility data needed for the fact checking system.
|
6 |
-
It manages environment variables and file-based configurations.
|
7 |
-
"""
|
8 |
-
|
9 |
-
import os
|
10 |
-
import json
|
11 |
-
import logging
|
12 |
-
from pathlib import Path
|
13 |
-
|
14 |
-
# Filesystem layout: everything is resolved relative to this module's directory.
ROOT_DIR = Path(__file__).absolute().parent
DATA_DIR = ROOT_DIR.joinpath("data")

# Create the data directory on first import; a pre-existing one is fine.
DATA_DIR.mkdir(exist_ok=True)

# Shared application logger, looked up by its fixed name.
logger = logging.getLogger("misinformation_detector")
|
23 |
-
|
24 |
-
# API keys: prefer Streamlit secrets, fall back to environment variables.
try:
    import streamlit as st
except ImportError:
    # Streamlit is optional here; without it only the environment is consulted.
    st = None


def _read_api_key(name):
    """Return the API key called *name*, or "" when it is not configured.

    Args:
        name: Name of the secret / environment variable to look up.

    Returns:
        The value from Streamlit secrets when Streamlit is importable and the
        lookup succeeds; otherwise the value of the environment variable
        *name*, or "" if that is unset as well.
    """
    env_value = os.environ.get(name, "")
    if st is None:
        return env_value
    try:
        return st.secrets.get(name, env_value)
    except Exception:
        # Best-effort by design: reading st.secrets can fail for reasons other
        # than AttributeError (e.g. no secrets file being present), and a
        # failed lookup must not crash the import of this module. Each key
        # falls back to the environment independently.
        return env_value


OPENAI_API_KEY = _read_api_key("OPENAI_API_KEY")
NEWS_API_KEY = _read_api_key("NEWS_API_KEY")
FACTCHECK_API_KEY = _read_api_key("FACTCHECK_API_KEY")
|
35 |
-
|
36 |
-
# Report which API keys are configured. Only presence is logged, never the
# key values themselves.
for _key_value, _present_msg, _missing_msg in (
    (
        OPENAI_API_KEY,
        "OPENAI_API_KEY is set",
        "OPENAI_API_KEY not set. The application will not function properly.",
    ),
    (
        NEWS_API_KEY,
        "NEWS_API_KEY is set",
        "NEWS_API_KEY not set. News evidence retrieval will be limited.",
    ),
    (
        FACTCHECK_API_KEY,
        "FACTCHECK_API_KEY is set",
        "FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.",
    ),
):
    if _key_value:
        logger.info(_present_msg)
    else:
        logger.warning(_missing_msg)
del _key_value, _present_msg, _missing_msg

# Export the OpenAI key to the process environment so that every component
# reading OPENAI_API_KEY from os.environ sees the value resolved above.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
54 |
-
|
55 |
-
# Location of the JSON file holding per-source credibility data.
source_cred_file = DATA_DIR.joinpath("source_credibility.json")
|
57 |
-
|
58 |
-
def load_source_credibility(path=None):
    """
    Load source credibility data from a JSON file.

    Args:
        path: Optional path of the JSON file to read. When omitted, the
            module-level ``source_cred_file`` is used.

    Returns:
        dict: Mapping of domain names to credibility scores (0-1).
              Empty dict if the file is missing, cannot be read or parsed,
              or does not contain a JSON object at the top level.
    """
    cred_path = source_cred_file if path is None else path
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default locale encoding. Opening directly (instead of checking
        # exists() first) avoids a race between the check and the read.
        with open(cred_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning("Source credibility file not found: %s", cred_path)
        return {}
    except Exception as e:
        # Deliberately broad: this loader runs at module import and must
        # degrade to "no credibility data" rather than crash the application.
        logger.error("Error loading source credibility file: %s", e)
        return {}

    if not isinstance(data, dict):
        # Callers are promised a mapping; reject lists, strings, numbers, etc.
        logger.error(
            "Error loading source credibility file: expected a JSON object, got %s",
            type(data).__name__,
        )
        return {}
    return data
|
76 |
-
|
77 |
-
# Load source credibility once at module import; the loader's result is kept
# in this module-level name for the rest of the application to read.
SOURCE_CREDIBILITY = load_source_credibility()
|
79 |
-
|
80 |
-
# Time units, in seconds, used to spell out the rate-limit windows below.
_MINUTE = 60
_HOUR = 60 * _MINUTE
_DAY = 24 * _HOUR

# Rate limiting configuration.
# Each entry maps an API name to the maximum number of requests ("requests")
# allowed within a window of "period" seconds.
RATE_LIMITS = {
    "newsapi": {"requests": 100, "period": _HOUR},
    "factcheck": {"requests": 1000, "period": _DAY},
    "semantic_scholar": {"requests": 10, "period": 5 * _MINUTE},
    "wikidata": {"requests": 60, "period": _MINUTE},
    "wikipedia": {"requests": 200, "period": _MINUTE},
    "rss": {"requests": 300, "period": _HOUR},
}
|
90 |
-
|
91 |
-
# Retry policy after errors: up to `max_retries` attempts, starting with a
# delay of `initial_backoff` seconds and using `backoff_factor` as the
# exponential backoff multiplier.
ERROR_BACKOFF = dict(
    max_retries=5,
    initial_backoff=1,
    backoff_factor=2,
)
|
97 |
-
|
98 |
-
# RSS feed settings:
#   max_feeds_per_request - maximum number of feeds to try per request
#   max_age_days          - maximum age of RSS items to consider
#   timeout_seconds       - timeout for RSS feed requests
#   max_workers           - number of parallel workers for fetching feeds
RSS_SETTINGS = dict(
    max_feeds_per_request=10,
    max_age_days=3,
    timeout_seconds=5,
    max_workers=5,
)
|
105 |
-
|
106 |
-
# Multipliers keyed by evidence authority category.
_AUTHORITY_BOOSTS = {
    "scientific_consensus": 1.8,
    "fact_check": 1.5,
    "high_authority": 1.3,
}

# Semantic analysis settings: relevance weights, temporal adjustments and the
# per-category authority boosts.
SEMANTIC_ANALYSIS_CONFIG = {
    # Relevance weights
    "similarity_weight": 0.4,       # semantic similarity
    "entity_overlap_weight": 0.3,   # entity matching
    "base_weight": 0.3,             # base relevance
    # Temporal adjustments
    "temporal_boost": 1.2,          # recent evidence
    "temporal_penalty": 0.7,        # outdated evidence
    "authority_boosts": _AUTHORITY_BOOSTS,
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|