Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline,
|
3 |
import feedparser
|
4 |
from datetime import datetime, timedelta
|
5 |
import json
|
@@ -20,30 +20,6 @@ logging.basicConfig(
|
|
20 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
21 |
)
|
22 |
|
23 |
-
# News sources and their RSS feeds
|
24 |
-
NEWS_SOURCES = {
|
25 |
-
"Technology": {
|
26 |
-
"TechCrunch": "https://techcrunch.com/feed/",
|
27 |
-
"Wired": "https://www.wired.com/feed/rss",
|
28 |
-
"The Verge": "https://www.theverge.com/rss/index.xml"
|
29 |
-
},
|
30 |
-
"Business": {
|
31 |
-
"Financial Times": "https://www.ft.com/rss/home",
|
32 |
-
"Business Insider": "https://www.businessinsider.com/rss",
|
33 |
-
"Forbes": "https://www.forbes.com/real-time/feed2/"
|
34 |
-
},
|
35 |
-
"Science": {
|
36 |
-
"Science Daily": "https://www.sciencedaily.com/rss/all.xml",
|
37 |
-
"Nature": "http://feeds.nature.com/nature/rss/current",
|
38 |
-
"Scientific American": "http://rss.sciam.com/ScientificAmerican-Global"
|
39 |
-
},
|
40 |
-
"World News": {
|
41 |
-
"Reuters": "http://feeds.reuters.com/reuters/topNews",
|
42 |
-
"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
|
43 |
-
"CNN": "http://rss.cnn.com/rss/edition_world.rss"
|
44 |
-
}
|
45 |
-
}
|
46 |
-
|
47 |
# Language codes and their corresponding MarianMT model names
|
48 |
LANGUAGE_CODES = {
|
49 |
"English": {"code": "en", "model": None}, # No translation needed for English
|
@@ -59,6 +35,8 @@ LANGUAGE_CODES = {
|
|
59 |
"Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
|
60 |
}
|
61 |
|
|
|
|
|
62 |
# Initialize global variables
|
63 |
summarizer = None
|
64 |
translators = {}
|
@@ -71,8 +49,11 @@ class NewsCache:
|
|
71 |
|
72 |
def store_summary(self, content_hash, summary, language=None):
|
73 |
cache_key = f"{content_hash}_{language}" if language else content_hash
|
|
|
74 |
if len(self.summaries) >= self.max_cache_size:
|
|
|
75 |
self.summaries.pop(next(iter(self.summaries)))
|
|
|
76 |
self.summaries[cache_key] = summary
|
77 |
|
78 |
def get_summary(self, content_hash, language=None):
|
@@ -81,44 +62,6 @@ class NewsCache:
|
|
81 |
|
82 |
news_cache = NewsCache()
|
83 |
|
84 |
-
def get_content_hash(content):
|
85 |
-
"""Generate a hash for the content"""
|
86 |
-
return hashlib.md5(content.encode()).hexdigest()
|
87 |
-
|
88 |
-
def parse_date(date_str):
|
89 |
-
"""Parse date string to datetime object"""
|
90 |
-
try:
|
91 |
-
return parsedate_to_datetime(date_str).replace(tzinfo=pytz.UTC)
|
92 |
-
except:
|
93 |
-
return None
|
94 |
-
|
95 |
-
def fetch_news_from_rss(categories):
|
96 |
-
"""Fetch news from RSS feeds based on user interests"""
|
97 |
-
articles = []
|
98 |
-
cutoff_time = datetime.now(pytz.UTC) - timedelta(hours=8)
|
99 |
-
|
100 |
-
for category in categories:
|
101 |
-
if category in NEWS_SOURCES:
|
102 |
-
for source, feed_url in NEWS_SOURCES[category].items():
|
103 |
-
try:
|
104 |
-
feed = feedparser.parse(feed_url)
|
105 |
-
for entry in feed.entries:
|
106 |
-
published = parse_date(entry.get('published'))
|
107 |
-
if published and published > cutoff_time:
|
108 |
-
articles.append({
|
109 |
-
'title': entry.get('title', ''),
|
110 |
-
'description': BeautifulSoup(entry.get('description', ''), 'html.parser').get_text(),
|
111 |
-
'link': entry.get('link', ''),
|
112 |
-
'published': entry.get('published', ''),
|
113 |
-
'category': category,
|
114 |
-
'source': source
|
115 |
-
})
|
116 |
-
except Exception as e:
|
117 |
-
logging.error(f"Error fetching from {feed_url}: {e}")
|
118 |
-
continue
|
119 |
-
|
120 |
-
return articles
|
121 |
-
|
122 |
def initialize_models():
|
123 |
"""Initialize the summarization and translation models"""
|
124 |
global summarizer, translators
|
@@ -135,7 +78,7 @@ def initialize_models():
|
|
135 |
for lang, info in LANGUAGE_CODES.items():
|
136 |
if info["model"]: # Skip English as it doesn't need translation
|
137 |
try:
|
138 |
-
model =
|
139 |
tokenizer = AutoTokenizer.from_pretrained(info["model"])
|
140 |
translators[lang] = (model, tokenizer)
|
141 |
logging.info(f"Initialized translator for {lang}")
|
@@ -315,6 +258,8 @@ def get_personalized_summary(name, progress=gr.Progress()):
|
|
315 |
progress(1.0, desc="Done!")
|
316 |
return "\n".join(summaries)
|
317 |
|
|
|
|
|
318 |
# Gradio interface
|
319 |
with gr.Blocks(title="Enhanced News Summarizer") as demo:
|
320 |
gr.Markdown("# 📰 Enhanced AI News Summarizer")
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
|
3 |
import feedparser
|
4 |
from datetime import datetime, timedelta
|
5 |
import json
|
|
|
20 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
21 |
)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
# Language codes and their corresponding MarianMT model names
|
24 |
LANGUAGE_CODES = {
|
25 |
"English": {"code": "en", "model": None}, # No translation needed for English
|
|
|
35 |
"Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
|
36 |
}
|
37 |
|
38 |
+
# [Previous NEWS_SOURCES definition remains the same...]
|
39 |
+
|
40 |
# Initialize global variables
|
41 |
summarizer = None
|
42 |
translators = {}
|
|
|
49 |
|
50 |
def store_summary(self, content_hash, summary, language=None):
|
51 |
cache_key = f"{content_hash}_{language}" if language else content_hash
|
52 |
+
|
53 |
if len(self.summaries) >= self.max_cache_size:
|
54 |
+
# Remove oldest entry if cache is full
|
55 |
self.summaries.pop(next(iter(self.summaries)))
|
56 |
+
|
57 |
self.summaries[cache_key] = summary
|
58 |
|
59 |
def get_summary(self, content_hash, language=None):
|
|
|
62 |
|
63 |
news_cache = NewsCache()
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
def initialize_models():
|
66 |
"""Initialize the summarization and translation models"""
|
67 |
global summarizer, translators
|
|
|
78 |
for lang, info in LANGUAGE_CODES.items():
|
79 |
if info["model"]: # Skip English as it doesn't need translation
|
80 |
try:
|
81 |
+
model = AutoModelForSeq2SeqGeneration.from_pretrained(info["model"])
|
82 |
tokenizer = AutoTokenizer.from_pretrained(info["model"])
|
83 |
translators[lang] = (model, tokenizer)
|
84 |
logging.info(f"Initialized translator for {lang}")
|
|
|
258 |
progress(1.0, desc="Done!")
|
259 |
return "\n".join(summaries)
|
260 |
|
261 |
+
# [Rest of the code remains the same...]
|
262 |
+
|
263 |
# Gradio interface
|
264 |
with gr.Blocks(title="Enhanced News Summarizer") as demo:
|
265 |
gr.Markdown("# 📰 Enhanced AI News Summarizer")
|