Spaces:

loayshabet
/

news-sumarry

Running

App Files Files Community

loayshabet commited on Nov 17, 2024

Commit

602dc07

verified ·

1 Parent(s): e7e8dd6

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -64

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import pipeline, MarianMTModel, AutoTokenizer
 import feedparser
 from datetime import datetime, timedelta
 import json
@@ -20,30 +20,6 @@ logging.basicConfig(
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
-# News sources and their RSS feeds
-NEWS_SOURCES = {
-    "Technology": {
-        "TechCrunch": "https://techcrunch.com/feed/",
-        "Wired": "https://www.wired.com/feed/rss",
-        "The Verge": "https://www.theverge.com/rss/index.xml"
-    },
-    "Business": {
-        "Financial Times": "https://www.ft.com/rss/home",
-        "Business Insider": "https://www.businessinsider.com/rss",
-        "Forbes": "https://www.forbes.com/real-time/feed2/"
-    },
-    "Science": {
-        "Science Daily": "https://www.sciencedaily.com/rss/all.xml",
-        "Nature": "http://feeds.nature.com/nature/rss/current",
-        "Scientific American": "http://rss.sciam.com/ScientificAmerican-Global"
-    },
-    "World News": {
-        "Reuters": "http://feeds.reuters.com/reuters/topNews",
-        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-        "CNN": "http://rss.cnn.com/rss/edition_world.rss"
-    }
-}
 # Language codes and their corresponding MarianMT model names
 LANGUAGE_CODES = {
     "English": {"code": "en", "model": None},  # No translation needed for English
@@ -59,6 +35,8 @@ LANGUAGE_CODES = {
     "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
 }
 # Initialize global variables
 summarizer = None
 translators = {}
@@ -71,8 +49,11 @@ class NewsCache:
     def store_summary(self, content_hash, summary, language=None):
         cache_key = f"{content_hash}_{language}" if language else content_hash
         if len(self.summaries) >= self.max_cache_size:
             self.summaries.pop(next(iter(self.summaries)))
         self.summaries[cache_key] = summary
     def get_summary(self, content_hash, language=None):
@@ -81,44 +62,6 @@ class NewsCache:
 news_cache = NewsCache()
-def get_content_hash(content):
-    """Generate a hash for the content"""
-    return hashlib.md5(content.encode()).hexdigest()
-def parse_date(date_str):
-    """Parse date string to datetime object"""
-    try:
-        return parsedate_to_datetime(date_str).replace(tzinfo=pytz.UTC)
-    except:
-        return None
-def fetch_news_from_rss(categories):
-    """Fetch news from RSS feeds based on user interests"""
-    articles = []
-    cutoff_time = datetime.now(pytz.UTC) - timedelta(hours=8)
-    for category in categories:
-        if category in NEWS_SOURCES:
-            for source, feed_url in NEWS_SOURCES[category].items():
-                try:
-                    feed = feedparser.parse(feed_url)
-                    for entry in feed.entries:
-                        published = parse_date(entry.get('published'))
-                        if published and published > cutoff_time:
-                            articles.append({
-                                'title': entry.get('title', ''),
-                                'description': BeautifulSoup(entry.get('description', ''), 'html.parser').get_text(),
-                                'link': entry.get('link', ''),
-                                'published': entry.get('published', ''),
-                                'category': category,
-                                'source': source
-                            })
-                except Exception as e:
-                    logging.error(f"Error fetching from {feed_url}: {e}")
-                    continue
-    return articles
 def initialize_models():
     """Initialize the summarization and translation models"""
     global summarizer, translators
@@ -135,7 +78,7 @@ def initialize_models():
         for lang, info in LANGUAGE_CODES.items():
             if info["model"]:  # Skip English as it doesn't need translation
                 try:
-                    model = MarianMTModel.from_pretrained(info["model"])
                     tokenizer = AutoTokenizer.from_pretrained(info["model"])
                     translators[lang] = (model, tokenizer)
                     logging.info(f"Initialized translator for {lang}")
@@ -315,6 +258,8 @@ def get_personalized_summary(name, progress=gr.Progress()):
     progress(1.0, desc="Done!")
     return "\n".join(summaries)
 # Gradio interface
 with gr.Blocks(title="Enhanced News Summarizer") as demo:
     gr.Markdown("# 📰 Enhanced AI News Summarizer")

 import gradio as gr
+from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
 import feedparser
 from datetime import datetime, timedelta
 import json
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 # Language codes and their corresponding MarianMT model names
 LANGUAGE_CODES = {
     "English": {"code": "en", "model": None},  # No translation needed for English
     "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
 }
+# [Previous NEWS_SOURCES definition remains the same...]
 # Initialize global variables
 summarizer = None
 translators = {}
     def store_summary(self, content_hash, summary, language=None):
         cache_key = f"{content_hash}_{language}" if language else content_hash
         if len(self.summaries) >= self.max_cache_size:
+            # Remove oldest entry if cache is full
             self.summaries.pop(next(iter(self.summaries)))
         self.summaries[cache_key] = summary
     def get_summary(self, content_hash, language=None):
 news_cache = NewsCache()
 def initialize_models():
     """Initialize the summarization and translation models"""
     global summarizer, translators
         for lang, info in LANGUAGE_CODES.items():
             if info["model"]:  # Skip English as it doesn't need translation
                 try:
+                    model = AutoModelForSeq2SeqGeneration.from_pretrained(info["model"])
                     tokenizer = AutoTokenizer.from_pretrained(info["model"])
                     translators[lang] = (model, tokenizer)
                     logging.info(f"Initialized translator for {lang}")
     progress(1.0, desc="Done!")
     return "\n".join(summaries)
+# [Rest of the code remains the same...]
 # Gradio interface
 with gr.Blocks(title="Enhanced News Summarizer") as demo:
     gr.Markdown("# 📰 Enhanced AI News Summarizer")