Spaces:
Sleeping
Sleeping
| from newsapi import NewsApiClient | |
| from newspaper import Article | |
| import os | |
# NOTE(review): `__export__` is not a name Python gives any special meaning to;
# the list honored by `from module import *` is `__all__`. Presumably this was
# meant to declare `News` as the module's public API -- confirm intent.
| __export__ = ["News"] | |
class News:
    """Fetch headlines through a NewsAPI client and attach full article text.

    Headlines come from ``NewsApiClient``; the body of each article is then
    downloaded and parsed with ``newspaper.Article`` and stored under the
    headline's ``"content"`` key.
    """

    # Display names of sources that are dropped from every source listing.
    __EX_SOURCES__ = [
        "ABC News",
        "Bloomberg",
        "The Hill",
        "Fox Sports",
        "Google News",
        "Newsweek",
        "Politico",
    ]

    # Category names exposed by this class (not referenced inside the class
    # itself). The commented-out entries are currently disabled.
    __CATEGORIES__ = [
        "General",
        # "Business",
        # "Entertainment",
        # "Health",
        # "Science",
        "Technology",
    ]

    def __init__(self):
        """Create the NewsAPI client using the ``NEWS_API_KEY`` env variable."""
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the set of English/US source names, minus excluded ones.

        Args:
            category: Optional category name; it is lower-cased before being
                sent to the API. A falsy value is passed through unchanged.

        Returns:
            A ``set`` of source display names not listed in
            ``__EX_SOURCES__``.
        """
        api_category = category.lower() if category else category
        response = self.newsapi.get_sources(
            language="en",
            country="us",
            category=api_category,
        )
        return {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }

    def get_top_headlines(self, num_headlines=5, category=None):
        """Return top headlines for ``category`` with full article text.

        Args:
            num_headlines: Page size requested from the API.
            category: Optional category used to select the sources.

        Returns:
            The list of article dicts, post-processed by
            ``_get_articles_from_headlines``.
        """
        # get_sources() lower-cases the category itself, so pass it as-is.
        sources = self.get_sources(category)
        # NOTE(review): the NewsAPI docs describe `sources` as a
        # comma-separated list of source *identifiers*; this sends display
        # names joined with ", " -- confirm the API accepts that.
        headlines = self.newsapi.get_top_headlines(
            sources=", ".join(sources),
            page_size=num_headlines,
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def get_headlines(self, num_headlines=5, query=None):
        """Search all articles matching ``query`` and attach full text.

        Args:
            num_headlines: Page size requested from the API.
            query: Search phrase forwarded as ``q``.

        Returns:
            The list of article dicts, post-processed by
            ``_get_articles_from_headlines``.
        """
        sources = self.get_sources()
        # NOTE(review): same display-name vs. identifier concern as in
        # get_top_headlines() -- confirm.
        headlines = self.newsapi.get_everything(
            q=query,
            sources=", ".join(sources),
            page_size=num_headlines,
            # Was misspelled "lanuguage", which made this call raise
            # TypeError (unexpected keyword argument).
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def _get_articles_from_headlines(self, headlines):
        """Normalize headline dicts in place and fill in the article body.

        For every headline: the ``"author"`` key is removed, ``"source"`` is
        flattened from a dict to its ``"name"`` string, and ``"content"`` is
        replaced with the text parsed from the page at ``"url"``.

        Args:
            headlines: List of article dicts as returned by the API.

        Returns:
            The same list object, with each dict mutated.
        """
        for headline in headlines:
            # pop() rather than del: a missing "author" key must not abort
            # the whole batch.
            headline.pop("author", None)
            headline["source"] = headline["source"]["name"]
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text
        return headlines