rajat5ranjan committed on
Commit
ed573b5
·
verified ·
1 Parent(s): 72e1a01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py CHANGED
@@ -23,6 +23,8 @@ import json
23
  import pandas as pd
24
  import numpy as np
25
  import altair as alt
 
 
26
 
27
  st.set_page_config(layout="wide")
28
 
@@ -420,6 +422,51 @@ elif activities=="News Sentiment":
420
  }}
421
 
422
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  llm_prompt = PromptTemplate.from_template(llm_prompt_template)
424
 
425
  llm_chain = LLMChain(llm=llm,prompt=llm_prompt)
 
23
  import pandas as pd
24
  import numpy as np
25
  import altair as alt
26
+ from GoogleNews import GoogleNews
27
+ from newspaper import Article
28
 
29
  st.set_page_config(layout="wide")
30
 
 
422
  }}
423
 
424
  """
425
+
426
def get_google_news_documents(query: str, max_articles: int = 10):
    """
    Fetch news articles from Google News and return them as LangChain Documents.

    The query is sent to Google News, the first ``max_articles`` search results
    are downloaded and parsed, and every article that yields non-empty text is
    wrapped in a Document. Processing is best-effort: an article that fails to
    download or parse is reported in the Streamlit UI and skipped, so the
    returned list may be shorter than ``max_articles``.

    Args:
        query (str): Search query for Google News.
        max_articles (int): Maximum number of search results to download and
            parse. Zero or a negative value returns an empty list without
            performing a search.

    Returns:
        List[Document]: Documents whose ``page_content`` is the stripped
        article text and whose metadata carries the keys ``source``,
        ``title``, ``published`` and ``link``.
    """
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Fetching articles for query: '%s'", query)

    # A negative limit would otherwise turn the slice below into
    # "everything except the last N" instead of "nothing"; a zero limit
    # would still trigger a pointless search request.
    if max_articles <= 0:
        return []

    googlenews = GoogleNews(lang="en")
    googlenews.search(query)
    # Defensive: treat a missing result set the same as an empty one.
    articles = googlenews.result() or []

    documents = []
    for article in articles[:max_articles]:
        url = article.get("link")
        if not url:
            # Nothing to download for a result without a link; skip it rather
            # than letting Article(None) fail and report a bogus error.
            continue

        try:
            news_article = Article(url)
            news_article.download()
            news_article.parse()
            content = news_article.text.strip()
        except Exception as e:
            # Deliberate best-effort boundary: one bad article must not abort
            # the whole fetch, so report it to the user and move on.
            st.write(f"Failed to process article: {url} — Error: {e}")
        else:
            # Articles with no extractable text are dropped silently.
            if content:
                documents.append(
                    Document(
                        page_content=content,
                        metadata={
                            "source": "Google News",
                            "title": article.get("title", ""),
                            "published": article.get("date", ""),
                            "link": url,
                        },
                    )
                )

    return documents
467
+
468
+ google_docs = get_google_news_documents("Indian Stock market news NSE, Stocks in Action, Stocks in News", max_articles=10)
469
+ docs.extend(google_docs)
470
  llm_prompt = PromptTemplate.from_template(llm_prompt_template)
471
 
472
  llm_chain = LLMChain(llm=llm,prompt=llm_prompt)