rajat5ranjan committed
Commit dd363c0 · verified · 1 Parent(s): fa7bacb

Update app.py

Files changed (1): app.py (+79 -78)
app.py CHANGED

@@ -45,6 +45,81 @@ llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro",google_api_key = GOOGLE_API_
 
 activities = st.sidebar.selectbox("Select", ["Symbol Analysis", "News Sentiment"])
 
+def clean_google_news_url(url: str):
+    """
+    Cleans Google News redirect URLs by removing tracking parameters like &ved= and &usg=.
+    Keeps content up to .html or .cms.
+    """
+    for ext in [".html", ".cms"]:
+        if ext in url:
+            return url.split(ext)[0] + ext
+    return url.split("&")[0] # fallback
+def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
+    """
+    Fetches news articles from Google News and returns a list of LangChain Document objects,
+    using requests + BeautifulSoup instead of newspaper3k.
+
+    Args:
+        query (str): Search query for Google News.
+        max_articles (int): Number of articles to fetch.
+        timeout (int): Timeout for HTTP requests.
+
+    Returns:
+        List[Document]: Parsed article content as LangChain Document objects.
+    """
+    st.caption(f"Fetching articles for query: '{query}'")
+
+    googlenews = GoogleNews(lang="en")
+    # Set time range to last `days` days
+    end_date = datetime.today()
+    days = 2
+    start_date = end_date - timedelta(days=days)
+    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
+
+    googlenews.search(query)
+    articles = googlenews.result()
+
+    documents = []
+    i=1
+    for article in articles:
+
+
+        url = clean_google_news_url(article.get("link"))
+        try:
+            with st.spinner(f" Trying URL... {url}"):
+                # st.caption()
+                response = requests.get(url, timeout=timeout, headers={
+                    "User-Agent": "Mozilla/5.0"
+                })
+                response.raise_for_status()
+                soup = BeautifulSoup(response.text, "html.parser")
+
+                # Extract visible <p> tags to simulate main content
+                paragraphs = soup.find_all("p")
+                content = "\n".join([p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)])
+
+                if content and len(content) > 200: # crude filter to skip empty or useless pages
+                    doc = Document(
+                        page_content=content,
+                        metadata={
+                            "source": "Google News",
+                            "title": article.get("title", ""),
+                            "published": article.get("date", ""),
+                            "link": url,
+                        }
+                    )
+                    documents.append(doc)
+
+                if i > max_articles:
+                    st.caption("max articles reached...")
+                    break
+
+                i+=1
+        except Exception as e:
+            # st.error(f"Failed to fetch or parse article: {url} — Error: {e}")
+            pass
+
+    return documents
 
 if activities == "Symbol Analysis":
     ticker_user = st.text_input("Enter Ticker for NSE Stocks","")
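The URL cleaner added above truncates a link at the first ".html" or ".cms" extension and otherwise keeps everything before the first "&". A quick sketch of its behavior, with made-up URLs:

    # Illustrative inputs; both URLs are hypothetical.
    clean_google_news_url("https://example.com/markets/story.html&ved=2ahUK&usg=AOv")
    # -> "https://example.com/markets/story.html" (cut at the .html extension)
    clean_google_news_url("https://example.com/article?id=42&ved=2ahUK")
    # -> "https://example.com/article?id=42" (fallback: split on the first "&")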
@@ -264,7 +339,9 @@ if activities == "Symbol Analysis":
     # else:
     #     st.warning(f"Failed to retrieve image. Status code: {response.status_code}")
     #     st.warning("Response:", response.text)
-
+
+    google_docs = get_google_news_documents(f"Trending News for {ticker_user}", max_articles=10)
+    docs.extend(google_docs)
     llm_prompt = PromptTemplate.from_template(llm_prompt_template)
 
     llm_chain = LLMChain(llm=llm,prompt=llm_prompt)
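With this hunk, the "Symbol Analysis" branch now appends per-ticker Google News documents to docs before the prompt and chain are built, so the Gemini call can draw on the fetched articles. How docs is injected depends on llm_prompt_template, which is outside this diff; a minimal sketch of the usual pattern, assuming the template exposes a {context} placeholder (an assumption, not shown in this commit):

    # Hypothetical: collapse the fetched Documents into one context string
    # and pass it through the template variable assumed above.
    context = "\n\n".join(doc.page_content for doc in docs)
    llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(llm_prompt_template))
    result = llm_chain.run(context=context)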
@@ -424,82 +501,6 @@ elif activities=="News Sentiment":
     """
 
 
-    def clean_google_news_url(url: str):
-        """
-        Cleans Google News redirect URLs by removing tracking parameters like &ved= and &usg=.
-        Keeps content up to .html or .cms.
-        """
-        for ext in [".html", ".cms"]:
-            if ext in url:
-                return url.split(ext)[0] + ext
-        return url.split("&")[0] # fallback
-    def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
-        """
-        Fetches news articles from Google News and returns a list of LangChain Document objects,
-        using requests + BeautifulSoup instead of newspaper3k.
-
-        Args:
-            query (str): Search query for Google News.
-            max_articles (int): Number of articles to fetch.
-            timeout (int): Timeout for HTTP requests.
-
-        Returns:
-            List[Document]: Parsed article content as LangChain Document objects.
-        """
-        st.caption(f"Fetching articles for query: '{query}'")
-
-        googlenews = GoogleNews(lang="en")
-        # Set time range to last `days` days
-        end_date = datetime.today()
-        days = 2
-        start_date = end_date - timedelta(days=days)
-        googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
-
-        googlenews.search(query)
-        articles = googlenews.result()
-
-        documents = []
-        i=1
-        for article in articles:
-
-
-            url = clean_google_news_url(article.get("link"))
-            try:
-                with st.spinner(f" Trying URL... {url}"):
-                    # st.caption()
-                    response = requests.get(url, timeout=timeout, headers={
-                        "User-Agent": "Mozilla/5.0"
-                    })
-                    response.raise_for_status()
-                    soup = BeautifulSoup(response.text, "html.parser")
-
-                    # Extract visible <p> tags to simulate main content
-                    paragraphs = soup.find_all("p")
-                    content = "\n".join([p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)])
-
-                    if content and len(content) > 200: # crude filter to skip empty or useless pages
-                        doc = Document(
-                            page_content=content,
-                            metadata={
-                                "source": "Google News",
-                                "title": article.get("title", ""),
-                                "published": article.get("date", ""),
-                                "link": url,
-                            }
-                        )
-                        documents.append(doc)
-
-                    if i > max_articles:
-                        st.caption("max articles reached...")
-                        break
-
-                    i+=1
-            except Exception as e:
-                # st.error(f"Failed to fetch or parse article: {url} — Error: {e}")
-                pass
-
-        return documents
-
     google_docs = get_google_news_documents("Indian Stock market news NSE, Stocks in Action, Stocks in News, Stocks to Buy in next few weeks", max_articles=10)
     docs.extend(google_docs)
     # st.write(docs)
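The deleted block is the same helper pair that the first hunk adds: it previously lived inside the "News Sentiment" branch, and the commit moves it to module level so both branches can call it. For reference, the GoogleNews calls it relies on (set_time_range, search, result from the GoogleNews package) can be exercised standalone; a minimal sketch with an illustrative query:

    from datetime import datetime, timedelta
    from GoogleNews import GoogleNews

    end_date = datetime.today()
    start_date = end_date - timedelta(days=2)  # same 2-day window as the app

    googlenews = GoogleNews(lang="en")
    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
    googlenews.search("NSE stocks in news")  # illustrative query
    for item in googlenews.result()[:3]:
        print(item.get("title"), item.get("link"))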
@@ -528,8 +529,8 @@ elif activities=="News Sentiment":
 
     # Layout
     for stock in top_picks:
-        col1,col2,col3, col4 = st.columns([1,1,1, 1])
         st.subheader(f"{stock['company']} ({stock['ticker']})")
+        col1,col2,col3, col4 = st.columns([1,1,1, 1])
         with col1:
             st.markdown(f"**📰 Critical News:** {stock['critical_news']}")
         with col2:
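The final hunk moves the st.columns call below st.subheader inside the loop, so each stock's heading renders at full width before the four-column row is created. The pattern in isolation (a minimal sketch with placeholder data):

    import streamlit as st

    stock = {"company": "Example Co", "ticker": "EXMPL", "critical_news": "..."}  # placeholder
    st.subheader(f"{stock['company']} ({stock['ticker']})")
    col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
    with col1:
        st.markdown(f"**📰 Critical News:** {stock['critical_news']}")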
 