Update app.py
app.py
CHANGED
@@ -45,6 +45,81 @@ llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro",google_api_key = GOOGLE_API_
 
 activities = st.sidebar.selectbox("Select", ["Symbol Analysis", "News Sentiment"])
 
+def clean_google_news_url(url: str):
+    """
+    Cleans Google News redirect URLs by removing tracking parameters like &ved= and &usg=.
+    Keeps content up to .html or .cms.
+    """
+    for ext in [".html", ".cms"]:
+        if ext in url:
+            return url.split(ext)[0] + ext
+    return url.split("&")[0]  # fallback
+def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
+    """
+    Fetches news articles from Google News and returns a list of LangChain Document objects,
+    using requests + BeautifulSoup instead of newspaper3k.
+
+    Args:
+        query (str): Search query for Google News.
+        max_articles (int): Number of articles to fetch.
+        timeout (int): Timeout for HTTP requests.
+
+    Returns:
+        List[Document]: Parsed article content as LangChain Document objects.
+    """
+    st.caption(f"Fetching articles for query: '{query}'")
+
+    googlenews = GoogleNews(lang="en")
+    # Set time range to last `days` days
+    end_date = datetime.today()
+    days = 2
+    start_date = end_date - timedelta(days=days)
+    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
+
+    googlenews.search(query)
+    articles = googlenews.result()
+
+    documents = []
+    i=1
+    for article in articles:
+
+
+        url = clean_google_news_url(article.get("link"))
+        try:
+            with st.spinner(f" Trying URL... {url}"):
+                # st.caption()
+                response = requests.get(url, timeout=timeout, headers={
+                    "User-Agent": "Mozilla/5.0"
+                })
+                response.raise_for_status()
+                soup = BeautifulSoup(response.text, "html.parser")
+
+                # Extract visible <p> tags to simulate main content
+                paragraphs = soup.find_all("p")
+                content = "\n".join([p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)])
+
+                if content and len(content) > 200: # crude filter to skip empty or useless pages
+                    doc = Document(
+                        page_content=content,
+                        metadata={
+                            "source": "Google News",
+                            "title": article.get("title", ""),
+                            "published": article.get("date", ""),
+                            "link": url,
+                        }
+                    )
+                    documents.append(doc)
+
+                    if i > max_articles:
+                        st.caption("max articles reached...")
+                        break
+
+                    i+=1
+        except Exception as e:
+            # st.error(f"Failed to fetch or parse article: {url} — Error: {e}")
+            pass
+
+    return documents
 
 if activities == "Symbol Analysis":
     ticker_user = st.text_input("Enter Ticker for NSE Stocks","")
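The relocated URL helper is easiest to follow by example. A minimal sketch of what `clean_google_news_url` returns for a few made-up Google News links (illustrative URLs, not taken from the app):

```python
# Illustrative URLs only; they exercise the three branches of clean_google_news_url.
clean_google_news_url("https://example.com/markets/story-123.html&ved=2ahUKEw&usg=AOvVaw")
# -> "https://example.com/markets/story-123.html"   (cut at ".html")

clean_google_news_url("https://example.com/news/story-456.cms&ved=2ahUKEw")
# -> "https://example.com/news/story-456.cms"       (cut at ".cms")

clean_google_news_url("https://example.com/live-blog?id=789&ved=2ahUKEw")
# -> "https://example.com/live-blog?id=789"         (fallback: keep text before the first "&")
```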
@@ -264,7 +339,9 @@ if activities == "Symbol Analysis":
     # else:
    #     st.warning(f"Failed to retrieve image. Status code: {response.status_code}")
    #     st.warning("Response:", response.text)
-
+
+    google_docs = get_google_news_documents(f"Trending News for {ticker_user}", max_articles=10)
+    docs.extend(google_docs)
     llm_prompt = PromptTemplate.from_template(llm_prompt_template)
 
     llm_chain = LLMChain(llm=llm,prompt=llm_prompt)
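A rough usage sketch of the call this hunk adds, assuming app.py's existing module-level imports (streamlit, GoogleNews, requests, BeautifulSoup, LangChain's Document) and a hypothetical ticker value in place of the st.text_input result:

```python
ticker_user = "RELIANCE"  # hypothetical value; the app reads this from st.text_input

# Same call pattern the diff adds before building the prompt for Symbol Analysis.
google_docs = get_google_news_documents(f"Trending News for {ticker_user}", max_articles=10)

for doc in google_docs:
    # Each Document carries the scraped paragraph text plus source metadata.
    print(doc.metadata["title"], "->", doc.metadata["link"])
    print(doc.page_content[:200])
```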
@@ -424,82 +501,6 @@ elif activities=="News Sentiment":
     """
 
 
-    def clean_google_news_url(url: str):
-        """
-        Cleans Google News redirect URLs by removing tracking parameters like &ved= and &usg=.
-        Keeps content up to .html or .cms.
-        """
-        for ext in [".html", ".cms"]:
-            if ext in url:
-                return url.split(ext)[0] + ext
-        return url.split("&")[0]  # fallback
-    def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
-        """
-        Fetches news articles from Google News and returns a list of LangChain Document objects,
-        using requests + BeautifulSoup instead of newspaper3k.
-
-        Args:
-            query (str): Search query for Google News.
-            max_articles (int): Number of articles to fetch.
-            timeout (int): Timeout for HTTP requests.
-
-        Returns:
-            List[Document]: Parsed article content as LangChain Document objects.
-        """
-        st.caption(f"Fetching articles for query: '{query}'")
-
-        googlenews = GoogleNews(lang="en")
-        # Set time range to last `days` days
-        end_date = datetime.today()
-        days = 2
-        start_date = end_date - timedelta(days=days)
-        googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
-
-        googlenews.search(query)
-        articles = googlenews.result()
-
-        documents = []
-        i=1
-        for article in articles:
-
-
-            url = clean_google_news_url(article.get("link"))
-            try:
-                with st.spinner(f" Trying URL... {url}"):
-                    # st.caption()
-                    response = requests.get(url, timeout=timeout, headers={
-                        "User-Agent": "Mozilla/5.0"
-                    })
-                    response.raise_for_status()
-                    soup = BeautifulSoup(response.text, "html.parser")
-
-                    # Extract visible <p> tags to simulate main content
-                    paragraphs = soup.find_all("p")
-                    content = "\n".join([p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)])
-
-                    if content and len(content) > 200: # crude filter to skip empty or useless pages
-                        doc = Document(
-                            page_content=content,
-                            metadata={
-                                "source": "Google News",
-                                "title": article.get("title", ""),
-                                "published": article.get("date", ""),
-                                "link": url,
-                            }
-                        )
-                        documents.append(doc)
-
-                        if i > max_articles:
-                            st.caption("max articles reached...")
-                            break
-
-                        i+=1
-            except Exception as e:
-                # st.error(f"Failed to fetch or parse article: {url} — Error: {e}")
-                pass
-
-        return documents
-
     google_docs = get_google_news_documents("Indian Stock market news NSE, Stocks in Action, Stocks in News, Stocks to Buy in next few weeks", max_articles=10)
     docs.extend(google_docs)
     # st.write(docs)
@@ -528,8 +529,8 @@ elif activities=="News Sentiment":
 
     # Layout
     for stock in top_picks:
-        col1,col2,col3, col4 = st.columns([1,1,1, 1])
         st.subheader(f"{stock['company']} ({stock['ticker']})")
+        col1,col2,col3, col4 = st.columns([1,1,1, 1])
         with col1:
             st.markdown(f"**📰 Critical News:** {stock['critical_news']}")
         with col2:
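The final hunk only reorders the News Sentiment layout: `st.subheader` is now emitted before `st.columns`, so each stock's name renders as a full-width heading above its row of detail columns instead of after the column block. A standalone sketch of the corrected ordering, using placeholder data in place of the LLM's top_picks output:

```python
import streamlit as st

# Placeholder entry; in the app, top_picks comes from the LLM's news-sentiment output.
top_picks = [{"company": "Example Ltd", "ticker": "EXMPL", "critical_news": "placeholder news"}]

for stock in top_picks:
    st.subheader(f"{stock['company']} ({stock['ticker']})")  # full-width heading first
    col1, col2, col3, col4 = st.columns([1, 1, 1, 1])        # then four equal-width columns
    with col1:
        st.markdown(f"**📰 Critical News:** {stock['critical_news']}")
    # col2..col4 hold the remaining per-stock fields in the app.
```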