Spaces:

rajat5ranjan
/

StockMarketInsights

Running

App Files Files Community

StockMarketInsights / app.py

rajat5ranjan

Update app.py

bbcb8ba verified 16 days ago

raw

history blame

26.2 kB

	import streamlit as st
	import os
	import getpass
	from langchain import PromptTemplate
	from langchain import hub
	from langchain.docstore.document import Document
	from langchain.document_loaders import WebBaseLoader
	from langchain.schema import StrOutputParser
	from langchain.schema.prompt_template import format_document
	from langchain.schema.runnable import RunnablePassthrough
	import google.generativeai as genai
	from langchain_google_genai import GoogleGenerativeAIEmbeddings
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain.chains.llm import LLMChain
	from langchain.chains import StuffDocumentsChain
	from langchain_core.messages import HumanMessage
	import requests
	from tradingview_ta import TA_Handler, Interval
	import yfinance as yf
	from datetime import datetime, timedelta
	from newsapi import NewsApiClient
	import json
	import pandas as pd
	import numpy as np
	import altair as alt
	from GoogleNews import GoogleNews
	from bs4 import BeautifulSoup
	import requests
	from urllib.parse import urlparse, urlunparse
	# from langchain.embeddings import OpenAIEmbeddings
	from stock_vector_db import *
	from datetime import datetime


	# Module-level configuration: credentials, page layout, sidebar content and
	# the shared Gemini clients used by both analysis flows further down.

	# Set your HF dataset repo and token
	HF_REPO_ID = "rajat5ranjan/stock-insights"
	HF_TOKEN = st.secrets["hf_token"] # Store your HF token safely in Streamlit secrets


	st.set_page_config(layout="wide")

	# Read from the process environment; raises KeyError at start-up when the
	# variable is not set.
	GOOGLE_API_KEY=os.environ['GOOGLE_API_KEY']

	st.title('Stock Market Insights')
	st.sidebar.image("https://myndroot.com/wp-content/uploads/2023/12/Gemini-Dext.jpg",width =100)
	st.sidebar.markdown("The App uses Google Gemini API for Text and Vision along with 🦜️🔗 LangChain")
	st.sidebar.info("Know more about [NSE Tickers](https://www.google.com/search?q=nse+tickers+list&sca_esv=a6c39f4d03c5324c&sca_upv=1&rlz=1C1GCEB_enIN1011IN1011&sxsrf=ADLYWILQPbew-0SrvUUWpI8Y29_uOOgbvA%3A1716470016765&ei=AEFPZp-zLvzHp84P_ZWtuA0&oq=NSE+Tickers+&gs_lp=Egxnd3Mtd2l6LXNlcnAiDE5TRSBUaWNrZXJzICoCCAAyBRAAGIAEMggQABgWGAoYHjIGEAAYFhgeMgYQABgWGB4yBhAAGBYYHjIGEAAYFhgeMgYQABgWGB4yBhAAGBYYHjILEAAYgAQYhgMYigUyCxAAGIAEGIYDGIoFSIIbUL0PWL0PcAF4AZABAJgB8QKgAfECqgEDMy0xuAEByAEA-AEBmAICoAKKA8ICChAAGLADGNYEGEeYAwCIBgGQBgiSBwUxLjMtMaAHtQU&sclient=gws-wiz-serp)")

	st.sidebar.info("Know more about [Charts](https://chart-img.com/)")

	# Embedding model handed to HFVectorDB in the "News Sentiment" flow.
	# NOTE(review): no API key is passed here — presumably the client picks up
	# GOOGLE_API_KEY from the environment; confirm.
	gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
	# Chat model used by the LLM chains of both flows.
	llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro",google_api_key = GOOGLE_API_KEY)
	#llm_vis = ChatGoogleGenerativeAI(model="gemini-pro-vision",google_api_key = GOOGLE_API_KEY)


	# Sidebar switch that decides which of the two flows below is executed.
	activities = st.sidebar.selectbox("Select", ["Symbol Analysis", "News Sentiment"])

	def clean_google_news_url(url: str):
	    """
	    Strip Google News tracking parameters (e.g. ``&ved=``, ``&usg=``) from a link.

	    The URL is cut right after the first ``.html`` or ``.cms`` it contains
	    (``.html`` is tried first). When neither extension is present,
	    everything from the first ``&`` onwards is dropped instead.

	    Args:
	        url: Link as returned by Google News. ``None`` and ``""`` are tolerated.

	    Returns:
	        str: The cleaned URL, or ``""`` when no usable URL was supplied.
	    """
	    if not url:
	        # A result without a link previously raised TypeError on `ext in url`.
	        return ""
	    for ext in (".html", ".cms"):
	        if ext in url:
	            return url.split(ext)[0] + ext
	    return url.split("&")[0]  # fallback
	def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10, days: int = 2):
	    """
	    Fetch recent Google News results for a query as LangChain Document objects.

	    Every result link is cleaned with ``clean_google_news_url``, downloaded with
	    ``requests`` and parsed with BeautifulSoup; the text of the page's ``<p>``
	    tags becomes the document body. Results without a link, pages that cannot
	    be fetched or parsed, and pages yielding 200 characters of text or fewer
	    are skipped.

	    Args:
	        query (str): Search query for Google News.
	        max_articles (int): Maximum number of documents to return.
	        timeout (int): Timeout passed to each HTTP request.
	        days (int): Size of the look-back window, in days, ending today.

	    Returns:
	        List[Document]: At most ``max_articles`` parsed articles.
	    """
	    import logging  # local import: the file header does not import logging

	    if max_articles <= 0:
	        return []

	    st.sidebar.caption(f"Fetching articles for query: '{query}'")

	    googlenews = GoogleNews(lang="en")
	    # Restrict the search to the last `days` days.
	    end_date = datetime.today()
	    start_date = end_date - timedelta(days=days)
	    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
	    googlenews.search(query)

	    documents = []
	    for article in googlenews.result():
	        link = article.get("link")
	        if not link:
	            # Nothing to download for this result.
	            continue
	        url = clean_google_news_url(link)

	        try:
	            with st.spinner(f" Trying URL... {url}"):
	                response = requests.get(
	                    url,
	                    timeout=timeout,
	                    headers={"User-Agent": "Mozilla/5.0"},
	                )
	                response.raise_for_status()
	                soup = BeautifulSoup(response.text, "html.parser")
	                # Use the visible <p> tags as an approximation of the main content.
	                paragraph_texts = [p.get_text(strip=True) for p in soup.find_all("p")]
	                content = "\n".join(text for text in paragraph_texts if text)
	        except Exception as exc:
	            # Scraping is best-effort: one bad page must not abort the run,
	            # but the failure is logged instead of vanishing silently.
	            logging.getLogger(__name__).warning("Skipping article %s: %s", url, exc)
	            continue

	        if len(content) <= 200:  # crude filter to skip empty or useless pages
	            continue

	        documents.append(
	            Document(
	                page_content=content,
	                metadata={
	                    "source": "Google News",
	                    "title": article.get("title", ""),
	                    "published": article.get("date", ""),
	                    "link": url,
	                },
	            )
	        )

	        # Stop as soon as the limit is reached; the previous counter-based
	        # check let one document too many through.
	        if len(documents) >= max_articles:
	            st.caption("max articles reached...")
	            break

	    return documents

	if activities == "Symbol Analysis":
	ticker_user = st.text_input("Enter Ticker for NSE Stocks","")
	def get_tradingview_analysis(symbol, exchange, screener, interval):
	    """
	    Return the TradingView technical analysis for one symbol.

	    If the lookup fails for any reason, an error message is shown in the app
	    and the current Streamlit script run is halted with ``st.stop()``.
	    """
	    handler_args = {
	        "symbol": symbol,
	        "screener": screener,
	        "exchange": exchange,
	        "interval": interval,
	    }
	    try:
	        return TA_Handler(**handler_args).get_analysis()
	    except Exception:
	        st.error("Kindly enter correct symbol/ticker...")
	        st.stop()


	if ticker_user!="":


	# st.sidebar.subheader('Prompt')
	# user_prompt = st.sidebar.text_area("Enter Prompt",llm_prompt_template)
	#https://huggingface.co/spaces/pradeepodela/Stock-Analyser/blob/main/app.py
	# --- TradingView technical snapshot for the requested ticker ---
	# Reference: https://huggingface.co/spaces/pradeepodela/Stock-Analyser/blob/main/app.py
	from urllib.parse import quote  # local import: only urlparse/urlunparse are imported at file level

	interval = Interval.INTERVAL_1_DAY
	details = {
	    "symbol": ticker_user,
	    "exchange": "NSE",
	    "screener": "india",
	    "interval": interval,
	}
	analysis_summary = get_tradingview_analysis(
	    symbol=ticker_user,
	    exchange="NSE",
	    screener="india",
	    interval=interval,
	)

	# Page Title
	st.subheader(f"📊 Stock Analysis: :red[{ticker_user}] ({details['exchange']})")

	# Share of BUY signals among all signals reported in the summary.
	summary = analysis_summary.summary
	total_signals = summary['BUY'] + summary['SELL'] + summary['NEUTRAL']
	# Guard against a summary without any signals (previously ZeroDivisionError).
	BUY_PER = (summary['BUY'] / total_signals) * 100 if total_signals else 0.0
	st.markdown(f"##### RECOMMENDATION : :red[{summary['RECOMMENDATION']}] \\| BUY CONFIDENCE %: :red[{round(BUY_PER,2)}]")

	# NOTE(review): the disabled chart-img.com request that used to sit here
	# contained a literal API key. It has been removed from the source; rotate
	# that key and load it from st.secrets if the chart is ever re-enabled.

	# --- Pages used as LLM context ---
	# The ticker is user input, so percent-encode it before it becomes part of
	# a URL path that this app fetches.
	ticker_path = quote(ticker_user, safe="")
	url1 = f"https://www.google.com/finance/quote/{ticker_path}:NSE?hl=en"
	url2 = f"https://in.tradingview.com/symbols/NSE-{ticker_path}/"
	url3 = f"https://in.tradingview.com/symbols/NSE-{ticker_path}/news/"
	url4 = f"https://in.tradingview.com/symbols/NSE-{ticker_path}/minds/"

	loader = WebBaseLoader([url1, url2, url3, url4])
	docs = loader.load()


	st.divider()
	# llm_prompt_template = """You are an expert Stock Market Trader for stock market insights based on fundamental, analytical, profit based and company financials.
	# Based on the context below
	# {context}, Summarize the stock based on Historical data based on fundamental, price, news, sentiment , any red flags and suggest rating of the Stock in a 1 to 10 Scale"""

	# Prompt for the single-stock report. `{input_documents}` is the only
	# template variable; the doubled braces are escaped literal braces so the
	# JSON skeleton reaches the model unchanged. The keys of this skeleton are
	# the ones the rendering code after the chain call reads from the reply.
	llm_prompt_template = """You are an expert Stock Market Trader specializing in stock market insights derived from fundamental analysis, analytical trends, profit-based evaluations, news indicators from different sites and detailed company financials.
	Using your expertise, please analyze the stock based on the provided context below.

	Context:
	{input_documents}

	Task:
	Summarize the stock based on its historical and current data. Keep it CONCISE & BRIEF.
	Evaluate the stock on the following parameters:
	1. Company Fundamentals: Assess the stock's intrinsic value, growth potential, and financial health.
	2. Current & Future Price Trends: Analyze historical price movements and current price trends.
	3. News and Sentiment: Review recent news articles, press releases, and social media sentiment.
	4. Red Flags: Identify any potential risks or warning signs.
	5. Provide a rating for the stock on a scale of 1 to 10.
	6. Advise if the stock is a good buy for the next 1,5, 10 weeks.
	7. Suggest at what price we need to buy and hold or sell the stock

	PROVIDE THE DETAILS based on just the FACTS present in the document
	PROVIDE THE DETAILS IN an JSON Object. Stick to the below JSON object
	{{
	"stock_summary": {{
	"company_name": "",
	"ticker": "",
	"exchange": "",
	"description": "",
	"current_price": "",
	"market_cap": "",
	"historical_performance": {{
	"5_day": "",
	"1_month": "",
	"6_months": "",
	"1_year": "",
	"5_years": ""
	}}
	}},
	"evaluation_parameters": {{
	"company_fundamentals": {{
	"assessment": "",
	"key_metrics": {{
	"pe_ratio": "",
	"volume":"",
	"revenue_growth_yoy": "",
	"net_income_growth_yoy": "",
	"eps_growth_yoy": "",
	"dividend_yield": "",
	"balance_sheet": "",
	"return_on_capital": ""
	}}
	}},
	"current_and_future_price_trends": {{
	"assessment": "",
	"historical_trends": "",
	"current_trends": "",
	"technical_analysis_notes": "",
	"technical_indicators":""
	}},
	"news_and_sentiment": {{
	"assessment": "",
	"positive_sentiment": [
	"",
	"",
	""
	],
	"negative_sentiment": [
	"",
	"",
	""
	]
	}},
	"red_flags": [
	{{
	"flag": "",
	"details": ""
	}},
	{{
	"flag": "",
	"details": ""
	}},
	{{
	"flag": "",
	"details": ""
	}}
	]
	}},
	"overall_rating": {{
	"rating": "ranging from 1 to 10, 1 being low rated, 10 being highly rated",
	"justification": ""
	}},
	"investment_advice": {{
	"next_1_weeks_outlook": "",
	"next_5_weeks_outlook": "",
	"next_10_weeks_outlook": "",
	"price_action_suggestions": {{
	"buy": "",
	"hold": "",
	"sell": ""
	}}
	}}
	}}
	"""



	# Extend the pages loaded earlier with scraped Google News articles for
	# this ticker.
	google_docs = get_google_news_documents(f"Trending News for {ticker_user}", max_articles=10)
	docs.extend(google_docs)
	llm_prompt = PromptTemplate.from_template(llm_prompt_template)

	# All documents are placed into the prompt's `input_documents` variable and
	# sent to the model through one chain invocation.
	# NOTE(review): LLMChain / StuffDocumentsChain look like legacy LangChain
	# APIs — confirm against the pinned langchain version.
	llm_chain = LLMChain(llm=llm,prompt=llm_prompt)
	stuff_chain = StuffDocumentsChain(llm_chain=llm_chain,document_variable_name="input_documents")

	# res = stuff_chain.invoke(docs)
	# The model's reply is read from res["output_text"] by the code that follows.
	res = stuff_chain.invoke({"input_documents": docs})
	# Parse the model's JSON reply and render it as a dashboard. The untouched
	# reply is kept so it can be shown whenever parsing or rendering fails.
	raw_output = res["output_text"]
	try:
	    # Trim whitespace BEFORE looking for a Markdown code fence: replies
	    # usually end with "```\n", which the old endswith("```") check missed.
	    raw_text = raw_output.strip()
	    if raw_text.startswith("```"):
	        raw_text = raw_text[3:]
	        # Optional language tag directly after the opening fence.
	        if raw_text[:4].lower() == "json":
	            raw_text = raw_text[4:]
	    if raw_text.endswith("```"):
	        raw_text = raw_text[:-3]
	    raw_text = raw_text.strip()

	    data = json.loads(raw_text)
	    stock_summary = data["stock_summary"]
	    evaluation = data["evaluation_parameters"]

	    # Header Info
	    st.markdown(f"### {stock_summary['company_name']} ({stock_summary['ticker']}) \\| {stock_summary['exchange']}")
	    st.markdown(f"Description: {stock_summary['description']}")

	    # === Row 1: Price and Market Cap ===
	    row1 = st.columns(3)
	    row1[0].metric("💰 Current Price", stock_summary["current_price"])
	    row1[1].metric("🏢 Market Cap", stock_summary["market_cap"])
	    row1[2].metric("⭐ Rating", data["overall_rating"]["rating"])

	    # === Row 2: Historical Performance ===
	    st.subheader("📊 Historical Performance")
	    performance = stock_summary["historical_performance"]
	    if performance:  # st.columns() does not accept zero columns
	        perf_cols = st.columns(len(performance))
	        for col, (period, change) in zip(perf_cols, performance.items()):
	            col.metric(period.replace("_", " ").title(), change)

	    # === Row 3: Fundamentals ===
	    st.subheader("📘 Company Fundamentals")
	    fundamentals = evaluation["company_fundamentals"]
	    metrics = fundamentals["key_metrics"]
	    row3 = st.columns(4)
	    row3[0].metric("P/E Ratio", metrics["pe_ratio"])
	    row3[1].metric("EPS YoY", metrics["eps_growth_yoy"])
	    row3[2].metric("Revenue YoY", metrics["revenue_growth_yoy"])
	    row3[3].metric("Dividend Yield", metrics["dividend_yield"])

	    row3b = st.columns(4)
	    row3b[0].metric("Net Income YoY", metrics["net_income_growth_yoy"])
	    row3b[1].metric("Volume", metrics["volume"])
	    row3b[2].metric("Return on Capital", metrics["return_on_capital"])
	    row3b[3].metric("Balance Sheet", metrics["balance_sheet"])

	    st.info(fundamentals["assessment"])

	    # === Row 4: Trends and Technicals ===
	    st.subheader("📈 Trends & Technical Analysis")
	    trends = evaluation["current_and_future_price_trends"]
	    row4 = st.columns(3)
	    row4[0].markdown(f"Historical Trends: {trends['historical_trends']}")
	    row4[1].markdown(f"Current Trends: {trends['current_trends']}")
	    row4[2].markdown(f"Technical Indicators: {trends['technical_indicators']}")

	    st.success(trends["assessment"])
	    st.caption(f"📝 Notes: {trends['technical_analysis_notes']}")

	    # === Row 5: Sentiment ===
	    st.subheader("📰 News & Sentiment")
	    sentiment = evaluation["news_and_sentiment"]
	    sentiment_cols = st.columns(2)
	    with sentiment_cols[0]:
	        st.success("👍 Positive Sentiment")
	        for s in sentiment["positive_sentiment"]:
	            st.write(f"✅ {s}")
	    with sentiment_cols[1]:
	        st.error("👎 Negative Sentiment")
	        for s in sentiment["negative_sentiment"]:
	            st.write(f"❌ {s}")
	    st.info(sentiment["assessment"])

	    # === Row 6: Red Flags ===
	    st.subheader("🚩 Red Flags")
	    red_flags = evaluation["red_flags"]
	    # The prompt asks for three flags but the model may return more; keep
	    # the three-column layout as a minimum and widen it when needed instead
	    # of indexing past the last column.
	    red_flag_cols = st.columns(max(3, len(red_flags)))
	    for col, flag in zip(red_flag_cols, red_flags):
	        col.warning(f"{flag['flag']}\n{flag['details']}")

	    # === Row 7: Investment Advice ===
	    st.subheader("💡 Investment Advice")
	    advice = data["investment_advice"]
	    advice_cols = st.columns(3)
	    advice_cols[0].markdown(f"Next 1 Week\n{advice['next_1_weeks_outlook']}")
	    advice_cols[1].markdown(f"Next 5 Weeks\n{advice['next_5_weeks_outlook']}")
	    advice_cols[2].markdown(f"Next 10 Weeks\n{advice['next_10_weeks_outlook']}")

	    price_actions = advice["price_action_suggestions"]
	    action_cols = st.columns(3)
	    action_cols[0].success(f"Buy: {price_actions['buy']}")
	    action_cols[1].info(f"Hold: {price_actions['hold']}")
	    action_cols[2].error(f"Sell: {price_actions['sell']}")

	    # === Footer ===
	    st.caption("Generated by AI-powered financial analysis dashboard.")
	except json.JSONDecodeError as e:
	    st.error(f"JSON decode error: {e}")
	    st.write("Raw text was:")
	    st.text(raw_output)
	except (KeyError, TypeError, AttributeError) as e:
	    # Valid JSON that does not follow the requested schema (missing key,
	    # list instead of object, ...): report it instead of crashing the page.
	    st.error(f"Unexpected response structure: {e!r}")
	    st.write("Raw text was:")
	    st.text(raw_output)
	elif activities=="News Sentiment":



	# Initialize embedding model and vector DB once per session (cache if needed)
	# embedding_model = OpenAIEmbeddings()
	# Vector store backed by the Hugging Face dataset repo; built from the
	# module-level repo id, token and Gemini embedding model.
	vector_db = HFVectorDB(hf_repo_id=HF_REPO_ID, hf_token=HF_TOKEN, embedding_model=gemini_embeddings)

	if st.button("Get Live Updates..."):
	    # Market-news pages used as LLM context.
	    news_urls = [
	        "https://economictimes.indiatimes.com/markets/stocks/news",
	        "https://www.livemint.com/market/stock-market-news/",
	        "https://in.tradingview.com/ideas/editors-picks/?type=trade",
	        "https://pulse.zerodha.com/",
	        "https://upstox.com/news/market-news/stocks/",
	    ]
	    loader = WebBaseLoader(news_urls)
	    docs = loader.load()
	    st.divider()

	    # `{input_documents}` is the only template variable; doubled braces are
	    # escaped literal braces. `\\|` keeps the same backslash-pipe text as
	    # before without relying on an invalid escape sequence.
	    llm_prompt_template = """You are an expert Stock Market Trader specializing in stock market insights derived from fundamental analysis, analytical trends, profit-based evaluations, news indicators from different sites and detailed company financials.
	You will receive stock market news articles or stocks in news from various news websites which have India stock news feed. For the below context/input_documents, perform the following tasks:

	Context:
	{input_documents}

	1. Top picks: After analyzing all provided data, rank the top 5-10 stocks to look at this week, including tickers, current sentiment, and why they made the list.
	2. Identify the stock(s) mentioned (by ticker and company name).
	3. Sentiment analysis: classify as Bullish, Bearish, or Neutral.
	4. Extract critical news: What is the main event or update? (e.g., earnings beat, regulatory approval, management change, major contract or macro impact).
	5. Summarize impact: Briefly explain how this news might affect stock price and investor behavior (e.g., “could boost investor confidence”, “sign indicates profit pressure”, etc.).
	6. Actionable signal: Based on the sentiment and news, suggest whether this is a “Buy”, “Sell”, “Hold”, or “Watch” recommendation, and the rationale.

	PROVIDE THE DETAILS based on just the FACTS present in the document. Do NOT DUPLICATE the Output & hallucinate.
	*Format your output as JSON* with the following structure:

	```json
	{{
	"top_picks": [
	{{
	"ticker": "TICKER",
	"company": "Company Name",
	"sentiment": "Bullish\\|Bearish\\|Neutral",
	"critical_news": "Brief summary of the key event",
	"impact_summary": "How this may affect the stock",
	"action": "Buy\\|Sell\\|Hold\\|Watch",
	"reason": "Why this stock ranks among top picks"
	}},
	...
	]
	}}

	"""

	    google_docs = get_google_news_documents("Indian Stock market news NSE, Stocks in Action, Stocks in News, Stocks to Buy in next few weeks", max_articles=10)
	    docs.extend(google_docs)
	    llm_prompt = PromptTemplate.from_template(llm_prompt_template)

	    llm_chain = LLMChain(llm=llm, prompt=llm_prompt)
	    stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="input_documents")
	    res = stuff_chain.invoke({"input_documents": docs})

	    # Trim whitespace BEFORE looking for a Markdown code fence: replies
	    # usually end with "```\n", which the old endswith("```") check missed.
	    raw_output = res["output_text"]
	    raw_text = raw_output.strip()
	    if raw_text.startswith("```"):
	        raw_text = raw_text[3:]
	        # Optional language tag directly after the opening fence.
	        if raw_text[:4].lower() == "json":
	            raw_text = raw_text[4:]
	    if raw_text.endswith("```"):
	        raw_text = raw_text[:-3]
	    raw_text = raw_text.strip()

	    # Parse JSON; report malformed output the same way the Symbol Analysis
	    # flow does instead of crashing with a traceback.
	    try:
	        parsed_data = json.loads(raw_text)
	    except json.JSONDecodeError as e:
	        parsed_data = None
	        st.error(f"JSON decode error: {e}")
	        st.write("Raw text was:")
	        st.text(raw_output)

	    if parsed_data is not None:
	        # Anything other than a JSON object carries no "top_picks" list.
	        top_picks = parsed_data.get("top_picks", []) if isinstance(parsed_data, dict) else []
	        today = datetime.now()

	        # Add docs to vector DB and save/upload index
	        vector_db.add_documents(docs)

	        # Save top picks json for backtesting
	        save_top_picks_json(top_picks, today, path="top_picks.jsonl")

	        # Optionally add top picks as documents to vector DB
	        add_top_picks_to_vector_db(vector_db, top_picks, today)

	        # Layout: one four-column row per pick. Fields are read with .get()
	        # so a pick with a missing key still renders.
	        sentiment_labels = {
	            "Bullish": "🟢 Bullish",
	            "Bearish": "🔴 Bearish",
	            "Neutral": "🟡 Neutral",
	        }
	        for stock in top_picks:
	            st.subheader(f"{stock.get('company', 'N/A')} ({stock.get('ticker', 'N/A')})")
	            col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
	            with col1:
	                st.markdown(f"📰 Critical News: {stock.get('critical_news', 'N/A')}")
	            with col2:
	                st.markdown(f"📈 Impact Summary: {stock.get('impact_summary', 'N/A')}")
	            with col3:
	                st.markdown(f"💡 Reason for Top Pick: {stock.get('reason', 'N/A')}")
	            with col4:
	                sentiment = stock.get("sentiment", "N/A")
	                st.metric(label="Sentiment", value=sentiment_labels.get(sentiment, sentiment))
	                # NOTE(review): assumed to belong to the fourth column; the
	                # original nesting could not be recovered — confirm.
	                st.markdown(f"🚦 Action: :red[{stock.get('action', 'N/A')}]")

	# NOTE(review): assumed to be drawn whether or not the button was pressed;
	# the original nesting could not be recovered — confirm.
	st.divider()



	else:
	pass

	# Page footer: horizontal rule plus a centred copyright notice, emitted as
	# raw HTML (hence unsafe_allow_html).
	_footer_html = """
	<hr>
	<p style="text-align:center; font-size:12px; color:gray;">
	© 2025 RAJAT RANJAN. All rights reserved.
	</p>
	"""
	st.markdown(_footer_html, unsafe_allow_html=True)