rajat5ranjan commited on
Commit
92eaa07
·
verified ·
1 Parent(s): dd363c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -70
app.py CHANGED
@@ -45,81 +45,81 @@ llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro",google_api_key = GOOGLE_API_
45
 
46
  activities = st.sidebar.selectbox("Select", ["Symbol Analysis", "News Sentiment"])
47
 
48
- def clean_google_news_url(url: str):
49
- """
50
- Cleans Google News redirect URLs by removing tracking parameters like &ved= and &usg=.
51
- Keeps content up to .html or .cms.
52
- """
53
- for ext in [".html", ".cms"]:
54
- if ext in url:
55
- return url.split(ext)[0] + ext
56
- return url.split("&")[0] # fallback
57
- def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
58
- """
59
- Fetches news articles from Google News and returns a list of LangChain Document objects,
60
- using requests + BeautifulSoup instead of newspaper3k.
61
-
62
- Args:
63
- query (str): Search query for Google News.
64
- max_articles (int): Number of articles to fetch.
65
- timeout (int): Timeout for HTTP requests.
 
 
 
 
 
66
 
67
- Returns:
68
- List[Document]: Parsed article content as LangChain Document objects.
69
- """
70
- st.caption(f"Fetching articles for query: '{query}'")
71
-
72
- googlenews = GoogleNews(lang="en")
73
- # Set time range to last `days` days
74
- end_date = datetime.today()
75
- days = 2
76
- start_date = end_date - timedelta(days=days)
77
- googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
78
 
79
- googlenews.search(query)
80
- articles = googlenews.result()
 
 
 
 
81
 
82
- documents = []
83
- i=1
84
- for article in articles:
85
 
86
-
87
- url = clean_google_news_url(article.get("link"))
88
- try:
89
- with st.spinner(f" Trying URL... {url}"):
90
- # st.caption()
91
- response = requests.get(url, timeout=timeout, headers={
92
- "User-Agent": "Mozilla/5.0"
93
- })
94
- response.raise_for_status()
95
- soup = BeautifulSoup(response.text, "html.parser")
96
-
97
- # Extract visible <p> tags to simulate main content
98
- paragraphs = soup.find_all("p")
99
- content = "\n".join([p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)])
100
-
101
- if content and len(content) > 200: # crude filter to skip empty or useless pages
102
- doc = Document(
103
- page_content=content,
104
- metadata={
105
- "source": "Google News",
106
- "title": article.get("title", ""),
107
- "published": article.get("date", ""),
108
- "link": url,
109
- }
110
- )
111
- documents.append(doc)
112
-
113
- if i > max_articles:
114
- st.caption("max articles reached...")
115
- break
116
-
117
- i+=1
118
- except Exception as e:
119
- # st.error(f"Failed to fetch or parse article: {url} — Error: {e}")
120
- pass
121
 
122
- return documents
 
 
 
 
 
123
 
124
  if activities == "Symbol Analysis":
125
  ticker_user = st.text_input("Enter Ticker for NSE Stocks","")
 
45
 
46
  activities = st.sidebar.selectbox("Select", ["Symbol Analysis", "News Sentiment"])
47
 
48
def clean_google_news_url(url: str) -> str:
    """Strip Google News tracking junk from a redirect URL.

    Google News result links often append tracking parameters (e.g.
    ``&ved=``, ``&usg=``) after the real article URL.  For article pages
    ending in ``.html`` or ``.cms`` everything after that extension is
    dropped; otherwise everything after the first ``&`` is dropped as a
    fallback.

    Args:
        url: Raw (possibly tracked) URL.  May be ``None``/empty, since the
            caller passes ``article.get("link")``.

    Returns:
        The cleaned URL, or ``""`` when *url* is falsy.
    """
    if not url:
        # article.get("link") can be None; without this guard the
        # `ext in url` test below raises TypeError before the caller's try.
        return ""
    for ext in (".html", ".cms"):
        if ext in url:
            return url.split(ext)[0] + ext
    return url.split("&")[0]  # fallback: drop trailing tracking params
57
def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
    """
    Fetches news articles from Google News and returns a list of LangChain
    Document objects, using requests + BeautifulSoup instead of newspaper3k.

    Searches Google News for *query* restricted to the last 2 days, downloads
    each result page, and keeps pages whose visible <p> text looks substantial.

    Args:
        query (str): Search query for Google News.
        max_articles (int): Maximum number of documents to collect.
        timeout (int): Timeout (seconds) for each HTTP request.

    Returns:
        List[Document]: Parsed article content as LangChain Document objects,
        at most *max_articles* entries.
    """
    st.caption(f"Fetching articles for query: '{query}'")

    googlenews = GoogleNews(lang="en")
    # Restrict results to the last `days` days.
    end_date = datetime.today()
    days = 2
    start_date = end_date - timedelta(days=days)
    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))

    googlenews.search(query)
    articles = googlenews.result()

    documents = []
    for article in articles:
        # Cap on *collected* documents rather than a separate counter:
        # the original `i > max_articles` check ran after appending and
        # could overshoot the limit by one, and failed fetches also
        # consumed quota.
        if len(documents) >= max_articles:
            st.caption("max articles reached...")
            break

        raw_link = article.get("link")
        try:
            # Inside the try so a missing/None link skips this article
            # instead of crashing the whole loop.
            url = clean_google_news_url(raw_link)
            with st.spinner(f" Trying URL... {url}"):
                response = requests.get(
                    url,
                    timeout=timeout,
                    headers={"User-Agent": "Mozilla/5.0"},
                )
                response.raise_for_status()
                soup = BeautifulSoup(response.text, "html.parser")

                # Extract visible <p> tags to approximate the main content.
                paragraphs = soup.find_all("p")
                content = "\n".join(
                    p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)
                )

                # Crude filter to skip empty or useless pages.
                if content and len(content) > 200:
                    documents.append(
                        Document(
                            page_content=content,
                            metadata={
                                "source": "Google News",
                                "title": article.get("title", ""),
                                "published": article.get("date", ""),
                                "link": url,
                            },
                        )
                    )
        except Exception as e:
            # Best-effort scrape: surface the failure instead of silently
            # swallowing it, then move on to the next article.
            st.caption(f"Skipping article ({raw_link}): {e}")

    return documents
123
 
124
  if activities == "Symbol Analysis":
125
  ticker_user = st.text_input("Enter Ticker for NSE Stocks","")