Spaces:

rajat5ranjan
/

StockMarketInsights

Running

rajat5ranjan committed on Jul 16

Commit

1dea348

verified ·

1 Parent(s): e44c761

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ import altair as alt
 from GoogleNews import GoogleNews
 from bs4 import BeautifulSoup
 import requests
 st.set_page_config(layout="wide")
 GOOGLE_API_KEY=os.environ['GOOGLE_API_KEY']
@@ -424,6 +424,15 @@ elif activities=="News Sentiment":
     """
     def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
         """
         Fetches news articles from Google News and returns a list of LangChain Document objects,
@@ -444,8 +453,11 @@ elif activities=="News Sentiment":
         documents = []
         for article in articles[:max_articles]:
-            url = article.get("link")
             try:
                 response = requests.get(url, timeout=timeout, headers={
                     "User-Agent": "Mozilla/5.0"
                 })

 from GoogleNews import GoogleNews
 from bs4 import BeautifulSoup
 import requests
+from urllib.parse import urlparse, urlunparse
 st.set_page_config(layout="wide")
 GOOGLE_API_KEY=os.environ['GOOGLE_API_KEY']
     """
+    def clean_google_news_url(url: str) -> str:
+        """
+        Remove Google tracking parameters (&ved=..., &usg=...) from an article URL.
+
+        Rules, applied in order:
+          1. A missing or empty link (None or "") yields "" instead of raising,
+             so the caller's loop is not aborted by a TypeError before it
+             reaches its own try block.
+          2. If the URL contains ".html", everything after the first ".html"
+             is dropped.
+          3. Otherwise the URL is cut at the first "&ved=" or "&usg=" marker,
+             whichever comes first.
+          4. A URL with neither ".html" nor a tracking marker is returned
+             unchanged.
+
+        Args:
+            url: The raw article link; may be None or empty.
+
+        Returns:
+            The cleaned URL, or "" when no link was supplied.
+        """
+        if not url:
+            return ""
+
+        # Keep only the part up to and including the first ".html".
+        head, suffix, _ = url.partition(".html")
+        if suffix:
+            return head + suffix
+
+        # No ".html" anchor: cut at the earliest tracking marker, if any.
+        positions = [url.find(marker) for marker in ("&ved=", "&usg=")]
+        cut = min((pos for pos in positions if pos != -1), default=len(url))
+        return url[:cut]
     def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
         """
         Fetches news articles from Google News and returns a list of LangChain Document objects,
         documents = []
         for article in articles[:max_articles]:
+            url = clean_google_news_url(article.get("link"))
             try:
+                st.caption(f" Trying URL... {url}")
                 response = requests.get(url, timeout=timeout, headers={
                     "User-Agent": "Mozilla/5.0"
                 })