Spaces:
Running
Running
Sigrid De los Santos
committed on
Commit
·
97063b2
1
Parent(s):
9c57dcd
Remove remaining binary file for Hugging Face
Browse files
- src/news_analysis.py +177 -27
src/news_analysis.py
CHANGED
@@ -1,3 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import csv
|
3 |
from datetime import datetime
|
@@ -7,15 +175,11 @@ from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
|
|
7 |
import requests
|
8 |
from dotenv import load_dotenv
|
9 |
from fin_interpreter import analyze_article
|
10 |
-
from tavily import TavilyClient
|
11 |
|
12 |
-
# === Load environment
|
13 |
load_dotenv()
|
14 |
OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
|
15 |
-
TAVILY_KEY =
|
16 |
-
|
17 |
-
# === Initialize Tavily Client ===
|
18 |
-
tavily_client = TavilyClient(api_key=TAVILY_KEY)
|
19 |
|
20 |
# === Get OpenAI client when needed ===
|
21 |
def get_llm():
|
@@ -31,8 +195,9 @@ def get_related_terms(topic):
|
|
31 |
response = llm.invoke(prompt)
|
32 |
return response.content.split(",")
|
33 |
|
|
|
34 |
def tavily_search(query, days, max_results=10):
|
35 |
-
api_key = os.
|
36 |
url = "https://api.tavily.com/search"
|
37 |
headers = {"Authorization": f"Bearer {api_key}"}
|
38 |
payload = {
|
@@ -77,28 +242,13 @@ def fetch_deep_news(topic, days):
|
|
77 |
for query in all_queries:
|
78 |
try:
|
79 |
print(f"🔍 Tavily query: {query}")
|
80 |
-
response =
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
"Content-Type": "application/json"
|
85 |
-
},
|
86 |
-
json={
|
87 |
-
"query": query,
|
88 |
-
"search_depth": "advanced",
|
89 |
-
"topic": "news",
|
90 |
-
"days": int(days),
|
91 |
-
"max_results": 10,
|
92 |
-
"include_answer": False,
|
93 |
-
"include_raw_content": False
|
94 |
-
}
|
95 |
-
)
|
96 |
-
|
97 |
-
if response.status_code != 200:
|
98 |
-
print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
|
99 |
continue
|
100 |
|
101 |
-
for item in response.
|
102 |
url = item.get("url")
|
103 |
content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
|
104 |
if url and url not in seen_urls and len(content) > 150:
|
|
|
1 |
+
# import os
|
2 |
+
# import csv
|
3 |
+
# from datetime import datetime
|
4 |
+
# from langchain_openai import ChatOpenAI
|
5 |
+
# from langchain_core.prompts import PromptTemplate
|
6 |
+
# from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
|
7 |
+
# import requests
|
8 |
+
# from dotenv import load_dotenv
|
9 |
+
# from fin_interpreter import analyze_article
|
10 |
+
# from tavily import TavilyClient
|
11 |
+
|
12 |
+
# # === Load environment or passed keys ===
|
13 |
+
# load_dotenv()
|
14 |
+
# OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
|
15 |
+
# TAVILY_KEY = os.environ.get("TAVILY_API_KEY") or os.getenv("TAVILY_KEY")
|
16 |
+
|
17 |
+
# # === Initialize Tavily Client ===
|
18 |
+
# tavily_client = TavilyClient(api_key=TAVILY_KEY)
|
19 |
+
|
20 |
+
# # === Get OpenAI client when needed ===
|
21 |
+
# def get_llm():
|
22 |
+
# openai_key = os.environ.get("OPENAI_API_KEY")
|
23 |
+
# if not openai_key:
|
24 |
+
# raise ValueError("OPENAI_API_KEY not found.")
|
25 |
+
# return ChatOpenAI(model_name="gpt-4.1", openai_api_key=openai_key)
|
26 |
+
|
27 |
+
# # === Related Terms ===
|
28 |
+
# def get_related_terms(topic):
|
29 |
+
# llm = get_llm()
|
30 |
+
# prompt = f"What are 5 closely related financial or industry terms to '{topic}'?"
|
31 |
+
# response = llm.invoke(prompt)
|
32 |
+
# return response.content.split(",")
|
33 |
+
|
34 |
+
# def tavily_search(query, days, max_results=10):
|
35 |
+
# api_key = os.getenv("TAVILY_KEY")
|
36 |
+
# url = "https://api.tavily.com/search"
|
37 |
+
# headers = {"Authorization": f"Bearer {api_key}"}
|
38 |
+
# payload = {
|
39 |
+
# "query": query,
|
40 |
+
# "search_depth": "advanced",
|
41 |
+
# "topic": "news",
|
42 |
+
# "days": int(days),
|
43 |
+
# "max_results": max_results,
|
44 |
+
# "include_answer": False,
|
45 |
+
# "include_raw_content": False
|
46 |
+
# }
|
47 |
+
# response = requests.post(url, json=payload, headers=headers)
|
48 |
+
# return response.json()
|
49 |
+
|
50 |
+
# # === Smart News Search ===
|
51 |
+
# def fetch_deep_news(topic, days):
|
52 |
+
# all_results = []
|
53 |
+
# seen_urls = set()
|
54 |
+
|
55 |
+
# base_queries = [
|
56 |
+
# topic,
|
57 |
+
# f"{topic} AND startup",
|
58 |
+
# f"{topic} AND acquisition OR merger OR funding",
|
59 |
+
# f"{topic} AND CEO OR executive OR leadership",
|
60 |
+
# f"{topic} AND venture capital OR Series A OR Series B",
|
61 |
+
# f"{topic} AND government grant OR approval OR contract",
|
62 |
+
# f"{topic} AND underrated OR small-cap OR micro-cap"
|
63 |
+
# ]
|
64 |
+
|
65 |
+
# investor_queries = [
|
66 |
+
# f"{topic} AND BlackRock OR Vanguard OR SoftBank",
|
67 |
+
# f"{topic} AND Elon Musk OR Sam Altman OR Peter Thiel",
|
68 |
+
# f"{topic} AND Berkshire Hathaway OR Warren Buffett",
|
69 |
+
# f"{topic} AND institutional investor OR hedge fund",
|
70 |
+
# ]
|
71 |
+
|
72 |
+
# related_terms = get_related_terms(topic)
|
73 |
+
# synonym_queries = [f"{term} AND {kw}" for term in related_terms for kw in ["startup", "funding", "merger", "acquisition"]]
|
74 |
+
|
75 |
+
# all_queries = base_queries + investor_queries + synonym_queries
|
76 |
+
|
77 |
+
# for query in all_queries:
|
78 |
+
# try:
|
79 |
+
# print(f"🔍 Tavily query: {query}")
|
80 |
+
# response = requests.post(
|
81 |
+
# url="https://api.tavily.com/search",
|
82 |
+
# headers={
|
83 |
+
# "Authorization": f"Bearer {TAVILY_KEY}",
|
84 |
+
# "Content-Type": "application/json"
|
85 |
+
# },
|
86 |
+
# json={
|
87 |
+
# "query": query,
|
88 |
+
# "search_depth": "advanced",
|
89 |
+
# "topic": "news",
|
90 |
+
# "days": int(days),
|
91 |
+
# "max_results": 10,
|
92 |
+
# "include_answer": False,
|
93 |
+
# "include_raw_content": False
|
94 |
+
# }
|
95 |
+
# )
|
96 |
+
|
97 |
+
# if response.status_code != 200:
|
98 |
+
# print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
|
99 |
+
# continue
|
100 |
+
|
101 |
+
# for item in response.json().get("results", []):
|
102 |
+
# url = item.get("url")
|
103 |
+
# content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
|
104 |
+
# if url and url not in seen_urls and len(content) > 150:
|
105 |
+
# all_results.append({
|
106 |
+
# "title": item.get("title"),
|
107 |
+
# "url": url,
|
108 |
+
# "content": content
|
109 |
+
# })
|
110 |
+
# seen_urls.add(url)
|
111 |
+
|
112 |
+
# except Exception as e:
|
113 |
+
# print(f"⚠️ Tavily request failed for query '{query}': {e}")
|
114 |
+
|
115 |
+
# print(f"📰 Total articles collected: {len(all_results)}")
|
116 |
+
# return all_results
|
117 |
+
|
118 |
+
# # === Generate Markdown Report ===
|
119 |
+
# def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
|
120 |
+
# news_results = news_results[:max_articles]
|
121 |
+
|
122 |
+
# for item in news_results:
|
123 |
+
# result = analyze_article(item["content"])
|
124 |
+
# item["fin_sentiment"] = result.get("sentiment", "neutral")
|
125 |
+
# item["fin_confidence"] = result.get("confidence", 0.0)
|
126 |
+
# item["investment_decision"] = result.get("investment_decision", "Watch")
|
127 |
+
|
128 |
+
# article_summary = "".join(
|
129 |
+
# f"- **{item['title']}**: {item['content'][:max_chars_per_article]}... "
|
130 |
+
# f"(Sentiment: {item['fin_sentiment'].title()}, Confidence: {item['fin_confidence']:.2f}, "
|
131 |
+
# f"Decision: {item['investment_decision']}) [link]({item['url']})\n"
|
132 |
+
# for item in news_results
|
133 |
+
# )
|
134 |
+
|
135 |
+
# prompt = PromptTemplate.from_template("""
|
136 |
+
# You're a highly focused value investor. Analyze this week's news on "{Topic}".
|
137 |
+
|
138 |
+
# Your goal is to uncover:
|
139 |
+
# - Meaningful events (e.g., CEO joining a startup, insider buys, big-name partnerships)
|
140 |
+
# - Startups or small caps that may signal undervalued opportunity
|
141 |
+
# - Connections to key individuals or institutions (e.g., Elon Musk investing, Sam Altman joining)
|
142 |
+
# - Companies with strong fundamentals: low P/E, low P/B, high ROE, recent IPOs, moats, or high free cash flow
|
143 |
+
|
144 |
+
# ### News
|
145 |
+
# {ArticleSummaries}
|
146 |
+
|
147 |
+
# Write a markdown memo with:
|
148 |
+
# 1. **Key Value Signals**
|
149 |
+
# 2. **Stocks or Startups to Watch**
|
150 |
+
# 3. **What Smart Money Might Be Acting On**
|
151 |
+
# 4. **References**
|
152 |
+
# 5. **Investment Hypothesis**
|
153 |
+
|
154 |
+
# Include context and macroeconomic/regulatory angles. Add an intro on sentiment and market trends for the week.
|
155 |
+
# """)
|
156 |
+
|
157 |
+
# chat_prompt = ChatPromptTemplate.from_messages([
|
158 |
+
# SystemMessagePromptTemplate(prompt=prompt)
|
159 |
+
# ])
|
160 |
+
# prompt_value = chat_prompt.format_prompt(
|
161 |
+
# Topic=topic,
|
162 |
+
# ArticleSummaries=article_summary
|
163 |
+
# ).to_messages()
|
164 |
+
|
165 |
+
# llm = get_llm()
|
166 |
+
# result = llm.invoke(prompt_value)
|
167 |
+
# return result.content
|
168 |
+
|
169 |
import os
|
170 |
import csv
|
171 |
from datetime import datetime
|
|
|
175 |
import requests
|
176 |
from dotenv import load_dotenv
|
177 |
from fin_interpreter import analyze_article
|
|
|
178 |
|
179 |
+
# === Load environment ===
|
180 |
load_dotenv()
|
181 |
OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
|
182 |
+
TAVILY_KEY = None # Will be accessed dynamically at runtime
|
|
|
|
|
|
|
183 |
|
184 |
# === Get OpenAI client when needed ===
|
185 |
def get_llm():
|
|
|
195 |
response = llm.invoke(prompt)
|
196 |
return response.content.split(",")
|
197 |
|
198 |
+
# === Tavily Search ===
|
199 |
def tavily_search(query, days, max_results=10):
|
200 |
+
api_key = os.environ.get("TAVILY_API_KEY") or TAVILY_KEY
|
201 |
url = "https://api.tavily.com/search"
|
202 |
headers = {"Authorization": f"Bearer {api_key}"}
|
203 |
payload = {
|
|
|
242 |
for query in all_queries:
|
243 |
try:
|
244 |
print(f"🔍 Tavily query: {query}")
|
245 |
+
response = tavily_search(query, days)
|
246 |
+
|
247 |
+
if not isinstance(response, dict) or "results" not in response:
|
248 |
+
print(f"⚠️ Tavily API response issue: {response}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
continue
|
250 |
|
251 |
+
for item in response.get("results", []):
|
252 |
url = item.get("url")
|
253 |
content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
|
254 |
if url and url not in seen_urls and len(content) > 150:
|