Sigrid De los Santos committed on
Commit
97063b2
·
1 Parent(s): 9c57dcd

Remove remaining binary file for Hugging Face

Browse files
Files changed (1) hide show
  1. src/news_analysis.py +177 -27
src/news_analysis.py CHANGED
@@ -1,3 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import csv
3
  from datetime import datetime
@@ -7,15 +175,11 @@ from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
7
  import requests
8
  from dotenv import load_dotenv
9
  from fin_interpreter import analyze_article
10
- from tavily import TavilyClient
11
 
12
- # === Load environment or passed keys ===
13
  load_dotenv()
14
  OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
15
- TAVILY_KEY = os.environ.get("TAVILY_API_KEY") or os.getenv("TAVILY_KEY")
16
-
17
- # === Initialize Tavily Client ===
18
- tavily_client = TavilyClient(api_key=TAVILY_KEY)
19
 
20
  # === Get OpenAI client when needed ===
21
  def get_llm():
@@ -31,8 +195,9 @@ def get_related_terms(topic):
31
  response = llm.invoke(prompt)
32
  return response.content.split(",")
33
 
 
34
  def tavily_search(query, days, max_results=10):
35
- api_key = os.getenv("TAVILY_KEY")
36
  url = "https://api.tavily.com/search"
37
  headers = {"Authorization": f"Bearer {api_key}"}
38
  payload = {
@@ -77,28 +242,13 @@ def fetch_deep_news(topic, days):
77
  for query in all_queries:
78
  try:
79
  print(f"🔍 Tavily query: {query}")
80
- response = requests.post(
81
- url="https://api.tavily.com/search",
82
- headers={
83
- "Authorization": f"Bearer {TAVILY_KEY}",
84
- "Content-Type": "application/json"
85
- },
86
- json={
87
- "query": query,
88
- "search_depth": "advanced",
89
- "topic": "news",
90
- "days": int(days),
91
- "max_results": 10,
92
- "include_answer": False,
93
- "include_raw_content": False
94
- }
95
- )
96
-
97
- if response.status_code != 200:
98
- print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
99
  continue
100
 
101
- for item in response.json().get("results", []):
102
  url = item.get("url")
103
  content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
104
  if url and url not in seen_urls and len(content) > 150:
 
1
+ # import os
2
+ # import csv
3
+ # from datetime import datetime
4
+ # from langchain_openai import ChatOpenAI
5
+ # from langchain_core.prompts import PromptTemplate
6
+ # from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
7
+ # import requests
8
+ # from dotenv import load_dotenv
9
+ # from fin_interpreter import analyze_article
10
+ # from tavily import TavilyClient
11
+
12
+ # # === Load environment or passed keys ===
13
+ # load_dotenv()
14
+ # OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
15
+ # TAVILY_KEY = os.environ.get("TAVILY_API_KEY") or os.getenv("TAVILY_KEY")
16
+
17
+ # # === Initialize Tavily Client ===
18
+ # tavily_client = TavilyClient(api_key=TAVILY_KEY)
19
+
20
+ # # === Get OpenAI client when needed ===
21
+ # def get_llm():
22
+ # openai_key = os.environ.get("OPENAI_API_KEY")
23
+ # if not openai_key:
24
+ # raise ValueError("OPENAI_API_KEY not found.")
25
+ # return ChatOpenAI(model_name="gpt-4.1", openai_api_key=openai_key)
26
+
27
+ # # === Related Terms ===
28
+ # def get_related_terms(topic):
29
+ # llm = get_llm()
30
+ # prompt = f"What are 5 closely related financial or industry terms to '{topic}'?"
31
+ # response = llm.invoke(prompt)
32
+ # return response.content.split(",")
33
+
34
+ # def tavily_search(query, days, max_results=10):
35
+ # api_key = os.getenv("TAVILY_KEY")
36
+ # url = "https://api.tavily.com/search"
37
+ # headers = {"Authorization": f"Bearer {api_key}"}
38
+ # payload = {
39
+ # "query": query,
40
+ # "search_depth": "advanced",
41
+ # "topic": "news",
42
+ # "days": int(days),
43
+ # "max_results": max_results,
44
+ # "include_answer": False,
45
+ # "include_raw_content": False
46
+ # }
47
+ # response = requests.post(url, json=payload, headers=headers)
48
+ # return response.json()
49
+
50
+ # # === Smart News Search ===
51
+ # def fetch_deep_news(topic, days):
52
+ # all_results = []
53
+ # seen_urls = set()
54
+
55
+ # base_queries = [
56
+ # topic,
57
+ # f"{topic} AND startup",
58
+ # f"{topic} AND acquisition OR merger OR funding",
59
+ # f"{topic} AND CEO OR executive OR leadership",
60
+ # f"{topic} AND venture capital OR Series A OR Series B",
61
+ # f"{topic} AND government grant OR approval OR contract",
62
+ # f"{topic} AND underrated OR small-cap OR micro-cap"
63
+ # ]
64
+
65
+ # investor_queries = [
66
+ # f"{topic} AND BlackRock OR Vanguard OR SoftBank",
67
+ # f"{topic} AND Elon Musk OR Sam Altman OR Peter Thiel",
68
+ # f"{topic} AND Berkshire Hathaway OR Warren Buffett",
69
+ # f"{topic} AND institutional investor OR hedge fund",
70
+ # ]
71
+
72
+ # related_terms = get_related_terms(topic)
73
+ # synonym_queries = [f"{term} AND {kw}" for term in related_terms for kw in ["startup", "funding", "merger", "acquisition"]]
74
+
75
+ # all_queries = base_queries + investor_queries + synonym_queries
76
+
77
+ # for query in all_queries:
78
+ # try:
79
+ # print(f"🔍 Tavily query: {query}")
80
+ # response = requests.post(
81
+ # url="https://api.tavily.com/search",
82
+ # headers={
83
+ # "Authorization": f"Bearer {TAVILY_KEY}",
84
+ # "Content-Type": "application/json"
85
+ # },
86
+ # json={
87
+ # "query": query,
88
+ # "search_depth": "advanced",
89
+ # "topic": "news",
90
+ # "days": int(days),
91
+ # "max_results": 10,
92
+ # "include_answer": False,
93
+ # "include_raw_content": False
94
+ # }
95
+ # )
96
+
97
+ # if response.status_code != 200:
98
+ # print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
99
+ # continue
100
+
101
+ # for item in response.json().get("results", []):
102
+ # url = item.get("url")
103
+ # content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
104
+ # if url and url not in seen_urls and len(content) > 150:
105
+ # all_results.append({
106
+ # "title": item.get("title"),
107
+ # "url": url,
108
+ # "content": content
109
+ # })
110
+ # seen_urls.add(url)
111
+
112
+ # except Exception as e:
113
+ # print(f"⚠️ Tavily request failed for query '{query}': {e}")
114
+
115
+ # print(f"📰 Total articles collected: {len(all_results)}")
116
+ # return all_results
117
+
118
+ # # === Generate Markdown Report ===
119
+ # def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
120
+ # news_results = news_results[:max_articles]
121
+
122
+ # for item in news_results:
123
+ # result = analyze_article(item["content"])
124
+ # item["fin_sentiment"] = result.get("sentiment", "neutral")
125
+ # item["fin_confidence"] = result.get("confidence", 0.0)
126
+ # item["investment_decision"] = result.get("investment_decision", "Watch")
127
+
128
+ # article_summary = "".join(
129
+ # f"- **{item['title']}**: {item['content'][:max_chars_per_article]}... "
130
+ # f"(Sentiment: {item['fin_sentiment'].title()}, Confidence: {item['fin_confidence']:.2f}, "
131
+ # f"Decision: {item['investment_decision']}) [link]({item['url']})\n"
132
+ # for item in news_results
133
+ # )
134
+
135
+ # prompt = PromptTemplate.from_template("""
136
+ # You're a highly focused value investor. Analyze this week's news on "{Topic}".
137
+
138
+ # Your goal is to uncover:
139
+ # - Meaningful events (e.g., CEO joining a startup, insider buys, big-name partnerships)
140
+ # - Startups or small caps that may signal undervalued opportunity
141
+ # - Connections to key individuals or institutions (e.g., Elon Musk investing, Sam Altman joining)
142
+ # - Companies with strong fundamentals: low P/E, low P/B, high ROE, recent IPOs, moats, or high free cash flow
143
+
144
+ # ### News
145
+ # {ArticleSummaries}
146
+
147
+ # Write a markdown memo with:
148
+ # 1. **Key Value Signals**
149
+ # 2. **Stocks or Startups to Watch**
150
+ # 3. **What Smart Money Might Be Acting On**
151
+ # 4. **References**
152
+ # 5. **Investment Hypothesis**
153
+
154
+ # Include context and macroeconomic/regulatory angles. Add an intro on sentiment and market trends for the week.
155
+ # """)
156
+
157
+ # chat_prompt = ChatPromptTemplate.from_messages([
158
+ # SystemMessagePromptTemplate(prompt=prompt)
159
+ # ])
160
+ # prompt_value = chat_prompt.format_prompt(
161
+ # Topic=topic,
162
+ # ArticleSummaries=article_summary
163
+ # ).to_messages()
164
+
165
+ # llm = get_llm()
166
+ # result = llm.invoke(prompt_value)
167
+ # return result.content
168
+
169
  import os
170
  import csv
171
  from datetime import datetime
 
175
  import requests
176
  from dotenv import load_dotenv
177
  from fin_interpreter import analyze_article
 
178
 
179
+ # === Load environment ===
180
  load_dotenv()
181
  OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
182
+ TAVILY_KEY = None # Will be accessed dynamically at runtime
 
 
 
183
 
184
  # === Get OpenAI client when needed ===
185
  def get_llm():
 
195
  response = llm.invoke(prompt)
196
  return response.content.split(",")
197
 
198
+ # === Tavily Search ===
199
  def tavily_search(query, days, max_results=10):
200
+ api_key = os.environ.get("TAVILY_API_KEY") or TAVILY_KEY
201
  url = "https://api.tavily.com/search"
202
  headers = {"Authorization": f"Bearer {api_key}"}
203
  payload = {
 
242
  for query in all_queries:
243
  try:
244
  print(f"🔍 Tavily query: {query}")
245
+ response = tavily_search(query, days)
246
+
247
+ if not isinstance(response, dict) or "results" not in response:
248
+ print(f"⚠️ Tavily API response issue: {response}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  continue
250
 
251
+ for item in response.get("results", []):
252
  url = item.get("url")
253
  content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
254
  if url and url not in seen_urls and len(content) > 150: