Sigrid De los Santos committed on
Commit
a9b1809
·
1 Parent(s): 3e4bf85

Remove remaining binary file for Hugging Face

Browse files
Files changed (1) hide show
  1. src/main.py +62 -50
src/main.py CHANGED
@@ -3,14 +3,16 @@ import sys
3
  from datetime import datetime
4
  from dotenv import load_dotenv
5
  import pandas as pd
 
 
 
6
 
7
- from image_search import search_unsplash_image
8
  from md_html import convert_single_md_to_html as convert_md_to_html
9
  from news_analysis import fetch_deep_news, generate_value_investor_report
10
  from csv_utils import detect_changes
11
 
12
- # Setup paths
13
- BASE_DIR = os.path.dirname(os.path.dirname(__file__)) # one level up from src/
14
  DATA_DIR = os.path.join(BASE_DIR, "data")
15
  HTML_DIR = os.path.join(BASE_DIR, "html")
16
  CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
@@ -18,7 +20,7 @@ CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
18
  os.makedirs(DATA_DIR, exist_ok=True)
19
  os.makedirs(HTML_DIR, exist_ok=True)
20
 
21
- # Load .env
22
  load_dotenv()
23
 
24
  def build_metrics_box(topic, num_articles):
@@ -30,16 +32,33 @@ def build_metrics_box(topic, num_articles):
30
  >
31
  """
32
 
33
- def run_value_investing_analysis(csv_path, progress_callback=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  current_df = pd.read_csv(csv_path)
35
  prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
36
-
37
  if os.path.exists(prev_path):
38
  previous_df = pd.read_csv(prev_path)
39
  changed_df = detect_changes(current_df, previous_df)
40
  if changed_df.empty:
41
- if progress_callback:
42
- progress_callback("✅ No changes detected. Skipping processing.")
43
  return []
44
  else:
45
  changed_df = current_df
@@ -49,27 +68,18 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
49
  for _, row in changed_df.iterrows():
50
  topic = row.get("topic")
51
  timespan = row.get("timespan_days", 7)
52
-
53
- if progress_callback:
54
- progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
55
 
56
  news = fetch_deep_news(topic, timespan)
57
  if not news:
58
- if progress_callback:
59
- progress_callback(f"⚠️ No news found for: {topic}")
60
  continue
61
 
62
- if progress_callback:
63
- progress_callback(f"🧠 Analyzing news for: {topic}")
64
-
65
  report_body = generate_value_investor_report(topic, news)
66
-
67
- # Use placeholder image instead of API call
68
- image_url = "https://via.placeholder.com/1281x721?text=No+Image"
69
- image_credit = "Image unavailable"
70
 
71
  metrics_md = build_metrics_box(topic, len(news))
72
- full_md = metrics_md + report_body
73
 
74
  base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
75
  filename = base_filename + ".md"
@@ -81,30 +91,22 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
81
  filepath = os.path.join(DATA_DIR, filename)
82
  counter += 1
83
 
84
- if progress_callback:
85
- progress_callback(f"📝 Saving markdown for: {topic}")
86
-
87
  with open(filepath, "w", encoding="utf-8") as f:
88
  f.write(full_md)
89
 
90
  new_md_files.append(filepath)
91
 
92
- if progress_callback:
93
- progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
94
-
95
  current_df.to_csv(prev_path, index=False)
96
  return new_md_files
97
 
98
- def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
99
  os.environ["TAVILY_API_KEY"] = tavily_api_key
100
 
101
- new_md_files = run_value_investing_analysis(csv_path, progress_callback)
102
  new_html_paths = []
103
 
104
  for md_path in new_md_files:
105
- if progress_callback:
106
- progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
107
-
108
  convert_md_to_html(md_path, HTML_DIR)
109
  html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
110
  new_html_paths.append(html_path)
@@ -117,19 +119,18 @@ if __name__ == "__main__":
117
  convert_md_to_html(md, HTML_DIR)
118
  print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
119
 
 
120
  # import os
121
  # import sys
122
  # from datetime import datetime
123
  # from dotenv import load_dotenv
 
124
 
125
  # from image_search import search_unsplash_image
126
  # from md_html import convert_single_md_to_html as convert_md_to_html
127
  # from news_analysis import fetch_deep_news, generate_value_investor_report
128
-
129
- # import pandas as pd
130
  # from csv_utils import detect_changes
131
 
132
-
133
  # # Setup paths
134
  # BASE_DIR = os.path.dirname(os.path.dirname(__file__)) # one level up from src/
135
  # DATA_DIR = os.path.join(BASE_DIR, "data")
@@ -151,14 +152,16 @@ if __name__ == "__main__":
151
  # >
152
  # """
153
 
154
- # def run_value_investing_analysis(csv_path):
155
  # current_df = pd.read_csv(csv_path)
156
  # prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
 
157
  # if os.path.exists(prev_path):
158
  # previous_df = pd.read_csv(prev_path)
159
  # changed_df = detect_changes(current_df, previous_df)
160
  # if changed_df.empty:
161
- # print("✅ No changes detected. Skipping processing.")
 
162
  # return []
163
  # else:
164
  # changed_df = current_df
@@ -168,20 +171,24 @@ if __name__ == "__main__":
168
  # for _, row in changed_df.iterrows():
169
  # topic = row.get("topic")
170
  # timespan = row.get("timespan_days", 7)
171
- # print(f"\n🔍 Processing: {topic} ({timespan} days)")
 
 
172
 
173
  # news = fetch_deep_news(topic, timespan)
174
  # if not news:
175
- # print(f"⚠️ No news found for: {topic}")
 
176
  # continue
177
 
178
- # report_body = generate_value_investor_report(topic, news)
179
- # from image_search import search_unsplash_image
180
 
181
- # # Later inside your loop
182
- # image_url, image_credit = search_unsplash_image(topic)
183
 
184
- # #image_url, image_credit = search_unsplash_image(topic, os.getenv("OPENAI_API_KEY"))
 
 
185
 
186
  # metrics_md = build_metrics_box(topic, len(news))
187
  # full_md = metrics_md + report_body
@@ -196,34 +203,39 @@ if __name__ == "__main__":
196
  # filepath = os.path.join(DATA_DIR, filename)
197
  # counter += 1
198
 
 
 
 
199
  # with open(filepath, "w", encoding="utf-8") as f:
200
  # f.write(full_md)
201
 
202
  # new_md_files.append(filepath)
203
 
204
- # print(f"✅ Markdown saved to: {DATA_DIR}")
 
 
205
  # current_df.to_csv(prev_path, index=False)
206
  # return new_md_files
207
 
208
-
209
- # def run_pipeline(csv_path, tavily_api_key):
210
  # os.environ["TAVILY_API_KEY"] = tavily_api_key
211
 
212
- # new_md_files = run_value_investing_analysis(csv_path)
213
  # new_html_paths = []
214
 
215
  # for md_path in new_md_files:
 
 
 
216
  # convert_md_to_html(md_path, HTML_DIR)
217
  # html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
218
  # new_html_paths.append(html_path)
219
 
220
  # return new_html_paths
221
 
222
-
223
  # if __name__ == "__main__":
224
  # md_files = run_value_investing_analysis(CSV_PATH)
225
  # for md in md_files:
226
  # convert_md_to_html(md, HTML_DIR)
227
  # print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
228
 
229
-
 
3
  from datetime import datetime
4
  from dotenv import load_dotenv
5
  import pandas as pd
6
+ from io import BytesIO
7
+ import base64
8
+ import matplotlib.pyplot as plt
9
 
 
10
  from md_html import convert_single_md_to_html as convert_md_to_html
11
  from news_analysis import fetch_deep_news, generate_value_investor_report
12
  from csv_utils import detect_changes
13
 
14
+ # === Setup Paths ===
15
+ BASE_DIR = os.path.dirname(os.path.dirname(__file__))
16
  DATA_DIR = os.path.join(BASE_DIR, "data")
17
  HTML_DIR = os.path.join(BASE_DIR, "html")
18
  CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
 
20
  os.makedirs(DATA_DIR, exist_ok=True)
21
  os.makedirs(HTML_DIR, exist_ok=True)
22
 
23
+ # === Load .env ===
24
  load_dotenv()
25
 
26
  def build_metrics_box(topic, num_articles):
 
32
  >
33
  """
34
 
35
def create_sentiment_chart_md(topic):
    """Render a placeholder sentiment chart and return it as inline Markdown.

    The plotted series is synthetic: seven daily points ending today, rising
    from 100 in steps of 3. It does not depend on any fetched news. The PNG
    is base64-encoded into a ``data:`` URI, so no image file is written.

    Args:
        topic: Topic name; used only in the chart title.

    Returns:
        A Markdown image tag of the form
        ``![Sentiment Trend](data:image/png;base64,<payload>)``.
    """
    # Placeholder dummy chart data.
    dates = pd.date_range(end=datetime.today(), periods=7)
    values = [100 + i * 3 for i in range(7)]

    # Hold explicit Figure/Axes handles instead of relying on pyplot's
    # implicit "current figure", so exactly this figure is closed below.
    fig, ax = plt.subplots(figsize=(6, 3))
    try:
        ax.plot(dates, values, marker="o")
        # NOTE(review): the default Matplotlib font may not include this
        # emoji glyph — confirm it renders in the deployment environment.
        ax.set_title(f"📈 Sentiment Trend: {topic}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Sentiment")
        ax.grid(True)

        buffer = BytesIO()
        fig.savefig(buffer, format="png")
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"![Sentiment Trend](data:image/png;base64,{encoded})"
53
+
54
+ def run_value_investing_analysis(csv_path):
55
  current_df = pd.read_csv(csv_path)
56
  prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
 
57
  if os.path.exists(prev_path):
58
  previous_df = pd.read_csv(prev_path)
59
  changed_df = detect_changes(current_df, previous_df)
60
  if changed_df.empty:
61
+ print("✅ No changes detected. Skipping processing.")
 
62
  return []
63
  else:
64
  changed_df = current_df
 
68
  for _, row in changed_df.iterrows():
69
  topic = row.get("topic")
70
  timespan = row.get("timespan_days", 7)
71
+ print(f"\n🔍 Processing: {topic} ({timespan} days)")
 
 
72
 
73
  news = fetch_deep_news(topic, timespan)
74
  if not news:
75
+ print(f"⚠️ No news found for: {topic}")
 
76
  continue
77
 
 
 
 
78
  report_body = generate_value_investor_report(topic, news)
79
+ chart_md = create_sentiment_chart_md(topic)
 
 
 
80
 
81
  metrics_md = build_metrics_box(topic, len(news))
82
+ full_md = metrics_md + report_body + "\n\n" + chart_md
83
 
84
  base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
85
  filename = base_filename + ".md"
 
91
  filepath = os.path.join(DATA_DIR, filename)
92
  counter += 1
93
 
 
 
 
94
  with open(filepath, "w", encoding="utf-8") as f:
95
  f.write(full_md)
96
 
97
  new_md_files.append(filepath)
98
 
99
+ print(f"✅ Markdown saved to: {DATA_DIR}")
 
 
100
  current_df.to_csv(prev_path, index=False)
101
  return new_md_files
102
 
103
+ def run_pipeline(csv_path, tavily_api_key):
104
  os.environ["TAVILY_API_KEY"] = tavily_api_key
105
 
106
+ new_md_files = run_value_investing_analysis(csv_path)
107
  new_html_paths = []
108
 
109
  for md_path in new_md_files:
 
 
 
110
  convert_md_to_html(md_path, HTML_DIR)
111
  html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
112
  new_html_paths.append(html_path)
 
119
  convert_md_to_html(md, HTML_DIR)
120
  print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
121
 
122
+
123
  # import os
124
  # import sys
125
  # from datetime import datetime
126
  # from dotenv import load_dotenv
127
+ # import pandas as pd
128
 
129
  # from image_search import search_unsplash_image
130
  # from md_html import convert_single_md_to_html as convert_md_to_html
131
  # from news_analysis import fetch_deep_news, generate_value_investor_report
 
 
132
  # from csv_utils import detect_changes
133
 
 
134
  # # Setup paths
135
  # BASE_DIR = os.path.dirname(os.path.dirname(__file__)) # one level up from src/
136
  # DATA_DIR = os.path.join(BASE_DIR, "data")
 
152
  # >
153
  # """
154
 
155
+ # def run_value_investing_analysis(csv_path, progress_callback=None):
156
  # current_df = pd.read_csv(csv_path)
157
  # prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
158
+
159
  # if os.path.exists(prev_path):
160
  # previous_df = pd.read_csv(prev_path)
161
  # changed_df = detect_changes(current_df, previous_df)
162
  # if changed_df.empty:
163
+ # if progress_callback:
164
+ # progress_callback("✅ No changes detected. Skipping processing.")
165
  # return []
166
  # else:
167
  # changed_df = current_df
 
171
  # for _, row in changed_df.iterrows():
172
  # topic = row.get("topic")
173
  # timespan = row.get("timespan_days", 7)
174
+
175
+ # if progress_callback:
176
+ # progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
177
 
178
  # news = fetch_deep_news(topic, timespan)
179
  # if not news:
180
+ # if progress_callback:
181
+ # progress_callback(f"⚠️ No news found for: {topic}")
182
  # continue
183
 
184
+ # if progress_callback:
185
+ # progress_callback(f"🧠 Analyzing news for: {topic}")
186
 
187
+ # report_body = generate_value_investor_report(topic, news)
 
188
 
189
+ # # Use placeholder image instead of API call
190
+ # image_url = "https://via.placeholder.com/1281x721?text=No+Image"
191
+ # image_credit = "Image unavailable"
192
 
193
  # metrics_md = build_metrics_box(topic, len(news))
194
  # full_md = metrics_md + report_body
 
203
  # filepath = os.path.join(DATA_DIR, filename)
204
  # counter += 1
205
 
206
+ # if progress_callback:
207
+ # progress_callback(f"📝 Saving markdown for: {topic}")
208
+
209
  # with open(filepath, "w", encoding="utf-8") as f:
210
  # f.write(full_md)
211
 
212
  # new_md_files.append(filepath)
213
 
214
+ # if progress_callback:
215
+ # progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
216
+
217
  # current_df.to_csv(prev_path, index=False)
218
  # return new_md_files
219
 
220
+ # def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
 
221
  # os.environ["TAVILY_API_KEY"] = tavily_api_key
222
 
223
+ # new_md_files = run_value_investing_analysis(csv_path, progress_callback)
224
  # new_html_paths = []
225
 
226
  # for md_path in new_md_files:
227
+ # if progress_callback:
228
+ # progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
229
+
230
  # convert_md_to_html(md_path, HTML_DIR)
231
  # html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
232
  # new_html_paths.append(html_path)
233
 
234
  # return new_html_paths
235
 
 
236
  # if __name__ == "__main__":
237
  # md_files = run_value_investing_analysis(CSV_PATH)
238
  # for md in md_files:
239
  # convert_md_to_html(md, HTML_DIR)
240
  # print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
241