import os
import re
import markdown
from image_search import search_unsplash_image # Must return (image_url, image_credit)
def convert_md_folder_to_html(md_folder: str, html_output_folder: str) -> None:
    """Convert every ``.md`` file directly inside *md_folder* to HTML.

    Each matching file is handed to ``convert_single_md_to_html`` (defined
    in this file), which runs the same read / render / write pipeline this
    function previously duplicated line for line and prints the same
    "Converted" message per file.

    Args:
        md_folder: Directory that is scanned (non-recursively) for files
            whose name ends in ``.md``.
        html_output_folder: Directory receiving the generated ``.html``
            files. It is created if missing, even when no file matches.

    Raises:
        OSError: If *md_folder* cannot be listed, or a file cannot be read
            or written.
    """
    os.makedirs(html_output_folder, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # conversion order (and the printed log) reproducible between runs.
    for filename in sorted(os.listdir(md_folder)):
        md_path = os.path.join(md_folder, filename)
        # Skip other file types, and directories that merely happen to be
        # named "*.md" (opening those would raise).
        if not filename.endswith(".md") or not os.path.isfile(md_path):
            continue
        convert_single_md_to_html(md_path, html_output_folder)
import os
import re
import markdown
from image_search import search_unsplash_image # Must return (image_url, image_credit)
# NOTE(review): this file reached review with the HTML markup stripped out of
# its string literals (empty search strings, regexes broken across lines).
# The tag names below are reconstructed from the in-file comments ("metrics
# blockquote", "bullet list", "inline references") -- confirm them against
# version control before relying on the exact output.
_MD_IMAGE_RE = re.compile(r'!\[.*?\]\(.*?\)')
_REFERENCE_PARAGRAPH_RE = re.compile(r'<p>(\[\d+\](?:,\s*\[\d+\])*)</p>')
_HTML_TAG_RE = re.compile(r'<.*?>')
_BLOCKQUOTE_OPEN = "<blockquote>"
_BLOCKQUOTE_CLOSE = "</blockquote>"


def _split_metrics_blockquote(html_body):
    """Cut the first blockquote out of *html_body* and turn it into a list.

    Returns a ``(remaining_html, metrics_html)`` pair. ``metrics_html`` is a
    ``<ul>`` with one ``<li>`` per non-blank text line of the blockquote, or
    ``""`` when there is no complete blockquote or it contains no text.
    """
    start = html_body.find(_BLOCKQUOTE_OPEN)
    if start == -1:
        return html_body, ""
    close = html_body.find(_BLOCKQUOTE_CLOSE, start + len(_BLOCKQUOTE_OPEN))
    if close == -1:
        # Unterminated blockquote: leave the body alone. The unguarded
        # find() result (-1) would otherwise produce a nonsensical slice.
        return html_body, ""
    end = close + len(_BLOCKQUOTE_CLOSE)

    # Drop the markup, keep the text; every non-blank line becomes a bullet.
    text = _HTML_TAG_RE.sub('', html_body[start:end]).strip()
    items = [f"<li>{line.strip()}</li>" for line in text.splitlines() if line.strip()]
    metrics_html = f"<ul>{''.join(items)}</ul>" if items else ""
    return html_body[:start] + html_body[end:], metrics_html


def convert_single_md_to_html(md_path: str, html_output_folder: str) -> None:
    """Render one Markdown file to a standalone HTML page.

    Steps: read *md_path* as UTF-8, remove Markdown image syntax, render
    with the ``extra``, ``codehilite`` and ``toc`` extensions, unwrap
    paragraphs that consist only of bracketed reference numbers, move the
    first blockquote into a bullet list, look up a header image by title,
    and write ``<name>.html`` into *html_output_folder*.

    Args:
        md_path: Path of the Markdown file to convert.
        html_output_folder: Directory for the generated page; created if it
            does not exist.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    from html import escape  # local import keeps this block self-contained

    os.makedirs(html_output_folder, exist_ok=True)

    # splitext() removes only the trailing extension. The previous
    # str.replace(".md", ...) rewrote every ".md" occurrence in the name and
    # left non-".md" inputs without an ".html" suffix at all.
    title = os.path.splitext(os.path.basename(md_path))[0]
    html_path = os.path.join(html_output_folder, title + ".html")

    with open(md_path, "r", encoding="utf-8") as f:
        md_content = f.read()

    md_content = _MD_IMAGE_RE.sub('', md_content)  # remove Markdown images
    html_body = markdown.markdown(md_content, extensions=["extra", "codehilite", "toc"])
    html_body = _REFERENCE_PARAGRAPH_RE.sub(r'\1', html_body)  # inline refs

    # Per the import comment in this file, the helper must return
    # (image_url, image_credit).
    image_url, image_credit = search_unsplash_image(title)

    html_body, metrics_block = _split_metrics_blockquote(html_body)

    safe_title = escape(title)
    image_block = ""
    if image_url:
        # NOTE(review): image_credit is inserted verbatim because it may
        # already contain markup (e.g. an attribution link) -- confirm what
        # search_unsplash_image returns and escape it if it is plain text.
        image_block = (
            f'<figure><img src="{escape(image_url)}" alt="{safe_title}">'
            f"<figcaption>{image_credit or ''}</figcaption></figure>"
        )

    # NOTE(review): the original page template's markup was lost; only the
    # {title} and {html_body} placeholders survived. This is a minimal
    # reconstruction that also emits the image and metrics list, which the
    # surviving code computed but never used -- restore the real template
    # from version control if it is available.
    html_template = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
</head>
<body>
<h1>{safe_title}</h1>
{image_block}
{metrics_block}
{html_body}
</body>
</html>
"""

    with open(html_path, "w", encoding="utf-8") as f:
        f.write(html_template)
    print(f"✅ Converted: {md_path} -> {html_path}")
# Script entry point -- for manual testing only.
#
# Usage: python <this file> [SOURCE] [HTML_OUTPUT_FOLDER]
#   SOURCE may be a folder of .md files or a single .md file, e.g.
#   /Users/sigridveronica/Desktop/Investing/data/nuclear_energy_2025-06-03.md
# With no arguments the original author's local paths are used, so the
# previous behavior (convert the whole data folder) is unchanged.
if __name__ == "__main__":
    import sys

    md_folder = "/Users/sigridveronica/Desktop/Investing/data"
    html_output_folder = "/Users/sigridveronica/Desktop/Investing/html"

    cli_args = sys.argv[1:]
    source = cli_args[0] if cli_args else md_folder
    if len(cli_args) > 1:
        html_output_folder = cli_args[1]

    if os.path.isfile(source):
        convert_single_md_to_html(source, html_output_folder)
    else:
        convert_md_folder_to_html(source, html_output_folder)