"""Convert Markdown reports into standalone HTML pages with a banner image.

NOTE(review): the copy of this module that was reviewed had its HTML tag
literals stripped (string literals spanned raw newlines and did not parse).
The tag names and the page template below are reconstructed from the
surviving comments and f-string placeholders -- confirm them against the
intended markup before relying on the exact output.
"""

import html
import os
import re
import sys

import markdown

from image_search import search_unsplash_image  # Must return (image_url, image_credit)

# Markdown image syntax: ![alt](url)
_MD_IMAGE_RE = re.compile(r'!\[.*?\]\(.*?\)')
# A paragraph holding only citation markers such as "[1]" or "[1], [2]".
# NOTE(review): the wrapping <p>...</p> is an assumption (tags were lost).
_REFERENCE_PARAGRAPH_RE = re.compile(r'<p>(\[\d+\](?:,\s*\[\d+\])*)</p>')
# Any HTML tag; used to reduce the metrics blockquote to plain text.
_TAG_RE = re.compile(r'<.*?>')

# NOTE(review): assumed from the "Extract metrics blockquote" comment.
_METRICS_OPEN = "<blockquote>"
_METRICS_CLOSE = "</blockquote>"


def _extract_metrics(html_body):
    """Cut the first blockquote out of *html_body* and turn it into a list.

    Returns a ``(body_without_blockquote, metrics_html)`` tuple. When no
    complete blockquote is present the body is returned unchanged and
    ``metrics_html`` is an empty string.
    """
    start = html_body.find(_METRICS_OPEN)
    if start == -1:
        return html_body, ""
    close_at = html_body.find(_METRICS_CLOSE, start)
    if close_at == -1:
        # Unbalanced markup: leave the body alone rather than slice blindly.
        return html_body, ""
    end = close_at + len(_METRICS_CLOSE)

    metrics_raw = html_body[start:end]
    remaining = html_body[:start] + html_body[end:]

    # Text inside the blockquote was already entity-escaped by the Markdown
    # renderer, so it is safe to re-wrap without escaping again.
    text = _TAG_RE.sub('', metrics_raw).strip()
    items = [
        f"<li>{line.strip()}</li>"
        for line in text.splitlines()
        if line.strip()
    ]
    if not items:
        return remaining, ""
    return remaining, "<ul>\n" + "\n".join(items) + "\n</ul>"


def _render_page(title, image_url, metrics_block, html_body):
    """Assemble the final HTML document from its already-rendered parts."""
    safe_title = html.escape(title)
    banner = ""
    if image_url:
        # Escape the URL for attribute context ("&" -> "&amp;", quotes).
        banner = (
            f'<img src="{html.escape(image_url, quote=True)}" '
            f'alt="{safe_title} Banner">'
        )
    return f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{safe_title}</title>
</head>
<body>
    {banner}
    {metrics_block}
    {html_body}
</body>
</html>
"""


def convert_single_md_to_html(md_path, html_output_folder):
    """Convert one Markdown file into an HTML page in *html_output_folder*.

    The page title is the file name without its extension. Markdown images
    are removed, citation-only paragraphs are unwrapped, a banner image is
    looked up via ``search_unsplash_image(title)``, and the first blockquote
    is re-rendered as a bullet list.

    Args:
        md_path: Path of the Markdown file to read (UTF-8).
        html_output_folder: Directory for the ``.html`` file; created if
            missing.
    """
    os.makedirs(html_output_folder, exist_ok=True)

    # splitext strips only the extension; str.replace(".md", "") would also
    # remove ".md" occurring elsewhere in the name.
    title = os.path.splitext(os.path.basename(md_path))[0]
    html_path = os.path.join(html_output_folder, title + ".html")

    with open(md_path, "r", encoding="utf-8") as f:
        md_content = f.read()

    md_content = _MD_IMAGE_RE.sub('', md_content)  # remove Markdown images
    html_body = markdown.markdown(
        md_content, extensions=["extra", "codehilite", "toc"]
    )
    html_body = _REFERENCE_PARAGRAPH_RE.sub(r'\1', html_body)  # inline references

    # The credit is returned by the search helper but is not rendered in the
    # visible template. NOTE(review): confirm whether attribution is needed.
    image_url, _image_credit = search_unsplash_image(title)

    html_body, metrics_block = _extract_metrics(html_body)
    page = _render_page(title, image_url, metrics_block, html_body)

    with open(html_path, "w", encoding="utf-8") as f:
        f.write(page)

    print(f"✅ Converted: {md_path} -> {html_path}")


def convert_md_folder_to_html(md_folder, html_output_folder):
    """Convert every ``*.md`` file directly inside *md_folder* to HTML.

    Args:
        md_folder: Directory scanned (non-recursively) for Markdown files.
        html_output_folder: Directory for the generated pages; created even
            when *md_folder* contains no Markdown files.
    """
    os.makedirs(html_output_folder, exist_ok=True)
    # Sorted so that runs process files in a stable order.
    for filename in sorted(os.listdir(md_folder)):
        if filename.endswith(".md"):
            convert_single_md_to_html(
                os.path.join(md_folder, filename), html_output_folder
            )


### FOR TESTING ONLY
if __name__ == "__main__":
    # Defaults are the original developer paths; override them with
    # `python <script> [md_folder] [html_output_folder]`.
    md_folder = "/Users/sigridveronica/Desktop/Investing/data"
    html_output_folder = "/Users/sigridveronica/Desktop/Investing/html"
    if len(sys.argv) > 1:
        md_folder = sys.argv[1]
    if len(sys.argv) > 2:
        html_output_folder = sys.argv[2]
    convert_md_folder_to_html(md_folder, html_output_folder)