from __future__ import annotations
import json
import tempfile
from pathlib import Path
import gradio as gr
from huggingface_hub import hf_hub_download
from modular_graph_and_candidates import (
build_graph_json,
generate_html,
build_timeline_json,
generate_timeline_html,
filter_graph_by_threshold,
)
def _escape_srcdoc(text: str) -> str:
return (
text.replace("&", "&")
.replace("\"", """)
.replace("'", "'")
.replace("<", "<")
.replace(">", ">")
)
# URL of the upstream transformers GitHub repository.
# NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
# Hugging Face Hub repo id (fetched with repo_type="dataset") holding the
# precomputed artifacts that the run_* functions download.
CACHE_REPO = "Molbap/hf_cached_embeds_log"
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85) -> tuple[str, str]:
    """Download a cached payload, filter it, and render it as iframe markup.

    ``latest.json`` in the ``CACHE_REPO`` dataset names the current ``sha``;
    the payload is then read from
    ``{kind}/{sha}/{sim_method}-m{0|1}.json``, passed through
    ``filter_graph_by_threshold`` and rendered to a standalone HTML page.

    Args:
        kind: ``"timeline"`` renders with ``generate_timeline_html``; any
            other value renders with ``generate_html``.  It is also the
            top-level folder of the cached file.
        sim_method: Similarity method name, used in the cached file name.
        threshold: Forwarded to ``filter_graph_by_threshold``.
        multimodal: Selects the ``-m1`` (True) or ``-m0`` (False) file.
        height_vh: Height of the returned iframe, in ``vh`` units.

    Returns:
        ``(iframe_html, json_path)``: the iframe markup embedding the rendered
        page, and the path of a temporary file containing the filtered data
        serialized as JSON.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    repo_id = CACHE_REPO
    latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    # Fail loudly on a malformed index rather than requesting "None/<...>.json".
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"

    json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json", repo_type="dataset")
    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    if kind == "timeline":
        raw_html = generate_timeline_html(filtered_data)
    else:
        raw_html = generate_html(filtered_data)

    # Embed the full page through ``srcdoc`` so it renders in isolation from
    # the host page; the markup must be attribute-escaped first.
    # NOTE(review): this previously evaluated to an empty string, dropping
    # ``raw_html`` and ``height_vh``; confirm the iframe attributes below.
    iframe_html = (
        f'<iframe style="width:100%;height:{height_vh}vh;border:none;" '
        f'srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
    )

    # ``delete=False`` keeps the file on disk after the handle is closed,
    # because its path is handed back to the caller.  Using a context manager
    # (instead of ``mkstemp``) guarantees the OS-level descriptor is closed.
    suffix = "_timeline.json" if kind == "timeline" else ".json"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as handle:
        handle.write(json.dumps(filtered_data))
        json_path = handle.name

    return iframe_html, str(json_path)
def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85) -> str:
    """Return iframe markup embedding the cached ``loc`` HTML page.

    ``latest.json`` in the ``CACHE_REPO`` dataset names the current ``sha``;
    the page is then read from ``loc/{sha}/{sim_method}-m{0|1}.html``.

    Args:
        sim_method: Similarity method name, used in the cached file name.
        multimodal: Selects the ``-m1`` (True) or ``-m0`` (False) file.
        height_vh: Height of the returned iframe, in ``vh`` units.

    Returns:
        An ``<iframe>`` element whose ``srcdoc`` holds the downloaded page.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    latest_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"

    html_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset")
    raw_html = Path(html_fp).read_text(encoding="utf-8")

    # Embed the full page through ``srcdoc``; the markup must be
    # attribute-escaped so it cannot terminate the attribute early.
    # NOTE(review): this previously evaluated to an empty string, dropping
    # ``raw_html`` and ``height_vh``; confirm the iframe attributes below.
    iframe_html = (
        f'<iframe style="width:100%;height:{height_vh}vh;border:none;" '
        f'srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
    )
    return iframe_html
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Serve the cached ``"graph"`` view.

    Thin wrapper over ``_fetch_from_cache_repo``; its return value is passed
    through unchanged.  ``repo_url`` is accepted but never read here.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Serve the cached ``"timeline"`` view.

    Thin wrapper over ``_fetch_from_cache_repo``; its return value is passed
    through unchanged.  ``repo_url`` is accepted but never read here.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
# ───────────────────────────── UI ────────────────────────────────────────────────
# Stylesheet that forces iframes inside the elements with ids ``graph_html``
# and ``timeline_html`` to 85vh tall, full width, with no border.
# NOTE(review): not passed to gr.Blocks() in the visible code, and no visible
# component uses those ids — confirm whether this is still wired up.
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""
# Maps a view name to an integer index.
# NOTE(review): presumably the tab order of the UI; unused in the visible code — confirm.
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
with gr.Blocks() as demo:
    # Single HTML component that receives the rendered markup.
    html = gr.HTML()

    def _load():
        """Build the initial page content: the ``loc`` view for jaccard, multimodal off."""
        return run_loc("jaccard", False)

    # Populate the HTML component once when the page is first loaded.
    demo.load(fn=_load, outputs=[html])


# Start the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()