from __future__ import annotations import json import tempfile from pathlib import Path import gradio as gr from huggingface_hub import hf_hub_download from modular_graph_and_candidates import ( build_graph_json, generate_html, build_timeline_json, generate_timeline_html, filter_graph_by_threshold, ) def _escape_srcdoc(text: str) -> str: return ( text.replace("&", "&") .replace("\"", """) .replace("'", "'") .replace("<", "<") .replace(">", ">") ) HF_MAIN_REPO = "https://github.com/huggingface/transformers" CACHE_REPO = "Molbap/hf_cached_embeds_log" def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85): repo_id = CACHE_REPO latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset") info = json.loads(Path(latest_fp).read_text(encoding="utf-8")) sha = info.get("sha") key = f"{sha}/{sim_method}-m{int(multimodal)}" json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json", repo_type="dataset") raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8")) filtered_data = filter_graph_by_threshold(raw_data, threshold) if kind == "timeline": raw_html = generate_timeline_html(filtered_data) else: raw_html = generate_html(filtered_data) iframe_html = f'' tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1]) tmp.write_text(json.dumps(filtered_data), encoding="utf-8") return iframe_html, str(tmp) def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85): latest_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset") info = json.loads(Path(latest_fp).read_text(encoding="utf-8")) sha = info["sha"] key = f"{sha}/{sim_method}-m{int(multimodal)}" html_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset") raw_html = Path(html_fp).read_text(encoding="utf-8") iframe_html = f'' return iframe_html def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85): return _fetch_from_cache_repo("graph", sim_method, threshold, multimodal, height_vh=height_vh) def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85): return _fetch_from_cache_repo("timeline", sim_method, threshold, multimodal, height_vh=height_vh) # ───────────────────────────── UI ──────────────────────────────────────────────── CUSTOM_CSS = """ #graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;} """ TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2} with gr.Blocks() as demo: html = gr.HTML() def _load(): return run_loc(sim_method="jaccard", multimodal=False) demo.load(_load, outputs=[html]) if __name__ == "__main__": demo.launch()