File size: 3,411 Bytes
53c0cc8 f62613d 53c0cc8 f62613d 53c0cc8 6d106b8 53c0cc8 6641fa8 53c0cc8 f62613d 6641fa8 31b1b7e 6641fa8 31b1b7e f62613d 6641fa8 31b1b7e 6641fa8 53c0cc8 f62613d c7100d5 f62613d c7100d5 f62613d c7100d5 f62613d 49600c8 c410e03 ceffe7d 49600c8 ceffe7d f62613d 5ff759b ceffe7d 4b63c32 37c98e3 4b63c32 f62613d 4b63c32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
from __future__ import annotations
import json
import tempfile
from pathlib import Path
import gradio as gr
from huggingface_hub import hf_hub_download
from modular_graph_and_candidates import (
build_graph_json,
generate_html,
build_timeline_json,
generate_timeline_html,
filter_graph_by_threshold,
)
def _escape_srcdoc(text: str) -> str:
return (
text.replace("&", "&")
.replace("\"", """)
.replace("'", "'")
.replace("<", "<")
.replace(">", ">")
)
# URL of the upstream transformers repository.
# NOTE(review): not referenced anywhere in the visible code — confirm it is still needed.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
# Hub repo id of the cache; every hf_hub_download call in this file passes it
# together with repo_type="dataset".
CACHE_REPO = "Molbap/hf_cached_embeds_log"
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85):
    """Build an iframe and a JSON file from data stored in the cache repo.

    Reads ``latest.json`` from the ``CACHE_REPO`` dataset to obtain the current
    ``sha``, downloads ``{kind}/{sha}/{sim_method}-m{0|1}.json``, filters it
    with ``filter_graph_by_threshold`` and renders the filtered data to HTML.

    Args:
        kind: Sub-folder of the cache repo to read. ``"timeline"`` is rendered
            with ``generate_timeline_html``; any other value is rendered with
            ``generate_html``.
        sim_method: Similarity method name used in the cached file name.
        threshold: Forwarded to ``filter_graph_by_threshold``.
        multimodal: Selects the ``-m1`` (True) or ``-m0`` (False) cached file.
        height_vh: Height of the iframe, in ``vh`` units.

    Returns:
        A ``(iframe_html, json_path)`` tuple: the iframe markup embedding the
        rendered page, and the path (as ``str``) of a temporary file holding
        the filtered data as JSON.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    latest_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    # Strict lookup (as in run_loc): a missing sha should fail here instead of
    # turning into a request for a "None/..." file.
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"

    json_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"{kind}/{key}.json", repo_type="dataset")
    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    is_timeline = kind == "timeline"
    render = generate_timeline_html if is_timeline else generate_html
    raw_html = render(filtered_data)
    iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can use the returned path; the ``with`` block guarantees the
    # underlying descriptor is closed (mkstemp's descriptor was never closed).
    suffix = "_timeline.json" if is_timeline else ".json"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
        json.dump(filtered_data, tmp)
    return iframe_html, tmp.name
def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return iframe markup embedding the cached ``loc`` page.

    The page is looked up in the ``CACHE_REPO`` dataset under
    ``loc/{sha}/{sim_method}-m{0|1}.html``, where ``sha`` comes from the
    repo's ``latest.json``.

    Args:
        sim_method: Similarity method name used in the cached file name.
        multimodal: Selects the ``-m1`` (True) or ``-m0`` (False) cached file.
        height_vh: Height of the iframe, in ``vh`` units.

    Returns:
        The ``<iframe>`` markup with the page escaped into ``srcdoc``.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """

    def _read_cached(filename: str) -> str:
        # Download one file from the cache dataset and return its text.
        local_path = hf_hub_download(repo_id=CACHE_REPO, filename=filename, repo_type="dataset")
        return Path(local_path).read_text(encoding="utf-8")

    sha = json.loads(_read_cached("latest.json"))["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    page = _read_cached(f"loc/{key}.html")
    return f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(page)}"></iframe>'
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return the ``(iframe_html, json_path)`` pair for the ``"graph"`` view.

    ``repo_url`` is accepted but not used; the data always comes from the
    cache repo via ``_fetch_from_cache_repo``.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return the ``(iframe_html, json_path)`` pair for the ``"timeline"`` view.

    ``repo_url`` is accepted but not used; the data always comes from the
    cache repo via ``_fetch_from_cache_repo``.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
# ───────────────────────────── UI ────────────────────────────────────────────────
# CSS rule sizing any iframe under the #graph_html / #timeline_html element ids.
# NOTE(review): not passed to gr.Blocks() in the visible code, and no visible
# component sets those ids — confirm whether this is still applied anywhere.
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""
# View name -> integer index (presumably a tab position — TODO confirm).
# NOTE(review): not referenced in the visible code.
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
with gr.Blocks() as demo:

    def _load():
        """Produce the iframe markup shown when the page loads."""
        iframe_markup = run_loc(sim_method="jaccard", multimodal=False)
        return iframe_markup

    # Single HTML component; it receives the output of ``_load``.
    html = gr.HTML()
    demo.load(_load, outputs=[html])

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|