File size: 3,411 Bytes
53c0cc8
 
 
 
 
 
 
f62613d
53c0cc8
f62613d
 
 
 
 
 
 
53c0cc8
6d106b8
 
 
 
 
 
 
 
 
53c0cc8
6641fa8
53c0cc8
f62613d
6641fa8
 
 
 
31b1b7e
6641fa8
31b1b7e
 
 
 
 
 
 
 
 
f62613d
6641fa8
31b1b7e
6641fa8
53c0cc8
f62613d
c7100d5
 
 
 
 
 
f62613d
c7100d5
 
f62613d
 
c7100d5
f62613d
 
49600c8
c410e03
ceffe7d
 
49600c8
ceffe7d
f62613d
5ff759b
ceffe7d
4b63c32
 
 
37c98e3
4b63c32
f62613d
 
4b63c32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from __future__ import annotations

import json
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import hf_hub_download

from modular_graph_and_candidates import (
    build_graph_json,
    generate_html,
    build_timeline_json,
    generate_timeline_html,
    filter_graph_by_threshold,
)

def _escape_srcdoc(text: str) -> str:
    """Escape *text* for embedding inside a quoted ``srcdoc="..."`` attribute.

    The five HTML-significant characters (``&``, ``"``, ``'``, ``<``, ``>``)
    are replaced by character references so that the document cannot
    terminate the surrounding attribute or be parsed as markup of the host
    page. The browser decodes the references again when it reads the
    attribute value, so the embedded document is rendered unchanged.

    Args:
        text: Raw HTML document to embed.

    Returns:
        The escaped string, safe to interpolate between attribute quotes.
    """
    # "&" must be handled first; otherwise the ampersands introduced by the
    # later replacements would themselves be escaped a second time.
    escaped = text.replace("&", "&amp;")
    escaped = escaped.replace('"', "&quot;")
    escaped = escaped.replace("'", "&#x27;")
    escaped = escaped.replace("<", "&lt;")
    escaped = escaped.replace(">", "&gt;")
    return escaped

# URL of the upstream source repository.
# NOTE(review): not referenced by any code in the portion of the file reviewed here.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
# Hub repo id handed to hf_hub_download (with repo_type="dataset") to fetch
# "latest.json" and the precomputed graph / timeline / loc artifacts.
CACHE_REPO = "Molbap/hf_cached_embeds_log"

def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85):
    """Download a cached artifact, filter it, and render it as an iframe.

    Reads ``latest.json`` from ``CACHE_REPO`` to find the current ``sha``,
    downloads ``{kind}/{sha}/{sim_method}-m{0|1}.json`` from the same repo,
    passes the parsed JSON through ``filter_graph_by_threshold`` and renders
    the result with ``generate_timeline_html`` (when ``kind == "timeline"``)
    or ``generate_html`` (any other ``kind``).

    Args:
        kind: Artifact family; also the top-level folder name in the repo.
        sim_method: Similarity method name used in the artifact file name.
        threshold: Value forwarded to ``filter_graph_by_threshold``.
        multimodal: Selects the ``-m1`` (True) or ``-m0`` (False) artifact.
        height_vh: Height of the iframe, in ``vh`` units.

    Returns:
        A ``(iframe_html, json_path)`` tuple: the iframe markup embedding the
        rendered page via ``srcdoc``, and the path (``str``) of a temporary
        file containing the filtered data serialized as JSON.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    repo_id = CACHE_REPO
    latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    # Fail fast on a malformed pointer file (same lookup as run_loc) rather
    # than silently requesting a "None/..." path from the hub.
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json", repo_type="dataset")

    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    is_timeline = kind == "timeline"
    if is_timeline:
        raw_html = generate_timeline_html(filtered_data)
    else:
        raw_html = generate_html(filtered_data)

    iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # NamedTemporaryFile closes its handle when the ``with`` block exits;
    # tempfile.mkstemp would hand back an open OS-level descriptor that has
    # to be closed manually. delete=False keeps the file on disk so the
    # caller can still read it through the returned path.
    suffix = "_timeline.json" if is_timeline else ".json"
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=suffix, delete=False, encoding="utf-8"
    ) as tmp_file:
        tmp_file.write(json.dumps(filtered_data))
        tmp_path = tmp_file.name
    return iframe_html, str(tmp_path)

def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return iframe markup embedding the cached LOC page.

    Looks up the current ``sha`` in ``latest.json`` of ``CACHE_REPO``,
    downloads ``loc/{sha}/{sim_method}-m{0|1}.html`` from the same repo and
    wraps its escaped contents in an ``<iframe srcdoc=...>`` element that is
    ``height_vh`` viewport-height units tall.
    """
    pointer_path = Path(
        hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    )
    commit_sha = json.loads(pointer_path.read_text(encoding="utf-8"))["sha"]

    # Artifact name: <sha>/<method>-m<0|1>
    variant = f"{commit_sha}/{sim_method}-m{int(multimodal)}"
    page_path = Path(
        hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{variant}.html", repo_type="dataset")
    )

    escaped_page = _escape_srcdoc(page_path.read_text(encoding="utf-8"))
    return f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{escaped_page}"></iframe>'

def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return the ``(iframe_html, json_path)`` pair for the cached "graph" artifact.

    ``repo_url`` is accepted but not used; everything else is forwarded to
    ``_fetch_from_cache_repo``.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )

def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Return the ``(iframe_html, json_path)`` pair for the cached "timeline" artifact.

    ``repo_url`` is accepted but not used; everything else is forwarded to
    ``_fetch_from_cache_repo``.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )

# ───────────────────────────── UI ────────────────────────────────────────────────

# Stylesheet that forces iframes under the #graph_html and #timeline_html
# element ids to 85vh height and full width, without a border.
# NOTE(review): not passed to gr.Blocks() in the code visible here, and no
# visible component declares those element ids — confirm it is still needed.
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""

# Maps a tab name to an integer position.
# NOTE(review): no reader of this mapping is visible in this part of the file.
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}

# Build the Gradio app: a single HTML component filled in by the load event.
with gr.Blocks() as demo:
    html = gr.HTML()
    # Loader without inputs: fetches the LOC iframe markup for the fixed
    # configuration sim_method="jaccard", multimodal=False.
    def _load():
        return run_loc(sim_method="jaccard", multimodal=False)
    # Register _load on the Blocks load event; its return value is written
    # into the HTML component above.
    demo.load(_load, outputs=[html])

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()