human_eval_app / app.py
mertunsall
Fix
bb469a5
import json
from functools import lru_cache
import re
import traceback
from typing import Optional
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download, hf_hub_url
from huggingface_hub.utils import HfHubHTTPError
# Dataset repository pre-filled in the repo ID textbox and loaded on page load.
DEFAULT_REPO_ID = "mlfoundations-cua-dev/human_eval"
# Filename suffixes (compared against the lower-cased path) treated as images.
IMAGE_EXTENSIONS = (
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".gif",
    ".webp",
    ".tif",
    ".tiff",
)
# Filename prefixes that are sorted to the front of the gallery. The
# misspelled "intial_screenshot" variant is matched alongside the correct one.
INIT_SCREENSHOT_NAMES = {"intial_screenshot", "initial_screenshot"}
# Matches "step_<digits>" with an optional single extension, ignoring case;
# group 1 captures the step number.
STEP_FILENAME_PATTERN = re.compile(r"^step_(\d+)(?:\.[^.]+)?$", re.IGNORECASE)
# Name of the per-folder JSON Lines file that holds the step annotations.
TRAJECTORY_FILENAME = "traj.jsonl"
# Hub client shared by all repository listing calls.
api = HfApi()
@lru_cache(maxsize=16)
def _list_repo_files(repo_id: str) -> list[str]:
    """List every file path stored in the dataset repository ``repo_id``.

    The listing is memoised per repository ID (up to 16 entries), so repeated
    calls for the same repository reuse the first result.
    """
    repo_listing = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    return repo_listing
def _extract_top_level(repo_id: str) -> tuple[list[str], list[str]]:
    """Return the sorted root-level folder names and root-level file names."""
    folder_names: set[str] = set()
    root_files: list[str] = []
    for entry in _list_repo_files(repo_id):
        head, separator, _ = entry.partition("/")
        if separator:
            # Anything containing a slash lives inside the folder named `head`.
            folder_names.add(head)
        else:
            root_files.append(entry)
    return sorted(folder_names), sorted(root_files)
def _get_subdirectories(repo_id: str, directory: str) -> list[str]:
"""Return the direct subdirectories of the given directory."""
if not directory:
return []
files = [path for path in _list_repo_files(repo_id) if path.startswith(f"{directory}/")]
relative_paths = [path[len(directory) + 1 :] for path in files]
# Get immediate subdirectories (first level only)
child_dirs = sorted({rel.split("/", 1)[0] for rel in relative_paths if "/" in rel})
return child_dirs
def _build_path(*parts) -> str:
"""Join path parts while skipping empty values."""
return "/".join(part for part in parts if part)
def _image_sort_key(path: str):
    """Build the sort key that orders images in the gallery.

    Files whose name starts with an initial-screenshot prefix come first,
    ``step_<n>`` files follow in numeric order, and all remaining files are
    ordered by their lower-cased filename.
    """
    _, _, tail = path.rpartition("/")
    basename = tail.lower()
    for init_name in INIT_SCREENSHOT_NAMES:
        if basename.startswith(init_name):
            return (0, 0)
    step = STEP_FILENAME_PATTERN.match(basename)
    if step is None:
        return (2, basename)
    return (1, int(step.group(1)))
def _load_traj_entries(repo_id: str, directory: str) -> list:
"""Load trajectory annotations from traj.jsonl within the given directory."""
if not directory:
return []
traj_path = _build_path(directory, TRAJECTORY_FILENAME)
repo_files = _list_repo_files(repo_id)
if traj_path not in repo_files:
return []
local_path = hf_hub_download(
repo_id=repo_id,
filename=traj_path,
repo_type="dataset",
)
entries: list = []
with open(local_path, "r", encoding="utf-8") as file:
for raw_line in file:
stripped = raw_line.strip()
if not stripped:
continue
parsed = json.loads(stripped)
if isinstance(parsed, list):
entries.extend(parsed)
else:
entries.append(parsed)
return entries
def _load_instruction_file(repo_id: str, filepath: str) -> Optional[str]:
    """Fetch a metadata JSON file and pull out its ``instruction`` value.

    A JSON object is queried directly; for a JSON array the first object
    element that has an ``instruction`` key is used. Returns ``None`` when no
    instruction is present, the stripped text when it is a string, and its
    ``str()`` form otherwise.
    """
    cached_copy = hf_hub_download(
        repo_id=repo_id,
        filename=filepath,
        repo_type="dataset",
    )
    with open(cached_copy, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if isinstance(payload, dict):
        found = payload.get("instruction")
    elif isinstance(payload, list):
        holders = (
            item
            for item in payload
            if isinstance(item, dict) and "instruction" in item
        )
        first_holder = next(holders, None)
        found = None if first_holder is None else first_holder["instruction"]
    else:
        found = None

    if found is None:
        return None
    return found.strip() if isinstance(found, str) else str(found)
def _format_annotation(index: int, annotation) -> str:
prefix = f"Step {index + 1}"
if isinstance(annotation, str):
content = annotation.strip()
else:
try:
content = json.dumps(annotation, ensure_ascii=False)
except TypeError:
content = str(annotation)
return f"{prefix}: {content}" if content else prefix
def _prepare_gallery_items(
repo_id: str, directory: Optional[str]
) -> tuple[list[tuple[str, str]], list[str], str]:
"""Prepare gallery items, status messages, and instruction text for a directory."""
if not directory:
return (
[],
["Select a fifth-level folder to view screenshots."],
"ℹ️ Instruction will appear once a fifth-level folder is selected.",
)
prefix = f"{directory}/"
files = [path for path in _list_repo_files(repo_id) if path.startswith(prefix)]
image_files = [
path
for path in files
if "/" not in path[len(prefix) :]
and path.lower().endswith(IMAGE_EXTENSIONS)
]
sorted_files = sorted(image_files, key=_image_sort_key)
image_urls = [
hf_hub_url(repo_id=repo_id, filename=path, repo_type="dataset")
for path in sorted_files
]
status_lines: list[str] = [
f"🖼️ Images: {len(image_urls)}",
]
annotations: list = []
try:
annotations = _load_traj_entries(repo_id, directory)
except json.JSONDecodeError as error:
status_lines.append(f"⚠️ Failed to parse `{TRAJECTORY_FILENAME}`: {error}")
except Exception as error: # pragma: no cover - unexpected IO errors
status_lines.append(f"⚠️ Error loading `{TRAJECTORY_FILENAME}`: {error}")
status_lines.append(f"📝 Annotations: {len(annotations)}")
if not image_urls:
status_lines.append("⚠️ No images found in this folder.")
if image_urls and not annotations:
status_lines.append(
f"⚠️ `{TRAJECTORY_FILENAME}` missing or empty; no annotations to display."
)
if annotations and len(annotations) != len(image_urls):
status_lines.append(
"⚠️ Mismatch between images and annotations; displaying available pairs."
)
folder_name = directory.rsplit("/", 1)[-1]
metadata_files = [
path
for path in files
if "/" not in path[len(prefix) :]
and path.lower().endswith(".json")
and not path.lower().endswith(".jsonl")
]
instruction_markdown = ""
if metadata_files:
metadata_files.sort(
key=lambda path: (
0
if path.rsplit("/", 1)[-1].rsplit(".", 1)[0] == folder_name
else 1,
path.rsplit("/", 1)[-1].lower(),
)
)
metadata_path = metadata_files[0]
metadata_filename = metadata_path.rsplit("/", 1)[-1]
try:
instruction_value = _load_instruction_file(repo_id, metadata_path)
if instruction_value:
instruction_markdown = f"**Instruction:** {instruction_value}"
status_lines.append("📋 Instruction: loaded")
else:
instruction_markdown = (
f"⚠️ Instruction missing in `{metadata_filename}`."
)
status_lines.append("⚠️ Instruction: missing in metadata.")
except json.JSONDecodeError as error:
status_lines.append(
f"⚠️ Failed to parse `{metadata_filename}`: {error}"
)
instruction_markdown = (
f"⚠️ Unable to parse instruction from `{metadata_filename}`."
)
except Exception as error: # pragma: no cover - unexpected IO errors
status_lines.append(
f"⚠️ Error loading `{metadata_filename}`: {error}"
)
instruction_markdown = (
f"⚠️ Unable to load instruction from `{metadata_filename}`."
)
else:
instruction_markdown = "⚠️ Instruction file not found in this folder."
status_lines.append("⚠️ Instruction: file not found.")
gallery_items: list[tuple[str, str]] = []
for idx, url in enumerate(image_urls):
caption = "No annotation available"
if idx < len(annotations):
caption = _format_annotation(idx, annotations[idx])
gallery_items.append((url, caption))
return gallery_items, status_lines, instruction_markdown
def _dropdown_update(
    *,
    choices: list[str],
    value: Optional[str],
    label: str,
    filled_info: str,
    empty_info: str,
):
    """Build a dropdown update that is disabled and cleared when ``choices`` is empty.

    With choices available the dropdown is interactive, keeps ``value`` and
    shows ``filled_info``; otherwise its value is ``None`` and ``empty_info``
    is shown.
    """
    if choices:
        selected, info_text = value, filled_info
    else:
        selected, info_text = None, empty_info
    return gr.update(
        choices=choices,
        value=selected,
        interactive=bool(choices),
        label=label,
        info=info_text,
    )
def refresh_repo(repo_id: str):
    """Load ``repo_id`` and produce updates for every component of the page.

    The first folder at each of the five levels is pre-selected and the
    gallery is prepared for the resulting fifth-level folder when one exists.
    Returns nine updates: the five dropdowns, the instruction text, the
    gallery, the annotation status, and the repository status. Listing
    failures are logged to stdout and reported in the repository status.
    """

    def _failure(message: str):
        # Disable every dropdown, blank the panels, and show only the error.
        blank_dropdowns = tuple(
            gr.update(choices=[], value=None, interactive=False) for _ in range(5)
        )
        return (
            *blank_dropdowns,
            gr.update(value=""),
            gr.update(value=[]),
            gr.update(value=""),
            gr.update(value=message),
        )

    try:
        top_dirs, top_files = _extract_top_level(repo_id)
    except HfHubHTTPError as error:
        print(f"[refresh_repo] Hub HTTP error for {repo_id}: {error}", flush=True)
        print(traceback.format_exc(), flush=True)
        return _failure(f"❌ Unable to load repo `{repo_id}`: {error}")
    except Exception as error:  # pragma: no cover - network and auth edge cases
        print(f"[refresh_repo] Unexpected error for {repo_id}: {error}", flush=True)
        print(traceback.format_exc(), flush=True)
        return _failure(f"❌ Unexpected error loading `{repo_id}`: {error}")

    summary = [
        f"✅ Loaded `{repo_id}`",
        f"• Top-level folders: {len(top_dirs)}",
    ]
    if top_files:
        summary.append(f"• Loose files at root: {len(top_files)}")
    if not top_dirs:
        summary.append("• No sub-folders found at root.")

    # Walk down four more levels, always following the first folder found.
    level_choices = [top_dirs]
    level_values = [top_dirs[0] if top_dirs else None]
    for _ in range(4):
        children = (
            _get_subdirectories(repo_id, _build_path(*level_values))
            if level_values[-1]
            else []
        )
        level_choices.append(children)
        level_values.append(children[0] if children else None)

    target_directory = _build_path(*level_values) if level_values[-1] else None
    gallery_items, gallery_status, instruction_markdown = _prepare_gallery_items(
        repo_id, target_directory
    )

    # (label, info when choices exist, info when there are none) per level.
    dropdown_text = (
        (
            "Top-level folders",
            "Choose a folder to explore",
            "No folders found at the repository root",
        ),
        (
            "Second-level folders",
            "Choose a second-level folder",
            "No subdirectories under the selected folder",
        ),
        (
            "Third-level folders",
            "Choose a third-level folder",
            "No third-level folders under the selection",
        ),
        (
            "Fourth-level folders",
            "Choose a fourth-level folder",
            "No fourth-level folders under the selection",
        ),
        (
            "Fifth-level folders",
            "Choose a fifth-level folder",
            "No fifth-level folders under the selection",
        ),
    )
    dropdown_updates = [
        _dropdown_update(
            choices=found,
            value=picked,
            label=label,
            filled_info=prompt,
            empty_info=empty_info,
        )
        for found, picked, (label, prompt, empty_info) in zip(
            level_choices, level_values, dropdown_text
        )
    ]

    return (
        *dropdown_updates,
        gr.update(value=instruction_markdown),
        gr.update(value=gallery_items),
        gr.update(value="\n".join(gallery_status)),
        gr.update(value="\n".join(summary)),
    )
def update_second_dropdown(repo_id: str, top_level_dir: str):
    """Rebuild the second- through fifth-level dropdowns for a new top-level folder.

    The first available folder is pre-selected at each deeper level and the
    gallery is prepared for the resulting fifth-level folder when one exists.
    Returns seven updates: four dropdowns, the instruction text, the gallery,
    and the annotation status. Any exception is logged to stdout and replaced
    by "unable to load" placeholder updates.
    """
    # (label, info shown when choices exist) for each downstream dropdown.
    level_text = (
        ("Second-level folders", "Choose a second-level folder"),
        ("Third-level folders", "Choose a third-level folder"),
        ("Fourth-level folders", "Choose a fourth-level folder"),
        ("Fifth-level folders", "Choose a fifth-level folder"),
    )

    def _cleared(empty_infos):
        # One empty, disabled dropdown per downstream level.
        return tuple(
            _dropdown_update(
                choices=[],
                value=None,
                label=label,
                filled_info=prompt,
                empty_info=empty_info,
            )
            for (label, prompt), empty_info in zip(level_text, empty_infos)
        )

    try:
        if not top_level_dir:
            return (
                *_cleared(
                    (
                        "Select a top-level folder first",
                        "Select a higher-level folder first",
                        "Select a higher-level folder first",
                        "Select a higher-level folder first",
                    )
                ),
                gr.update(
                    value="ℹ️ Select a top-level folder to display its instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select a top-level folder to load screenshots and annotations."
                ),
            )

        # Descend level by level, always following the first folder found.
        chosen = [top_level_dir]
        listings = []
        for _ in level_text:
            found = (
                _get_subdirectories(repo_id, _build_path(*chosen))
                if chosen[-1]
                else []
            )
            listings.append(found)
            chosen.append(found[0] if found else None)

        target_directory = _build_path(*chosen) if chosen[-1] else None
        items, status, instruction = _prepare_gallery_items(repo_id, target_directory)

        empty_infos = (
            "No subdirectories under the selected folder",
            "No third-level folders under the selection",
            "No fourth-level folders under the selection",
            "No fifth-level folders under the selection",
        )
        populated = tuple(
            _dropdown_update(
                choices=found,
                value=picked,
                label=label,
                filled_info=prompt,
                empty_info=empty_info,
            )
            for found, picked, (label, prompt), empty_info in zip(
                listings, chosen[1:], level_text, empty_infos
            )
        )
        return (
            *populated,
            gr.update(value=instruction),
            gr.update(value=items),
            gr.update(value="\n".join(status)),
        )
    except Exception as error:
        print(f"[update_second_dropdown] Error for {repo_id}/{top_level_dir}: {error}", flush=True)
        print(traceback.format_exc(), flush=True)
        return (
            *_cleared(("Unable to load subdirectories",) * len(level_text)),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
def update_third_dropdown(repo_id: str, top_level_dir: str, second_level_dir: str):
    """Rebuild the third- through fifth-level dropdowns for a new second-level folder.

    The first available folder is pre-selected at each deeper level and the
    gallery is prepared for the resulting fifth-level folder when one exists.
    Returns six updates: three dropdowns, the instruction text, the gallery,
    and the annotation status. Any exception is logged to stdout and replaced
    by "unable to load" placeholder updates.
    """
    # (label, info shown when choices exist) for each downstream dropdown.
    level_text = (
        ("Third-level folders", "Choose a third-level folder"),
        ("Fourth-level folders", "Choose a fourth-level folder"),
        ("Fifth-level folders", "Choose a fifth-level folder"),
    )

    def _cleared(empty_info):
        # Empty, disabled dropdowns that all share the same explanation.
        return tuple(
            _dropdown_update(
                choices=[],
                value=None,
                label=label,
                filled_info=prompt,
                empty_info=empty_info,
            )
            for label, prompt in level_text
        )

    try:
        if not (top_level_dir and second_level_dir):
            return (
                *_cleared("Select higher-level folders first"),
                gr.update(
                    value="ℹ️ Select higher-level folders to display the instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select higher-level folders to load screenshots and annotations."
                ),
            )

        # Descend level by level, always following the first folder found.
        chosen = [top_level_dir, second_level_dir]
        listings = []
        for _ in level_text:
            found = (
                _get_subdirectories(repo_id, _build_path(*chosen))
                if chosen[-1]
                else []
            )
            listings.append(found)
            chosen.append(found[0] if found else None)

        target_directory = _build_path(*chosen) if chosen[-1] else None
        items, status, instruction = _prepare_gallery_items(repo_id, target_directory)

        empty_infos = (
            "No third-level folders under the selection",
            "No fourth-level folders under the selection",
            "No fifth-level folders under the selection",
        )
        populated = tuple(
            _dropdown_update(
                choices=found,
                value=picked,
                label=label,
                filled_info=prompt,
                empty_info=empty_info,
            )
            for found, picked, (label, prompt), empty_info in zip(
                listings, chosen[2:], level_text, empty_infos
            )
        )
        return (
            *populated,
            gr.update(value=instruction),
            gr.update(value=items),
            gr.update(value="\n".join(status)),
        )
    except Exception as error:
        print(
            f"[update_third_dropdown] Error for {repo_id}/{top_level_dir}/{second_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            *_cleared("Unable to load subdirectories"),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
def update_fourth_dropdown(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
):
    """Rebuild the fourth- and fifth-level dropdowns for a new third-level folder.

    The first available folder is pre-selected at both levels and the gallery
    is prepared for the resulting fifth-level folder when one exists. Returns
    five updates: two dropdowns, the instruction text, the gallery, and the
    annotation status. Any exception is logged to stdout and replaced by
    "unable to load" placeholder updates.
    """

    def _fourth(choices, value, empty_info):
        return _dropdown_update(
            choices=choices,
            value=value,
            label="Fourth-level folders",
            filled_info="Choose a fourth-level folder",
            empty_info=empty_info,
        )

    def _fifth(choices, value, empty_info):
        return _dropdown_update(
            choices=choices,
            value=value,
            label="Fifth-level folders",
            filled_info="Choose a fifth-level folder",
            empty_info=empty_info,
        )

    try:
        if not (top_level_dir and second_level_dir and third_level_dir):
            waiting = "Select higher-level folders first"
            return (
                _fourth([], None, waiting),
                _fifth([], None, waiting),
                gr.update(
                    value="ℹ️ Select higher-level folders to display the instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select higher-level folders to load screenshots and annotations."
                ),
            )

        ancestors = (top_level_dir, second_level_dir, third_level_dir)
        fourth_options = _get_subdirectories(repo_id, _build_path(*ancestors))
        fourth_pick = fourth_options[0] if fourth_options else None

        if fourth_pick:
            fifth_options = _get_subdirectories(
                repo_id, _build_path(*ancestors, fourth_pick)
            )
        else:
            fifth_options = []
        fifth_pick = fifth_options[0] if fifth_options else None

        # The gallery is only populated once a fifth-level folder exists.
        if fifth_pick:
            target_directory = _build_path(*ancestors, fourth_pick, fifth_pick)
        else:
            target_directory = None
        items, status, instruction = _prepare_gallery_items(repo_id, target_directory)

        return (
            _fourth(
                fourth_options,
                fourth_pick,
                "No fourth-level folders under the selection",
            ),
            _fifth(
                fifth_options,
                fifth_pick,
                "No fifth-level folders under the selection",
            ),
            gr.update(value=instruction),
            gr.update(value=items),
            gr.update(value="\n".join(status)),
        )
    except Exception as error:
        print(
            "[update_fourth_dropdown] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        failed = "Unable to load subdirectories"
        return (
            _fourth([], None, failed),
            _fifth([], None, failed),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
def update_fifth_dropdown(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
    fourth_level_dir: str,
):
    """Rebuild the fifth-level dropdown and gallery for a new fourth-level folder.

    The first fifth-level folder is pre-selected and the gallery is prepared
    for it when one exists. Returns four updates: the dropdown, the
    instruction text, the gallery, and the annotation status. Any exception
    is logged to stdout and replaced by "unable to load" placeholder updates.
    """

    def _fifth(choices, value, empty_info):
        return _dropdown_update(
            choices=choices,
            value=value,
            label="Fifth-level folders",
            filled_info="Choose a fifth-level folder",
            empty_info=empty_info,
        )

    try:
        ancestors = (
            top_level_dir,
            second_level_dir,
            third_level_dir,
            fourth_level_dir,
        )
        if not all(ancestors):
            return (
                _fifth([], None, "Select higher-level folders first"),
                gr.update(
                    value="ℹ️ Select higher-level folders to display the instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select higher-level folders to load screenshots and annotations."
                ),
            )

        fifth_options = _get_subdirectories(repo_id, _build_path(*ancestors))
        fifth_pick = fifth_options[0] if fifth_options else None

        # The gallery is only populated once a fifth-level folder exists.
        if fifth_pick:
            target_directory = _build_path(*ancestors, fifth_pick)
        else:
            target_directory = None
        items, status, instruction = _prepare_gallery_items(repo_id, target_directory)

        return (
            _fifth(
                fifth_options,
                fifth_pick,
                "No fifth-level folders under the selection",
            ),
            gr.update(value=instruction),
            gr.update(value=items),
            gr.update(value="\n".join(status)),
        )
    except Exception as error:
        print(
            "[update_fifth_dropdown] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}/{fourth_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            _fifth([], None, "Unable to load subdirectories"),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
def update_gallery(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
    fourth_level_dir: str,
    fifth_level_dir: str,
):
    """Refresh the instruction, gallery, and annotation status for the selected folder.

    The gallery is built only when all five levels are selected; otherwise
    the placeholder content for "no folder" is shown. Any exception is logged
    to stdout and replaced by "unable to load" placeholder updates.
    """
    levels = (
        top_level_dir,
        second_level_dir,
        third_level_dir,
        fourth_level_dir,
        fifth_level_dir,
    )
    try:
        target_directory = _build_path(*levels) if all(levels) else None
        items, status, instruction = _prepare_gallery_items(repo_id, target_directory)
        return (
            gr.update(value=instruction),
            gr.update(value=items),
            gr.update(value="\n".join(status)),
        )
    except Exception as error:
        print(
            "[update_gallery] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}/{fourth_level_dir}/{fifth_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
# Page layout and event wiring. Components are created in display order, so
# the statement order below determines what the page looks like.
with gr.Blocks(title="HF Dataset Explorer") as demo:
    gr.Markdown(
        """# Hugging Face Dataset Explorer
Provide a dataset repository ID (e.g. `org/dataset`) to list its top-level folders."""
    )
    # Repository ID input and the reload button.
    with gr.Row():
        repo_id_input = gr.Textbox(
            value=DEFAULT_REPO_ID,
            label="Dataset repo ID",
            placeholder="owner/dataset",
            info="Any public dataset on the Hugging Face Hub"
        )
        reload_button = gr.Button("Load repo", variant="primary")
    # Repository-level status written by refresh_repo.
    status_display = gr.Markdown()
    # One dropdown per folder level; each starts disabled until it has choices.
    folder_dropdown = gr.Dropdown(label="Top-level folders", interactive=False)
    second_level_dropdown = gr.Dropdown(label="Second-level folders", interactive=False)
    third_level_dropdown = gr.Dropdown(label="Third-level folders", interactive=False)
    fourth_level_dropdown = gr.Dropdown(label="Fourth-level folders", interactive=False)
    fifth_level_dropdown = gr.Dropdown(label="Fifth-level folders", interactive=False)
    # Content of the selected fifth-level folder.
    instruction_display = gr.Markdown()
    image_gallery = gr.Gallery(label="Images", columns=4)
    annotation_status = gr.Markdown()
    # Clicking the button reloads the repository and resets every component.
    reload_button.click(
        refresh_repo,
        inputs=repo_id_input,
        outputs=[
            folder_dropdown,
            second_level_dropdown,
            third_level_dropdown,
            fourth_level_dropdown,
            fifth_level_dropdown,
            instruction_display,
            image_gallery,
            annotation_status,
            status_display,
        ],
    )
    # Each dropdown change refreshes all deeper dropdowns plus the content
    # panels; the outputs must match the handler's return order.
    folder_dropdown.change(
        update_second_dropdown,
        inputs=[repo_id_input, folder_dropdown],
        outputs=[
            second_level_dropdown,
            third_level_dropdown,
            fourth_level_dropdown,
            fifth_level_dropdown,
            instruction_display,
            image_gallery,
            annotation_status,
        ],
    )
    second_level_dropdown.change(
        update_third_dropdown,
        inputs=[repo_id_input, folder_dropdown, second_level_dropdown],
        outputs=[
            third_level_dropdown,
            fourth_level_dropdown,
            fifth_level_dropdown,
            instruction_display,
            image_gallery,
            annotation_status,
        ],
    )
    third_level_dropdown.change(
        update_fourth_dropdown,
        inputs=[
            repo_id_input,
            folder_dropdown,
            second_level_dropdown,
            third_level_dropdown,
        ],
        outputs=[
            fourth_level_dropdown,
            fifth_level_dropdown,
            instruction_display,
            image_gallery,
            annotation_status,
        ],
    )
    fourth_level_dropdown.change(
        update_fifth_dropdown,
        inputs=[
            repo_id_input,
            folder_dropdown,
            second_level_dropdown,
            third_level_dropdown,
            fourth_level_dropdown,
        ],
        outputs=[
            fifth_level_dropdown,
            instruction_display,
            image_gallery,
            annotation_status,
        ],
    )
    # The deepest dropdown only drives the content panels.
    fifth_level_dropdown.change(
        update_gallery,
        inputs=[
            repo_id_input,
            folder_dropdown,
            second_level_dropdown,
            third_level_dropdown,
            fourth_level_dropdown,
            fifth_level_dropdown,
        ],
        outputs=[instruction_display, image_gallery, annotation_status],
    )
    # Populate the page on load using the same handler as the reload button.
    demo.load(
        refresh_repo,
        inputs=repo_id_input,
        outputs=[
            folder_dropdown,
            second_level_dropdown,
            third_level_dropdown,
            fourth_level_dropdown,
            fifth_level_dropdown,
            instruction_display,
            image_gallery,
            annotation_status,
            status_display,
        ],
    )
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()