remove_weights_from_python_wheel

by jdye64 - opened 25 days ago

base: refs/heads/main

←

from: refs/pr/9

Discussion Files changed

+175

-10

This PR is in draft mode

Files changed (6) hide show

MANIFEST.in +2 -1
nemotron_graphic_elements_v1/__init__.py +5 -0
nemotron_graphic_elements_v1/graphic_element_v1.py +24 -6
nemotron_graphic_elements_v1/model.py +16 -2
nemotron_graphic_elements_v1/weights.py +126 -0
pyproject.toml +2 -1

MANIFEST.in CHANGED Viewed

@@ -1,3 +1,4 @@
 include README.md
 include THIRD_PARTY_NOTICES.md
-recursive-include nemotron_graphic_elements_v1

 include README.md
 include THIRD_PARTY_NOTICES.md
+recursive-include nemotron_graphic_elements_v1 *.py *.json *.png
+recursive-exclude nemotron_graphic_elements_v1 *.pth

nemotron_graphic_elements_v1/__init__.py CHANGED Viewed

@@ -6,6 +6,8 @@ Nemotron Graphic Elements v1
 A specialized object detection system designed to identify and extract key elements
 from charts and graphs. Based on YOLOX architecture.
 """
 __version__ = "1.0.0"
@@ -19,6 +21,7 @@ from .utils import (
     COLORS,
 )
 from .graphic_element_v1 import Exp
 __all__ = [
     "define_model",
@@ -28,5 +31,7 @@ __all__ = [
     "reformat_for_plotting",
     "reorder_boxes",
     "COLORS",
 ]

 A specialized object detection system designed to identify and extract key elements
 from charts and graphs. Based on YOLOX architecture.
+Model weights are automatically downloaded from Hugging Face Hub on first use.
 """
 __version__ = "1.0.0"
     COLORS,
 )
 from .graphic_element_v1 import Exp
+from .weights import get_weights_path, clear_cache
 __all__ = [
     "define_model",
     "reformat_for_plotting",
     "reorder_boxes",
     "COLORS",
+    "get_weights_path",
+    "clear_cache",
 ]

nemotron_graphic_elements_v1/graphic_element_v1.py CHANGED Viewed

@@ -4,7 +4,9 @@
 import os
 import torch
 import torch.nn as nn
-from typing import List, Tuple
 class Exp:
@@ -16,12 +18,28 @@ class Exp:
     parameters, and class-specific thresholds.
     """
-    def __init__(self) -> None:
-        """Initialize the configuration with default parameters."""
         self.name: str = "graphic-element-v1"
-        # Use package directory for weights path
-        package_dir = os.path.dirname(os.path.abspath(__file__))
-        self.ckpt: str = os.path.join(package_dir, "weights.pth")
         self.device: str = "cuda:0" if torch.cuda.is_available() else "cpu"
         # YOLOX architecture parameters

 import os
 import torch
 import torch.nn as nn
+from typing import List, Tuple, Optional
+from .weights import get_weights_path
 class Exp:
     parameters, and class-specific thresholds.
     """
+    def __init__(
+        self,
+        weights_cache_dir: Optional[str] = None,
+        force_download: bool = False,
+        hf_token: Optional[str] = None,
+    ) -> None:
+        """
+        Initialize the configuration with default parameters.
+        Args:
+            weights_cache_dir: Directory to cache downloaded weights.
+                Defaults to ~/.cache/nemotron_graphic_elements_v1
+            force_download: If True, re-download weights even if cached.
+            hf_token: Hugging Face token for accessing gated models (if needed).
+        """
         self.name: str = "graphic-element-v1"
+        # Get weights path (downloads from HuggingFace if needed)
+        self.ckpt: str = get_weights_path(
+            cache_dir=weights_cache_dir,
+            force_download=force_download,
+            token=hf_token,
+        )
         self.device: str = "cuda:0" if torch.cuda.is_available() else "cpu"
         # YOLOX architecture parameters

nemotron_graphic_elements_v1/model.py CHANGED Viewed

@@ -13,13 +13,23 @@ from typing import Dict, List, Tuple, Union
 from .yolox.boxes import postprocess
-def define_model(config_name: str = "graphic_element_v1", verbose: bool = True) -> nn.Module:
     """
     Defines and initializes the model based on the configuration.
     Args:
         config_name (str): Configuration name. Defaults to "graphic_element_v1".
         verbose (bool): Whether to print verbose output. Defaults to True.
     Returns:
         torch.nn.Module: The initialized YOLOX model.
@@ -27,7 +37,11 @@ def define_model(config_name: str = "graphic_element_v1", verbose: bool = True)
     # Import the config class
     from .graphic_element_v1 import Exp
-    config = Exp()
     model = config.get_model()
     # Load weights

 from .yolox.boxes import postprocess
+def define_model(
+    config_name: str = "graphic_element_v1",
+    verbose: bool = True,
+    weights_cache_dir: str = None,
+    force_download: bool = False,
+    hf_token: str = None,
+) -> nn.Module:
     """
     Defines and initializes the model based on the configuration.
     Args:
         config_name (str): Configuration name. Defaults to "graphic_element_v1".
         verbose (bool): Whether to print verbose output. Defaults to True.
+        weights_cache_dir (str): Directory to cache downloaded weights.
+            Defaults to ~/.cache/nemotron_graphic_elements_v1
+        force_download (bool): If True, re-download weights even if cached.
+        hf_token (str): Hugging Face token for accessing gated models (if needed).
     Returns:
         torch.nn.Module: The initialized YOLOX model.
     # Import the config class
     from .graphic_element_v1 import Exp
+    config = Exp(
+        weights_cache_dir=weights_cache_dir,
+        force_download=force_download,
+        hf_token=hf_token,
+    )
     model = config.get_model()
     # Load weights

nemotron_graphic_elements_v1/weights.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Weights management for Nemotron Graphic Elements v1.
+This module handles downloading model weights from Hugging Face Hub
+when they are not bundled with the package.
+"""
+import os
+from pathlib import Path
+from typing import Optional
+from huggingface_hub import hf_hub_download
+# Hugging Face repository information: repo id and in-repo file path passed to hf_hub_download
+HF_REPO_ID = "nvidia/nemotron-graphic-elements-v1"
+WEIGHTS_FILENAME = "nemotron_graphic_elements_v1/weights.pth"  # path of the weights inside the repo
+# Default cache directory for weights, used when the caller does not pass cache_dir
+DEFAULT_CACHE_DIR = Path.home() / ".cache" / "nemotron_graphic_elements_v1"
+def get_weights_path(
+    cache_dir: Optional[str] = None,
+    force_download: bool = False,
+    token: Optional[str] = None,
+) -> str:
+    """
+    Get the path to the model weights, downloading if necessary.
+    This function first checks if weights exist in the package directory
+    (for development or manual installation). If not found, it downloads
+    the weights from Hugging Face Hub to the cache directory.
+    Args:
+        cache_dir: Directory to cache downloaded weights. Defaults to
+            ~/.cache/nemotron_graphic_elements_v1
+        force_download: If True, re-download even if weights exist in cache.
+        token: Hugging Face token for accessing gated models (if needed).
+    Returns:
+        str: Path to the weights file.
+    Raises:
+        RuntimeError: If weights cannot be found or downloaded.
+    """
+    # First, check if weights exist in the package directory (dev mode)
+    package_dir = Path(__file__).parent
+    local_weights = package_dir / "weights.pth"
+    if local_weights.exists() and not force_download:
+        return str(local_weights)
+    # Set up cache directory
+    if cache_dir is None:
+        cache_dir = DEFAULT_CACHE_DIR
+    else:
+        cache_dir = Path(cache_dir)
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    cached_weights = cache_dir / "weights.pth"
+    # Check if weights are already cached
+    if cached_weights.exists() and not force_download:
+        return str(cached_weights)
+    # Download from Hugging Face Hub
+    print(f" -> Downloading weights from Hugging Face Hub ({HF_REPO_ID})...")
+    try:
+        downloaded_path = hf_hub_download(
+            repo_id=HF_REPO_ID,
+            filename=WEIGHTS_FILENAME,
+            cache_dir=str(cache_dir),
+            force_download=force_download,
+            token=token,
+            local_dir=str(cache_dir),
+            local_dir_use_symlinks=False,
+        )
+        # The file might be downloaded to a subdirectory, move to expected location
+        downloaded_path = Path(downloaded_path)
+        if downloaded_path != cached_weights:
+            # Copy to the expected location if different
+            import shutil
+            shutil.copy2(downloaded_path, cached_weights)
+        print(f" -> Weights downloaded to {cached_weights}")
+        return str(cached_weights)
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to download weights from Hugging Face Hub.\n"
+            f"Repository: {HF_REPO_ID}\n"
+            f"Error: {e}\n\n"
+            f"Please ensure you have internet access and the huggingface_hub "
+            f"package is installed. You can also manually download the weights "
+            f"from https://huggingface.co/{HF_REPO_ID} and place them at:\n"
+            f"  {cached_weights}"
+        ) from e
+def clear_cache(cache_dir: Optional[str] = None) -> None:
+    """
+    Clear the cached weights.
+    Args:
+        cache_dir: Directory where weights are cached. Defaults to
+            ~/.cache/nemotron_graphic_elements_v1
+    """
+    if cache_dir is None:
+        cache_dir = DEFAULT_CACHE_DIR
+    else:
+        cache_dir = Path(cache_dir)
+    cached_weights = cache_dir / "weights.pth"
+    if cached_weights.exists():
+        cached_weights.unlink()
+        print(f" -> Removed cached weights from {cached_weights}")
+    else:
+        print(f" -> No cached weights found at {cached_weights}")

pyproject.toml CHANGED Viewed

@@ -32,6 +32,7 @@ dependencies = [
     "matplotlib>=3.5.0",
     "pandas>=1.3.0",
     "Pillow>=9.0.0",
 ]
 [project.optional-dependencies]
@@ -50,5 +51,5 @@ Repository = "https://huggingface.co/nvidia/nemotron-graphic-elements-v1"
 packages = ["nemotron_graphic_elements_v1", "nemotron_graphic_elements_v1.yolox", "nemotron_graphic_elements_v1.post_processing"]
 [tool.setuptools.package-data]
-"nemotron_graphic_elements_v1" = ["*.pth", "*.json", "*.png"]

     "matplotlib>=3.5.0",
     "pandas>=1.3.0",
     "Pillow>=9.0.0",
+    "huggingface_hub>=0.20.0",
 ]
 [project.optional-dependencies]
 packages = ["nemotron_graphic_elements_v1", "nemotron_graphic_elements_v1.yolox", "nemotron_graphic_elements_v1.post_processing"]
 [tool.setuptools.package-data]
+"nemotron_graphic_elements_v1" = ["*.json", "*.png"]