chingshuai committed on
Commit fa80dfd · 1 Parent(s): c4d5f5a

merge gradio_app, runtime

gradio_app.py CHANGED
@@ -11,7 +11,7 @@ from typing import List, Optional, Tuple, Union
11
  import gradio as gr
12
  from hymotion.utils.gradio_runtime import ModelInference
13
  from hymotion.utils.gradio_utils import try_to_download_model, try_to_download_text_encoder
14
- from hymotion.utils.gradio_css import get_placeholder_html, APP_CSS, HEADER_BASE_MD, FOOTER_MD
15
  # Import spaces for Hugging Face Zero GPU support
16
  import spaces
17
 
@@ -20,6 +20,155 @@ DATA_SOURCES = {
20
  "example_prompts": "examples/example_prompts/example_subset.json",
21
  }
22
23
  def load_examples_from_txt(txt_path: str, example_record_fps=20, max_duration=12):
24
  """Load examples from txt file."""
25
 
@@ -69,19 +218,19 @@ def load_examples_from_txt(txt_path: str, example_record_fps=20, max_duration=12
69
 
70
  return examples
71
 
 
72
  @spaces.GPU(duration=120) # Request GPU for up to 120 seconds per inference
73
  def generate_motion_func(
74
  # text input
75
  original_text: str,
76
  rewritten_text: str,
77
- use_prompt_engineering: bool,
78
  # model input
79
  seed_input: str,
80
  motion_duration: float,
81
  cfg_scale: float,
82
- # output
83
- output_dir: str,
84
  ) -> Tuple[str, List[str]]:
 
 
85
  # When rewrite is not available, use original_text directly
86
  if use_prompt_engineering:
87
  text_to_use = rewritten_text.strip()
@@ -106,7 +255,7 @@ def generate_motion_func(
106
  cfg_scale=cfg_scale,
107
  output_format=req_format,
108
  original_text=original_text,
109
- output_dir=output_dir
110
  )
111
  print(f"Running inference...after gpu_inference_wrapper")
112
  # Escape HTML content for srcdoc attribute
@@ -128,12 +277,25 @@ def generate_motion_func(
128
  [],
129
  )
130
 
 
131
  class T2MGradioUI:
132
  def __init__(self, args):
133
  self.output_dir = args.output_dir
134
  print(f"[{self.__class__.__name__}] output_dir: {self.output_dir}")
135
  # self.args = args
136
  self.prompt_engineering_available = args.use_prompt_engineering
137
  self.all_example_data = {}
138
  self._init_example_data()
139
 
@@ -162,34 +324,29 @@ class T2MGradioUI:
162
  seeds = [random.randint(0, 999) for _ in range(4)]
163
  return ",".join(map(str, seeds))
164
 
165
- def _prompt_engineering(
166
- self, text: str, duration: float, enable_rewrite: bool = True, enable_duration_est: bool = True
167
- ):
168
  if not text.strip():
169
- return "", gr.update(interactive=False), gr.update()
170
 
171
- call_llm = enable_rewrite or enable_duration_est
172
- if not call_llm:
173
- print(f"\t>>> Using original duration and original text...")
174
- predicted_duration = duration
175
- rewritten_text = text
176
- else:
177
- print(f"\t>>> Using LLM to estimate duration/rewrite text...")
178
- try:
179
- predicted_duration, rewritten_text = model_inference.rewrite_text_and_infer_time(text=text)
180
- except Exception as e:
181
- print(f"\t>>> Text rewriting/duration prediction failed: {e}")
182
- return (
183
- f"❌ Text rewriting/duration prediction failed: {str(e)}",
184
- gr.update(interactive=False),
185
- gr.update(),
186
- )
187
- if not enable_rewrite:
188
- rewritten_text = text
189
- if not enable_duration_est:
190
- predicted_duration = duration
191
 
192
- return rewritten_text, gr.update(interactive=True), gr.update(value=predicted_duration)
193
 
194
  def _get_example_choices(self):
195
  """Get all example choices from all data sources"""
@@ -204,7 +361,10 @@ class T2MGradioUI:
204
  def _on_example_select(self, selected_example):
205
  """When selecting an example, the callback function"""
206
  if selected_example == "Custom Input":
207
- return "", self._generate_random_seeds(), gr.update()
 
 
 
208
  else:
209
  # find the corresponding example from all data sources
210
  for source_name in self.all_example_data:
@@ -212,30 +372,45 @@ class T2MGradioUI:
212
  for text, duration in example_data:
213
  display_text = f"{text[:50]}..." if len(text) > 50 else text
214
  if display_text == selected_example:
215
- return text, self._generate_random_seeds(), gr.update(value=duration)
216
- return "", self._generate_random_seeds(), gr.update()
 
 
 
 
 
 
 
217
 
218
  def build_ui(self):
219
  with gr.Blocks(css=APP_CSS) as demo:
220
  # Create State components for non-UI values that need to be passed to event handlers
221
  self.use_prompt_engineering_state = gr.State(self.prompt_engineering_available)
222
  self.output_dir_state = gr.State(self.output_dir)
223
-
224
  self.header_md = gr.Markdown(HEADER_BASE_MD, elem_classes=["main-header"])
225
 
226
  with gr.Row():
227
  # Left control panel
228
  with gr.Column(scale=2, elem_classes=["left-panel"]):
 
229
  # Input textbox
230
  if self.prompt_engineering_available:
231
- input_place_holder = "Enter text to generate motion, support Chinese and English text input."
232
  else:
233
- input_place_holder = "Enter text to generate motion, please use `A person ...` format to describe the motion"
234
 
235
  self.text_input = gr.Textbox(
236
  label="📝 Input Text",
237
  placeholder=input_place_holder,
 
 
 
238
  )
239
  # Rewritten textbox
240
  self.rewritten_text = gr.Textbox(
241
  label="✏️ Rewritten Text",
@@ -281,18 +456,13 @@ class T2MGradioUI:
281
  interactive=not self.prompt_engineering_available, # Enable directly if rewrite not available
282
  )
283
 
284
- if not self.prompt_engineering_available:
285
- gr.Markdown(
286
- "> ⚠️ **Prompt engineering is not available.** Text rewriting and duration estimation are disabled. Your input text and duration will be used directly."
287
- )
288
-
289
 
290
  # Example selection dropdown
291
  self.example_dropdown = gr.Dropdown(
292
  choices=self._get_example_choices(),
293
  value="Custom Input",
294
- label="📚 Test Examples",
295
- info="Select a preset example or input your own text above",
296
  interactive=True,
297
  )
298
 
@@ -309,6 +479,9 @@ class T2MGradioUI:
309
  self.status_output = gr.Textbox(
310
  label="📊 Status Information",
311
  value=status_msg,
 
 
 
312
  )
313
 
314
  # FBX Download section
@@ -325,11 +498,27 @@ class T2MGradioUI:
325
  # Right display area
326
  with gr.Column(scale=3):
327
  self.output_display = gr.HTML(
328
- value=get_placeholder_html(),
329
- show_label=False,
330
- elem_classes=["flask-display"]
331
  )
332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  # Footer
334
  gr.Markdown(FOOTER_MD, elem_classes=["footer"])
335
 
@@ -338,79 +527,73 @@ class T2MGradioUI:
338
  return demo
339
 
340
  def _build_advanced_settings(self):
341
- # Only show rewrite options if rewrite is available
342
- if self.prompt_engineering_available:
343
- with gr.Group():
344
- gr.Markdown("### 🔄 Text Rewriting Options")
345
- with gr.Row():
346
- self.enable_rewrite = gr.Checkbox(
347
- label="Enable Text Rewriting",
348
- value=True,
349
- info="Automatically optimize text prompt to get better motion generation",
350
- )
351
-
352
- with gr.Group():
353
- gr.Markdown("### ⏱️ Duration Settings")
354
- self.enable_duration_est = gr.Checkbox(
355
- label="Enable Duration Estimation",
356
- value=True,
357
- info="Automatically estimate the duration of the motion",
358
- )
359
- else:
360
- # Create hidden placeholders with default values (disabled)
361
- self.enable_rewrite = gr.Checkbox(
362
- label="Enable Text Rewriting",
363
- value=False,
364
- visible=False,
365
  )
366
- self.enable_duration_est = gr.Checkbox(
367
- label="Enable Duration Estimation",
368
- value=False,
369
- visible=False,
 
 
370
  )
371
- with gr.Group():
372
- gr.Markdown("### ⚠️ Prompt Engineering Unavailable")
373
- gr.Markdown(
374
- "Text rewriting and duration estimation are not available. "
375
- "Your input text and duration will be used directly."
376
- )
 
377
 
378
- with gr.Group():
379
- gr.Markdown("### ⚙️ Generation Parameters")
380
- with gr.Row():
381
- with gr.Column(scale=3):
382
- self.seed_input = gr.Textbox(
383
- label="🎯 Random Seed List (comma separated)",
384
- value="0,1,2,3",
385
- placeholder="Enter comma separated seed list (e.g.: 0,1,2,3)",
386
- info="Random seeds control the diversity of generated motions",
387
- )
388
- with gr.Column(scale=1, min_width=60, elem_classes=["dice-container"]):
389
- self.dice_btn = gr.Button(
390
- "🎲 Lucky Button",
391
- variant="secondary",
392
- size="sm",
393
- elem_classes=["dice-button"],
394
- )
395
 
396
- self.cfg_slider = gr.Slider(
397
- minimum=1,
398
- maximum=10,
399
- value=5.0,
400
- step=0.1,
401
- label="⚙️ CFG Strength",
402
- info="Text fidelity: higher = more faithful to the prompt",
 
 
 
 
 
 
 
 
 
 
 
 
403
  )
404
 
405
  def _bind_events(self):
406
  # Generate random seeds
407
  self.dice_btn.click(self._generate_random_seeds, outputs=[self.seed_input])
408
409
  # Bind example selection event
410
  self.example_dropdown.change(
411
  fn=self._on_example_select,
412
  inputs=[self.example_dropdown],
413
- outputs=[self.text_input, self.seed_input, self.duration_slider],
414
  )
415
 
416
  # Rewrite text logic (only bind when rewrite is available)
@@ -420,16 +603,11 @@ class T2MGradioUI:
420
  inputs=[
421
  self.text_input,
422
  self.duration_slider,
423
- self.enable_rewrite,
424
- self.enable_duration_est,
425
  ],
426
- outputs=[self.rewritten_text, self.generate_btn, self.duration_slider],
427
  ).then(
428
- fn=lambda: (
429
- gr.update(visible=True),
430
- "Text rewriting completed! Please check and edit the rewritten text, then click [🚀 Generate Motion]",
431
- ),
432
- outputs=[self.rewritten_text, self.status_output],
433
  )
434
 
435
  # Generate motion logic
@@ -438,16 +616,8 @@ class T2MGradioUI:
438
  outputs=[self.status_output],
439
  ).then(
440
  generate_motion_func,
441
- inputs=[
442
- self.text_input,
443
- self.rewritten_text,
444
- self.use_prompt_engineering_state,
445
- self.seed_input,
446
- self.duration_slider,
447
- self.cfg_slider,
448
- self.output_dir_state,
449
- ],
450
- outputs=[self.output_display, self.fbx_files]
451
  ).then(
452
  fn=lambda fbx_list: (
453
  (
@@ -463,12 +633,22 @@ class T2MGradioUI:
463
 
464
  # Reset logic - different behavior based on rewrite availability
465
  if self.prompt_engineering_available:
466
  self.text_input.change(
467
- fn=lambda: (
468
- gr.update(visible=False),
469
- gr.update(interactive=False),
470
- "Please click the [🔄 Rewrite Text] button to rewrite the text first",
 
 
 
 
471
  ),
 
472
  outputs=[self.rewritten_text, self.generate_btn, self.status_output],
473
  )
474
  else:
@@ -508,11 +688,8 @@ def create_demo(final_model_path):
508
  class Args:
509
  model_path = final_model_path
510
  output_dir = "output/gradio"
511
- use_prompt_engineering = False
512
  use_text_encoder = True
513
- prompt_engineering_host = os.environ.get("PROMPT_HOST", None)
514
- prompt_engineering_model_path = os.environ.get("PROMPT_MODEL_PATH", None)
515
- disable_prompt_engineering = os.environ.get("DISABLE_PROMPT_ENGINEERING", False)
516
 
517
  args = Args()
518
 
@@ -538,11 +715,21 @@ def create_demo(final_model_path):
538
 
539
  if __name__ == "__main__":
540
  # Create demo at module level for Hugging Face Spaces
541
  try_to_download_text_encoder()
542
  # Then download the main model
543
  final_model_path = try_to_download_model()
544
- model_inference = ModelInference(final_model_path,
545
  use_prompt_engineering=False, use_text_encoder=True)
546
  model_inference.initialize_model(device="cpu")
547
  demo = create_demo(final_model_path)
548
- demo.launch(server_name="0.0.0.0")
 
11
  import gradio as gr
12
  from hymotion.utils.gradio_runtime import ModelInference
13
  from hymotion.utils.gradio_utils import try_to_download_model, try_to_download_text_encoder
14
+ from hymotion.utils.gradio_css import get_placeholder_html, APP_CSS, HEADER_BASE_MD, FOOTER_MD, WITHOUT_PROMPT_ENGINEERING_WARNING
15
  # Import spaces for Hugging Face Zero GPU support
16
  import spaces
17
 
 
20
  "example_prompts": "examples/example_prompts/example_subset.json",
21
  }
22
 
23
+ # Pre-generated examples for gallery display (generated on first startup)
24
+ # Add/remove items to control the number of examples
25
+ EXAMPLE_GALLERY_LIST = [
26
+ {
27
+ "prompt": "A person jumps upward with both legs twice.",
28
+ "duration": 4.5,
29
+ "seeds": "792",
30
+ "cfg_scale": 5.0,
31
+ "filename": "jump_twice",
32
+ },
33
+ # Add more examples here as needed:
34
+ {
35
+ "prompt": "A person jumps on their right leg.",
36
+ "duration": 4.5,
37
+ "seeds": "941",
38
+ "cfg_scale": 5.0,
39
+ "filename": "jump_right_leg",
40
+ },
41
+ ]
42
+ EXAMPLE_GALLERY_OUTPUT_DIR = "examples/pregenerated"
43
+
44
+ def ensure_examples_generated(model_inference_obj) -> List[str]:
45
+ """
46
+ Ensure all example motions are generated on first startup.
47
+ Returns a list of successfully generated example filenames.
48
+ """
49
+ example_dir = EXAMPLE_GALLERY_OUTPUT_DIR
50
+ os.makedirs(example_dir, exist_ok=True)
51
+
52
+ generated_examples = []
53
+
54
+ for example in EXAMPLE_GALLERY_LIST:
55
+ example_filename = example["filename"]
56
+ meta_path = os.path.join(example_dir, f"{example_filename}_meta.json")
57
+
58
+ # Check if already generated
59
+ if os.path.exists(meta_path):
60
+ print(f">>> Example already exists: {meta_path}")
61
+ generated_examples.append(example_filename)
62
+ continue
63
+
64
+ # Generate the example
65
+ print(f">>> Generating example motion: {example['prompt']}")
66
+ try:
67
+ html_content, fbx_files = model_inference_obj.run_inference(
68
+ text=example["prompt"],
69
+ seeds_csv=example["seeds"],
70
+ motion_duration=example["duration"],
71
+ cfg_scale=example["cfg_scale"],
72
+ output_format="dict", # Don't generate FBX for example
73
+ original_text=example["prompt"],
74
+ output_dir=example_dir,
75
+ output_filename=example_filename,
76
+ )
77
+ print(f">>> Example '{example_filename}' generated successfully!")
78
+ generated_examples.append(example_filename)
79
+ except Exception as e:
80
+ print(f">>> Failed to generate example '{example_filename}': {e}")
81
+
82
+ return generated_examples
83
+
84
+
85
+ def load_example_gallery_html(example_index: int = 0) -> str:
86
+ """
87
+ Load a specific pre-generated example and return iframe HTML for display.
88
+ Args:
89
+ example_index: Index of the example in EXAMPLE_GALLERY_LIST
90
+ """
91
+ from hymotion.utils.visualize_mesh_web import generate_static_html_content
92
+
93
+ if example_index < 0 or example_index >= len(EXAMPLE_GALLERY_LIST):
94
+ return ""
95
+
96
+ example = EXAMPLE_GALLERY_LIST[example_index]
97
+ example_dir = EXAMPLE_GALLERY_OUTPUT_DIR
98
+ example_filename = example["filename"]
99
+ meta_path = os.path.join(example_dir, f"{example_filename}_meta.json")
100
+
101
+ if not os.path.exists(meta_path):
102
+ return f"""
103
+ <div style='height: 300px; display: flex; justify-content: center; align-items: center;
104
+ background: #2d3748; border-radius: 12px; color: #a0aec0;'>
105
+ <p>Example not generated yet. Please restart the app.</p>
106
+ </div>
107
+ """
108
+
109
+ try:
110
+ html_content = generate_static_html_content(
111
+ folder_name=example_dir,
112
+ file_name=example_filename,
113
+ hide_captions=False,
114
+ )
115
+ escaped_html = html_content.replace('"', "&quot;")
116
+ iframe_html = f"""
117
+ <iframe
118
+ srcdoc="{escaped_html}"
119
+ width="100%"
120
+ height="350px"
121
+ style="border: none; border-radius: 12px; box-shadow: 0 4px 20px rgba(0,0,0,0.1);"
122
+ ></iframe>
123
+ """
124
+ return iframe_html
125
+ except Exception as e:
126
+ print(f">>> Failed to load example gallery: {e}")
127
+ return ""
128
+
129
+
130
+ def get_example_gallery_grid_html() -> str:
131
+ """
132
+ Generate a grid layout HTML for all examples in the gallery.
133
+ """
134
+ if not EXAMPLE_GALLERY_LIST:
135
+ return "<p>No examples configured.</p>"
136
+
137
+ # Calculate grid columns based on number of examples
138
+ num_examples = len(EXAMPLE_GALLERY_LIST)
139
+ if num_examples == 1:
140
+ columns = 1
141
+ elif num_examples == 2:
142
+ columns = 2
143
+ elif num_examples <= 4:
144
+ columns = 2
145
+ else:
146
+ columns = 3
147
+
148
+ grid_items = []
149
+ for idx, example in enumerate(EXAMPLE_GALLERY_LIST):
150
+ iframe_html = load_example_gallery_html(idx)
151
+ prompt_short = example["prompt"][:60] + "..." if len(example["prompt"]) > 60 else example["prompt"]
152
+
153
+ grid_items.append(f"""
154
+ <div class="example-grid-item" style="background: var(--card-bg, #fff); border-radius: 12px;
155
+ padding: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);">
156
+ <div style="font-size: 14px; font-weight: 600; color: var(--text-primary, #333);
157
+ margin-bottom: 8px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">
158
+ {prompt_short}
159
+ </div>
160
+ {iframe_html}
161
+ </div>
162
+ """)
163
+
164
+ grid_html = f"""
165
+ <div style="display: grid; grid-template-columns: repeat({columns}, 1fr); gap: 16px; padding: 8px;">
166
+ {"".join(grid_items)}
167
+ </div>
168
+ """
169
+ return grid_html
170
+
171
+
172
  def load_examples_from_txt(txt_path: str, example_record_fps=20, max_duration=12):
173
  """Load examples from txt file."""
174
 
 
218
 
219
  return examples
220
 
221
+
222
  @spaces.GPU(duration=120) # Request GPU for up to 120 seconds per inference
223
  def generate_motion_func(
224
  # text input
225
  original_text: str,
226
  rewritten_text: str,
 
227
  # model input
228
  seed_input: str,
229
  motion_duration: float,
230
  cfg_scale: float,
 
 
231
  ) -> Tuple[str, List[str]]:
232
+ use_prompt_engineering = USE_PROMPT_ENGINEERING
233
+ output_dir = "output/gradio"
234
  # When rewrite is not available, use original_text directly
235
  if use_prompt_engineering:
236
  text_to_use = rewritten_text.strip()
 
255
  cfg_scale=cfg_scale,
256
  output_format=req_format,
257
  original_text=original_text,
258
+ output_dir=output_dir,
259
  )
260
  print(f"Running inference...after gpu_inference_wrapper")
261
  # Escape HTML content for srcdoc attribute
 
277
  [],
278
  )
279
 
280
+
281
  class T2MGradioUI:
282
  def __init__(self, args):
283
  self.output_dir = args.output_dir
284
  print(f"[{self.__class__.__name__}] output_dir: {self.output_dir}")
285
  # self.args = args
286
  self.prompt_engineering_available = args.use_prompt_engineering
287
+ if self.prompt_engineering_available:
288
+ try:
289
+ from hymotion.prompt_engineering.client import PromptEngineeringClient
290
+ self.prompt_engineering_client = PromptEngineeringClient()
291
+ # Test the client with a simple prompt to verify it works
292
+ self.prompt_engineering_client.rewrite_prompt_and_infer_time("A person walks forward.", max_timeout=30)
293
+ print(f"[{self.__class__.__name__}] Prompt engineering client initialized successfully.")
294
+ except Exception as e:
295
+ print(f"[{self.__class__.__name__}] Prompt engineering client initialization failed: {e}")
296
+ self.prompt_engineering_available = False
297
+
298
+
299
  self.all_example_data = {}
300
  self._init_example_data()
301
 
 
324
  seeds = [random.randint(0, 999) for _ in range(4)]
325
  return ",".join(map(str, seeds))
326
 
327
+ def _prompt_engineering(self, text: str, duration: float):
 
 
328
  if not text.strip():
329
+ return "", gr.update(interactive=False), gr.update(), "⚠️ Please enter text first"
330
 
331
+ print(f"\t>>> Using LLM to estimate duration/rewrite text...")
332
+ try:
333
+ predicted_duration, rewritten_text = self.prompt_engineering_client.rewrite_prompt_and_infer_time(text=text)
334
+ except Exception as e:
335
+ print(f"\t>>> Text rewriting/duration prediction failed: {e}")
336
+ # On failure, use original text and enable generate button
337
+ return (
338
+ text, # Use original text as fallback
339
+ gr.update(interactive=True), # Enable generate button
340
+ gr.update(),
341
+ f"⚠️ Text rewriting failed: {str(e)}\n💡 Using your original input directly. You can click [🚀 Generate Motion] to continue.",
342
+ )
 
343
 
344
+ return (
345
+ rewritten_text,
346
+ gr.update(interactive=True),
347
+ gr.update(value=predicted_duration),
348
+ "✅ Text rewriting completed! Please check and edit the rewritten text, then click [🚀 Generate Motion]",
349
+ )
350
 
351
  def _get_example_choices(self):
352
  """Get all example choices from all data sources"""
 
361
  def _on_example_select(self, selected_example):
362
  """When selecting an example, the callback function"""
363
  if selected_example == "Custom Input":
364
+ if self.prompt_engineering_available:
365
+ return "", self._generate_random_seeds(), gr.update(), gr.update(value="", visible=False), gr.update(interactive=False), "Please enter text or select an example"
366
+ else:
367
+ return "", self._generate_random_seeds(), gr.update(), gr.update(), gr.update(), gr.update()
368
  else:
369
  # find the corresponding example from all data sources
370
  for source_name in self.all_example_data:
 
372
  for text, duration in example_data:
373
  display_text = f"{text[:50]}..." if len(text) > 50 else text
374
  if display_text == selected_example:
375
+ if self.prompt_engineering_available:
376
+ # Set text directly to rewritten_text and enable generate button
377
+ return text, self._generate_random_seeds(), gr.update(value=duration), gr.update(value=text, visible=True), gr.update(interactive=True), "✅ Example selected! Click [🚀 Generate Motion] to start."
378
+ else:
379
+ return text, self._generate_random_seeds(), gr.update(value=duration), gr.update(), gr.update(), gr.update()
380
+ if self.prompt_engineering_available:
381
+ return "", self._generate_random_seeds(), gr.update(), gr.update(value="", visible=False), gr.update(interactive=False), "Please enter text or select an example"
382
+ else:
383
+ return "", self._generate_random_seeds(), gr.update(), gr.update(), gr.update(), gr.update()
384
 
385
  def build_ui(self):
386
  with gr.Blocks(css=APP_CSS) as demo:
387
  # Create State components for non-UI values that need to be passed to event handlers
388
  self.use_prompt_engineering_state = gr.State(self.prompt_engineering_available)
389
  self.output_dir_state = gr.State(self.output_dir)
390
+
391
  self.header_md = gr.Markdown(HEADER_BASE_MD, elem_classes=["main-header"])
392
 
393
  with gr.Row():
394
  # Left control panel
395
  with gr.Column(scale=2, elem_classes=["left-panel"]):
396
+
397
  # Input textbox
398
  if self.prompt_engineering_available:
399
+ input_place_holder = "Enter text to generate motion, support Chinese and English text input. Non-humanoid Characters, Multi-person Interactions and Environment & Camera are not supported. Click [ 📚 Example Prompts ] to see more examples."
400
  else:
401
+ input_place_holder = "Enter English text to generate motion, please use `A person ...` format to describe the motion, better less than 50 words. Non-humanoid Characters, Multi-person Interactions and Environment & Camera are not supported. Click [ 📚 Example Prompts ] to see more examples."
402
 
403
  self.text_input = gr.Textbox(
404
  label="📝 Input Text",
405
  placeholder=input_place_holder,
406
+ lines=3,
407
+ max_lines=10,
408
+ autoscroll=False,
409
  )
410
+ # if not self.prompt_engineering_available:
411
+ # gr.Markdown(
412
+ # "Click [📚 Example Prompts] to see more examples."
413
+ # )
414
  # Rewritten textbox
415
  self.rewritten_text = gr.Textbox(
416
  label="✏️ Rewritten Text",
 
456
  interactive=not self.prompt_engineering_available, # Enable directly if rewrite not available
457
  )
458
459
 
460
  # Example selection dropdown
461
  self.example_dropdown = gr.Dropdown(
462
  choices=self._get_example_choices(),
463
  value="Custom Input",
464
+ label="📚 Example Prompts",
465
+ # info="Select a preset example or input your own text above",
466
  interactive=True,
467
  )
468
 
 
479
  self.status_output = gr.Textbox(
480
  label="📊 Status Information",
481
  value=status_msg,
482
+ lines=1,
483
+ max_lines=10,
484
+ elem_classes=["status-textbox"],
485
  )
486
 
487
  # FBX Download section
 
498
  # Right display area
499
  with gr.Column(scale=3):
500
  self.output_display = gr.HTML(
501
+ value=get_placeholder_html(), show_label=False, elem_classes=["flask-display"]
 
 
502
  )
503
 
504
+ # Example Gallery Section
505
+ with gr.Accordion("🎬 Example Gallery", open=True):
506
+ self.example_gallery_display = gr.HTML(
507
+ value=get_example_gallery_grid_html(),
508
+ show_label=False,
509
+ elem_classes=["example-gallery-display"]
510
+ )
511
+ # Create use example buttons for each example
512
+ with gr.Row():
513
+ self.use_example_btns = []
514
+ for idx, example in enumerate(EXAMPLE_GALLERY_LIST):
515
+ btn = gr.Button(
516
+ f"📋 Use Example {idx + 1}",
517
+ variant="secondary",
518
+ size="sm",
519
+ )
520
+ self.use_example_btns.append((btn, idx))
521
+
522
  # Footer
523
  gr.Markdown(FOOTER_MD, elem_classes=["footer"])
524
 
 
527
  return demo
528
 
529
  def _build_advanced_settings(self):
530
+ with gr.Row():
531
+ self.seed_input = gr.Textbox(
532
+ label="🎯 Random Seeds",
533
+ value="0,1,2,3",
534
+ placeholder="e.g.: 0,1,2,3",
535
+ scale=3,
536
  )
537
+ self.dice_btn = gr.Button(
538
+ "🎲",
539
+ variant="secondary",
540
+ size="sm",
541
+ scale=1,
542
+ min_width=50,
543
  )
544
+ self.cfg_slider = gr.Slider(
545
+ minimum=1,
546
+ maximum=10,
547
+ value=5.0,
548
+ step=0.1,
549
+ label="⚙️ CFG Strength",
550
+ )
551
 
552
+ def _on_use_example(self, example_idx: int):
553
+ """When clicking 'Use This Example' button, fill in the example prompt"""
554
+ if example_idx < 0 or example_idx >= len(EXAMPLE_GALLERY_LIST):
555
+ if self.prompt_engineering_available:
556
+ return ("", "0,1,2,3", gr.update(), gr.update(value="", visible=False), gr.update(interactive=False), "Please select a valid example")
557
+ else:
558
+ return ("", "0,1,2,3", gr.update(), gr.update(), gr.update(), gr.update())
 
 
 
 
 
 
 
 
 
 
559
 
560
+ example = EXAMPLE_GALLERY_LIST[example_idx]
561
+ if self.prompt_engineering_available:
562
+ # Set text directly to rewritten_text and enable generate button
563
+ return (
564
+ example["prompt"],
565
+ example["seeds"],
566
+ gr.update(value=example["duration"]),
567
+ gr.update(value=example["prompt"], visible=True),
568
+ gr.update(interactive=True),
569
+ "✅ Example selected! Click [🚀 Generate Motion] to start.",
570
+ )
571
+ else:
572
+ return (
573
+ example["prompt"],
574
+ example["seeds"],
575
+ gr.update(value=example["duration"]),
576
+ gr.update(),
577
+ gr.update(),
578
+ gr.update(),
579
  )
580
 
581
  def _bind_events(self):
582
  # Generate random seeds
583
  self.dice_btn.click(self._generate_random_seeds, outputs=[self.seed_input])
584
 
585
+ # Use example buttons - bind each button to its example
586
+ for btn, idx in self.use_example_btns:
587
+ btn.click(
588
+ fn=lambda i=idx: self._on_use_example(i),
589
+ outputs=[self.text_input, self.seed_input, self.duration_slider, self.rewritten_text, self.generate_btn, self.status_output],
590
+ )
591
+
592
  # Bind example selection event
593
  self.example_dropdown.change(
594
  fn=self._on_example_select,
595
  inputs=[self.example_dropdown],
596
+ outputs=[self.text_input, self.seed_input, self.duration_slider, self.rewritten_text, self.generate_btn, self.status_output],
597
  )
598
 
599
  # Rewrite text logic (only bind when rewrite is available)
 
603
  inputs=[
604
  self.text_input,
605
  self.duration_slider,
 
 
606
  ],
607
+ outputs=[self.rewritten_text, self.generate_btn, self.duration_slider, self.status_output],
608
  ).then(
609
+ fn=lambda: gr.update(visible=True),
610
+ outputs=[self.rewritten_text],
 
 
 
611
  )
612
 
613
  # Generate motion logic
 
616
  outputs=[self.status_output],
617
  ).then(
618
  generate_motion_func,
619
+ inputs=[self.text_input, self.rewritten_text, self.seed_input, self.duration_slider, self.cfg_slider],
620
+ outputs=[self.output_display, self.fbx_files],
621
  ).then(
622
  fn=lambda fbx_list: (
623
  (
 
633
 
634
  # Reset logic - different behavior based on rewrite availability
635
  if self.prompt_engineering_available:
636
+ # When text_input changes:
637
+ # - If text_input == rewritten_text, it means the change was triggered by example selection,
638
+ # so we should NOT hide the rewritten_text (keep it visible and generate button enabled)
639
+ # - If text_input != rewritten_text, it means user manually edited the input,
640
+ # so we should hide the rewritten_text and require a new rewrite
641
  self.text_input.change(
642
+ fn=lambda text, rewritten: (
643
+ gr.update() if text.strip() == rewritten.strip() else gr.update(visible=False),
644
+ gr.update() if text.strip() == rewritten.strip() else gr.update(interactive=False),
645
+ (
646
+ "✅ Example selected! Click [🚀 Generate Motion] to start."
647
+ if text.strip() == rewritten.strip() and text.strip()
648
+ else "Please click the [🔄 Rewrite Text] button to rewrite the text first"
649
+ ),
650
  ),
651
+ inputs=[self.text_input, self.rewritten_text],
652
  outputs=[self.rewritten_text, self.generate_btn, self.status_output],
653
  )
654
  else:
 
688
  class Args:
689
  model_path = final_model_path
690
  output_dir = "output/gradio"
691
+ use_prompt_engineering = USE_PROMPT_ENGINEERING
692
  use_text_encoder = True
 
 
 
693
 
694
  args = Args()
695
 
 
715
 
716
  if __name__ == "__main__":
717
  # Create demo at module level for Hugging Face Spaces
718
+ import argparse
719
+ parser = argparse.ArgumentParser(description="HY-Motion-1.0 Gradio App")
720
+ parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
721
+ args = parser.parse_args()
722
+
723
+ USE_PROMPT_ENGINEERING = True
724
  try_to_download_text_encoder()
725
  # Then download the main model
726
  final_model_path = try_to_download_model()
727
+ model_inference = ModelInference(final_model_path,
728
  use_prompt_engineering=False, use_text_encoder=True)
729
  model_inference.initialize_model(device="cpu")
730
+
731
+ # Generate examples on first startup (if not exists)
732
+ ensure_examples_generated(model_inference)
733
+
734
  demo = create_demo(final_model_path)
735
+ demo.launch(server_name="0.0.0.0", server_port=args.port)
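With the new `argparse` entry point, the demo can be launched on a custom port, e.g. `python gradio_app.py --port 8080`; if `--port` is omitted it defaults to 7860, and the server still binds to `0.0.0.0` as before (the command assumes the script is invoked directly as `gradio_app.py`, per the file name in this commit).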
hymotion/network/text_encoders/text_encoder.py CHANGED
@@ -99,7 +99,9 @@ class HYTextModel(nn.Module):
99
  padding_side="right",
100
  )
101
  self.llm_text_encoder = LLM_ENCODER_LAYOUT[llm_type]["text_encoder_class"].from_pretrained(
102
- LLM_ENCODER_LAYOUT[llm_type]["module_path"], low_cpu_mem_usage=True
 
 
103
  )
104
  self.llm_text_encoder = self.llm_text_encoder.eval().requires_grad_(False)
105
  self.ctxt_dim = self.llm_text_encoder.config.hidden_size
@@ -150,9 +152,9 @@ class HYTextModel(nn.Module):
150
  )
151
  )
152
  if self.llm_type == "qwen3":
153
- ctxt_raw = llm_outputs.hidden_states[-1]
154
  else:
155
- ctxt_raw = llm_outputs.last_hidden_state
156
 
157
  start = self.crop_start
158
  end = start + self._orig_max_length_llm
 
99
  padding_side="right",
100
  )
101
  self.llm_text_encoder = LLM_ENCODER_LAYOUT[llm_type]["text_encoder_class"].from_pretrained(
102
+ LLM_ENCODER_LAYOUT[llm_type]["module_path"],
103
+ low_cpu_mem_usage=True,
104
+ torch_dtype=torch.bfloat16,
105
  )
106
  self.llm_text_encoder = self.llm_text_encoder.eval().requires_grad_(False)
107
  self.ctxt_dim = self.llm_text_encoder.config.hidden_size
 
152
  )
153
  )
154
  if self.llm_type == "qwen3":
155
+ ctxt_raw = llm_outputs.hidden_states[-1].clone()
156
  else:
157
+ ctxt_raw = llm_outputs.last_hidden_state.clone()
158
 
159
  start = self.crop_start
160
  end = start + self._orig_max_length_llm
hymotion/pipeline/motion_diffusion.py CHANGED
@@ -176,7 +176,6 @@ class MotionGeneration(torch.nn.Module):
176
  def load_in_demo(
177
  self,
178
  ckpt_name: str,
179
- mean_std_name: Optional[str] = None,
180
  build_text_encoder: bool = True,
181
  allow_empty_ckpt: bool = False,
182
  ) -> None:
@@ -188,11 +187,6 @@ class MotionGeneration(torch.nn.Module):
188
  else:
189
  checkpoint = torch.load(ckpt_name, map_location="cpu", weights_only=False)
190
  self.load_state_dict(checkpoint["model_state_dict"], strict=False)
191
- if mean_std_name is not None:
192
- assert os.path.exists(mean_std_name), f"{mean_std_name} not found"
193
- if not os.path.isfile(mean_std_name):
194
- mean_std_name = None
195
- self._load_mean_std(mean_std_name)
196
  self.motion_transformer.eval()
197
  if build_text_encoder and not self.uncondition_mode:
198
  self.text_encoder = load_object(self._text_encoder_module, self._text_encoder_cfg)
@@ -299,11 +293,11 @@ class MotionGeneration(torch.nn.Module):
299
  k3d = torch.zeros(B, L, nj, 3, device=device)
300
 
301
  return dict(
302
- latent_denorm=latent_denorm, # (B, L, 201)
303
- keypoints3d=k3d, # (B, L, J, 3)
304
- rot6d=rot6d_smooth, # (B, L, J, 6)
305
- transl=transl_smooth, # (B, L, 3)
306
- root_rotations_mat=root_rotmat_smooth, # (B, L, 3, 3)
307
  )
308
 
309
  @staticmethod
@@ -584,9 +578,8 @@ class MotionFlowMatching(MotionGeneration):
584
  )
585
  with torch.no_grad():
586
  trajectory = odeint(fn, y0, t, **self._noise_scheduler_cfg)
587
- sampled = trajectory[-1]
588
  assert isinstance(sampled, Tensor), f"sampled must be a Tensor, but got {type(sampled)}"
589
- sampled = sampled[:, :length, ...].clone()
590
 
591
  output_dict = self.decode_motion_from_latent(sampled, should_apply_smooothing=True)
592
 
 
176
  def load_in_demo(
177
  self,
178
  ckpt_name: str,
 
179
  build_text_encoder: bool = True,
180
  allow_empty_ckpt: bool = False,
181
  ) -> None:
 
187
  else:
188
  checkpoint = torch.load(ckpt_name, map_location="cpu", weights_only=False)
189
  self.load_state_dict(checkpoint["model_state_dict"], strict=False)
190
  self.motion_transformer.eval()
191
  if build_text_encoder and not self.uncondition_mode:
192
  self.text_encoder = load_object(self._text_encoder_module, self._text_encoder_cfg)
 
293
  k3d = torch.zeros(B, L, nj, 3, device=device)
294
 
295
  return dict(
296
+ latent_denorm=latent_denorm.cpu().detach(), # (B, L, 201)
297
+ keypoints3d=k3d.cpu().detach(), # (B, L, J, 3)
298
+ rot6d=rot6d_smooth.cpu().detach(), # (B, L, J, 6)
299
+ transl=transl_smooth.cpu().detach(), # (B, L, 3)
300
+ root_rotations_mat=root_rotmat_smooth.cpu().detach(), # (B, L, 3, 3)
301
  )
302
 
303
  @staticmethod
 
578
  )
579
  with torch.no_grad():
580
  trajectory = odeint(fn, y0, t, **self._noise_scheduler_cfg)
581
+ sampled = trajectory[-1][:, :length, ...].clone()
582
  assert isinstance(sampled, Tensor), f"sampled must be a Tensor, but got {type(sampled)}"
 
583
 
584
  output_dict = self.decode_motion_from_latent(sampled, should_apply_smooothing=True)
585
 
hymotion/prompt_engineering/client.py ADDED
@@ -0,0 +1,88 @@
1
+ import os
2
+ import json
3
+ import time
4
+ from openai import OpenAI
5
+ import json
6
+
7
+ PROMPT = """
8
+ # Role
9
+ You are an expert in 3D motion analysis, animation timing, and choreography. Your task is to analyze textual action descriptions to estimate execution time and standardize the language for motion generation systems.
10
+
11
+ # Task
12
+ Analyze the user-provided [Input Action] and generate a structured JSON response containing a duration estimate and a refined caption.
13
+
14
+ # Instructions
15
+
16
+ ### 1. Duration Estimation (frame_count)
17
+ - Analyze the complexity, speed, and physical constraints of the described action.
18
+ - Estimate the time required to perform the action in a **smooth, natural, and realistic manner**.
19
+ - Calculate the total duration in frames based on a **30 fps** (frames per second) standard.
20
+ - Output strictly as an Integer.
21
+
22
+ ### 2. Caption Refinement (short_caption)
23
+ - Generate a refined, grammatically correct version of the input description in **English**.
24
+ - **Strict Constraints**:
25
+ - You must **PRESERVE** the original sequence of events (chronological order).
26
+ - You must **RETAIN** all original spatial modifiers (e.g., "left," "upward," "quickly").
27
+ - **DO NOT** add new sub-actions or hallucinate details not present in the input.
28
+ - **DO NOT** delete any specific movements.
29
+ - The goal is to improve clarity and flow while maintaining 100% semantic fidelity to the original request.
30
+
31
+ ### 3. Output Format
32
+ - Return **ONLY** a raw JSON object.
33
+ - Do not use Markdown formatting (i.e., do not use ```json ... ```).
34
+ - Ensure the JSON is valid and parsable.
35
+
36
+ # JSON Structure
37
+ {{
38
+ "duration": <Integer, frames at 30fps>,
39
+ "short_caption": "<String, the refined English description>"
40
+ }}
41
+
42
+ # Input
43
+ {}
44
+ """
45
+
46
+
47
+ class PromptEngineeringClient:
48
+ def __init__(self):
49
+ BASE_URL = os.environ.get("PROMPT_ENGINEERING_BASE_URL", "http://IP:PORT/v1")
50
+ API_KEY = os.environ.get("PROMPT_ENGINEERING_API_KEY", "EMPTY")
51
+ MODEL_NAME = os.environ.get("PROMPT_ENGINEERING_MODEL_NAME", "")
52
+ client = OpenAI(
53
+ api_key=API_KEY,
54
+ base_url=BASE_URL
55
+ )
56
+ self.model_name = MODEL_NAME
57
+ self.client = client
58
+
59
+ def rewrite_prompt_and_infer_time(self, text, max_timeout=30):
60
+ start_time = time.time()
61
+ while True:
62
+ end_time = time.time()
63
+ if end_time - start_time > max_timeout:
64
+ raise Exception("Prompt rewriting timeout")
65
+ try:
66
+ chat_response = self.client.chat.completions.create(
67
+ model=self.model_name,
68
+ messages=[
69
+ {"role": "system", "content": "You are a helpful assistant."},
70
+ {"role": "user", "content": PROMPT.format(text)},
71
+ ]
72
+ )
73
+ chat_response = json.loads(chat_response.choices[0].message.content.strip())
74
+ duration = chat_response["duration"]
75
+ short_caption = chat_response["short_caption"]
76
+ pred_duration = min(12, max(1, int(duration) / 30))
77
+ except Exception as e:
78
+ print(e)
79
+ continue
80
+ else:
81
+ break
82
+
83
+ return pred_duration, short_caption
84
+
85
+ if __name__ == "__main__":
86
+ # python -m hymotion.prompt_engineering.client
87
+ client = PromptEngineeringClient()
88
+ print(client.rewrite_prompt_and_infer_time("A person jumps upward with both legs twice."))
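For illustration, a minimal sketch of the duration post-processing that `rewrite_prompt_and_infer_time` applies to the LLM reply; the reply values below are hypothetical, and only the clamping line mirrors the code above:

    # Hypothetical reply following the PROMPT contract ("duration" is frames at 30 fps).
    llm_reply = {"duration": 135, "short_caption": "A person jumps upward with both legs twice."}
    frames = int(llm_reply["duration"])
    seconds = min(12, max(1, frames / 30))  # clamp to the 1-12 s range used by the demo
    print(seconds, llm_reply["short_caption"])  # 4.5 A person jumps upward with both legs twice.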
hymotion/utils/gradio_css.py CHANGED
@@ -116,6 +116,14 @@ APP_CSS = """
116
  font-weight:500 !important;
117
  }
118
 
119
  /* Button base class and variant */
120
  .generate-button,.rewrite-button,.dice-button{
121
  border:none !important; color:#fff !important; font-weight:600 !important;
@@ -206,6 +214,20 @@ APP_CSS = """
206
  padding:10px !important;
207
  color:var(--text-secondary, #666) !important;
208
  }
 
209
  """
210
 
211
  HEADER_BASE_MD = "# HY-Motion-1.0: Text-to-Motion Playground\n### *Tencent Hunyuan 3D Digital Human Team*"
@@ -248,3 +270,10 @@ def get_placeholder_html() -> str:
248
  </div>
249
  """
250
 
116
  font-weight:500 !important;
117
  }
118
 
119
+ /* Status textbox - dynamic height based on content */
120
+ .status-textbox textarea{
121
+ height:auto !important;
122
+ min-height:2.5em !important;
123
+ resize:none !important;
124
+ overflow-y:hidden !important;
125
+ }
126
+
127
  /* Button base class and variant */
128
  .generate-button,.rewrite-button,.dice-button{
129
  border:none !important; color:#fff !important; font-weight:600 !important;
 
214
  padding:10px !important;
215
  color:var(--text-secondary, #666) !important;
216
  }
217
+
218
+ /* Example Gallery Styles */
219
+ .example-gallery-display{
220
+ padding:0 !important; margin:12px 0 !important; border:none !important;
221
+ box-shadow:none !important; background:var(--iframe-bg) !important;
222
+ border-radius:10px !important; position:relative !important;
223
+ min-height:500px !important;
224
+ }
225
+
226
+ .example-gallery-display iframe{
227
+ width:100% !important; min-height:500px !important;
228
+ border:none !important; border-radius:10px !important; display:block !important;
229
+ background:var(--iframe-bg) !important;
230
+ }
231
  """
232
 
233
  HEADER_BASE_MD = "# HY-Motion-1.0: Text-to-Motion Playground\n### *Tencent Hunyuan 3D Digital Human Team*"
 
270
  </div>
271
  """
272
 
273
+
274
+ WITHOUT_PROMPT_ENGINEERING_WARNING = """
275
+ <div style='color: #ff0000; font-weight: bold;'>
276
+ <p>Prompt engineering is not available. You should use `A person ...` format to describe the motion and manually adjust the duration. Click [📚 Example Prompts] to see more examples.</p>
277
+ <p>Non-humanoid Characters, Multi-person Interactions and Environment & Camera are not supported.</p>
278
+ </div>
279
+ """
hymotion/utils/gradio_runtime.py CHANGED
@@ -26,6 +26,7 @@ def _now():
26
  ms = int((t - int(t)) * 1000)
27
  return time.strftime("%Y%m%d_%H%M%S", time.localtime(t)) + f"{ms:03d}"
28
 
 
29
  _MODEL_CACHE = None
30
 
31
 
@@ -37,19 +38,14 @@ class SimpleRuntime(torch.nn.Module):
37
  # prompt engineering
38
  if self.load_prompt_engineering:
39
  print(f"[{self.__class__.__name__}] Loading prompt engineering...")
40
- self.prompt_rewriter = PromptRewriter(
41
- host=None, model_path=None, device="cpu"
42
- )
43
  else:
44
  self.prompt_rewriter = None
45
  # text encoder
46
  if self.load_text_encoder:
47
  print(f"[{self.__class__.__name__}] Loading text encoder...")
48
  _text_encoder_module = "hymotion/network/text_encoders/text_encoder.HYTextModel"
49
- _text_encoder_cfg = {
50
- "llm_type": "qwen3",
51
- "max_length_llm": 128
52
- }
53
  text_encoder = load_object(_text_encoder_module, _text_encoder_cfg)
54
  else:
55
  text_encoder = None
@@ -66,7 +62,6 @@ class SimpleRuntime(torch.nn.Module):
66
  print(f"[{self.__class__.__name__}] Loading ckpt: {ckpt_name}")
67
  pipeline.load_in_demo(
68
  os.path.join(os.path.dirname(config_path), ckpt_name),
69
- "stats",
70
  build_text_encoder=False,
71
  allow_empty_ckpt=False,
72
  )
@@ -87,7 +82,6 @@ class SimpleRuntime(torch.nn.Module):
87
  self.fbx_converter = None
88
  print(">>> FBX module not found. FBX export will be disabled.")
89
 
90
-
91
  def _generate_html_content(
92
  self,
93
  timestamp: str,
@@ -128,7 +122,6 @@ class SimpleRuntime(torch.nn.Module):
128
  # Return error HTML
129
  return f"<html><body><h1>Error generating visualization</h1><p>{str(e)}</p></body></html>"
130
 
131
-
132
  def _generate_fbx_files(
133
  self,
134
  visualization_data: dict,
@@ -247,6 +240,7 @@ class SimpleRuntime(torch.nn.Module):
247
  else:
248
  raise ValueError(f">>> Invalid output format: {output_format}")
249
 
 
250
  class ModelInference:
251
  """
252
  Handles model inference and data processing for Depth Anything 3.
@@ -288,7 +282,7 @@ class ModelInference:
288
  config_path=os.path.join(self.model_path, "config.yml"),
289
  ckpt_name="latest.ckpt",
290
  load_prompt_engineering=self.use_prompt_engineering,
291
- load_text_encoder=self.use_text_encoder
292
  )
293
  # Load to CPU first (faster, and allows reuse)
294
  _MODEL_CACHE = _MODEL_CACHE.to("cpu")
@@ -306,9 +300,7 @@ class ModelInference:
306
 
307
  return _MODEL_CACHE
308
 
309
- def run_inference(
310
- self, *args, **kwargs
311
- ):
312
  """
313
  Run HY-Motion model inference on text prompts.
314
  Args:
@@ -333,7 +325,6 @@ class ModelInference:
333
  # Initialize model if needed - get model instance (not stored in self)
334
  model = self.initialize_model(device)
335
 
336
-
337
  with torch.no_grad():
338
  print(f"[{self.__class__.__name__}] Running inference with torch.no_grad")
339
  html_content, fbx_files, model_output = model.generate_motion(*args, **kwargs)
@@ -347,7 +338,13 @@ class ModelInference:
347
 
348
  return html_content, fbx_files
349
 
 
350
  if __name__ == "__main__":
351
  # python -m hymotion.utils.gradio_runtime
352
- runtime = SimpleRuntime(config_path="assets/config_simplified.yml", ckpt_name="latest.ckpt", load_prompt_engineering=False, load_text_encoder=False)
353
- print(runtime.pipeline)
26
  ms = int((t - int(t)) * 1000)
27
  return time.strftime("%Y%m%d_%H%M%S", time.localtime(t)) + f"{ms:03d}"
28
 
29
+
30
  _MODEL_CACHE = None
31
 
32
 
 
38
  # prompt engineering
39
  if self.load_prompt_engineering:
40
  print(f"[{self.__class__.__name__}] Loading prompt engineering...")
41
+ self.prompt_rewriter = PromptRewriter(host=None, model_path=None, device="cpu")
 
 
42
  else:
43
  self.prompt_rewriter = None
44
  # text encoder
45
  if self.load_text_encoder:
46
  print(f"[{self.__class__.__name__}] Loading text encoder...")
47
  _text_encoder_module = "hymotion/network/text_encoders/text_encoder.HYTextModel"
48
+ _text_encoder_cfg = {"llm_type": "qwen3", "max_length_llm": 128}
 
 
 
49
  text_encoder = load_object(_text_encoder_module, _text_encoder_cfg)
50
  else:
51
  text_encoder = None
 
62
  print(f"[{self.__class__.__name__}] Loading ckpt: {ckpt_name}")
63
  pipeline.load_in_demo(
64
  os.path.join(os.path.dirname(config_path), ckpt_name),
 
65
  build_text_encoder=False,
66
  allow_empty_ckpt=False,
67
  )
 
82
  self.fbx_converter = None
83
  print(">>> FBX module not found. FBX export will be disabled.")
84
 
 
85
  def _generate_html_content(
86
  self,
87
  timestamp: str,
 
122
  # Return error HTML
123
  return f"<html><body><h1>Error generating visualization</h1><p>{str(e)}</p></body></html>"
124
 
 
125
  def _generate_fbx_files(
126
  self,
127
  visualization_data: dict,
 
240
  else:
241
  raise ValueError(f">>> Invalid output format: {output_format}")
242
 
243
+
244
  class ModelInference:
245
  """
246
  Handles model inference and data processing for Depth Anything 3.
 
282
  config_path=os.path.join(self.model_path, "config.yml"),
283
  ckpt_name="latest.ckpt",
284
  load_prompt_engineering=self.use_prompt_engineering,
285
+ load_text_encoder=self.use_text_encoder,
286
  )
287
  # Load to CPU first (faster, and allows reuse)
288
  _MODEL_CACHE = _MODEL_CACHE.to("cpu")
 
300
 
301
  return _MODEL_CACHE
302
 
303
+ def run_inference(self, *args, **kwargs):
 
 
304
  """
305
  Run HY-Motion model inference on text prompts.
306
  Args:
 
325
  # Initialize model if needed - get model instance (not stored in self)
326
  model = self.initialize_model(device)
327
 
 
328
  with torch.no_grad():
329
  print(f"[{self.__class__.__name__}] Running inference with torch.no_grad")
330
  html_content, fbx_files, model_output = model.generate_motion(*args, **kwargs)
 
338
 
339
  return html_content, fbx_files
340
 
341
+
342
  if __name__ == "__main__":
343
  # python -m hymotion.utils.gradio_runtime
344
+ runtime = SimpleRuntime(
345
+ config_path="assets/config_simplified.yml",
346
+ ckpt_name="latest.ckpt",
347
+ load_prompt_engineering=False,
348
+ load_text_encoder=False,
349
+ )
350
+ print(runtime.pipeline)
hymotion/utils/t2m_runtime.py CHANGED
@@ -128,7 +128,6 @@ class T2MRuntime:
128
  device = torch.device("cpu")
129
  pipeline.load_in_demo(
130
  self.ckpt_name,
131
- os.path.dirname(self.ckpt_name),
132
  build_text_encoder=not self.skip_text,
133
  allow_empty_ckpt=allow_empty_ckpt,
134
  )
@@ -145,7 +144,6 @@ class T2MRuntime:
145
  )
146
  p.load_in_demo(
147
  self.ckpt_name,
148
- os.path.dirname(self.ckpt_name),
149
  build_text_encoder=not self.skip_text,
150
  allow_empty_ckpt=allow_empty_ckpt,
151
  )
@@ -238,6 +236,8 @@ class T2MRuntime:
238
  raise
239
  finally:
240
  self._release_pipeline(pi)
 
 
241
 
242
  def load_text_encoder(self) -> None:
243
  """
 
128
  device = torch.device("cpu")
129
  pipeline.load_in_demo(
130
  self.ckpt_name,
 
131
  build_text_encoder=not self.skip_text,
132
  allow_empty_ckpt=allow_empty_ckpt,
133
  )
 
144
  )
145
  p.load_in_demo(
146
  self.ckpt_name,
 
147
  build_text_encoder=not self.skip_text,
148
  allow_empty_ckpt=allow_empty_ckpt,
149
  )
 
236
  raise
237
  finally:
238
  self._release_pipeline(pi)
239
+ if torch.cuda.is_available():
240
+ torch.cuda.empty_cache()
241
 
242
  def load_text_encoder(self) -> None:
243
  """
requirements.txt CHANGED
@@ -3,11 +3,13 @@ huggingface_hub==0.30.0
3
 
4
  torch==2.5.1
5
  torchvision==0.20.1
 
6
  accelerate==0.30.1
7
  diffusers==0.26.3
8
  transformers==4.53.3
9
  einops==0.8.1
10
  safetensors==0.5.3
 
11
 
12
  numpy>=1.24.0,<2.0
13
  scipy>=1.10.0
@@ -20,5 +22,3 @@ requests==2.32.4
20
  openai==1.78.1
21
 
22
  fbxsdkpy==2020.1.post2
23
-
24
- torchdiffeq==0.2.5
 
3
 
4
  torch==2.5.1
5
  torchvision==0.20.1
6
+ torchdiffeq==0.2.5
7
  accelerate==0.30.1
8
  diffusers==0.26.3
9
  transformers==4.53.3
10
  einops==0.8.1
11
  safetensors==0.5.3
12
+ bitsandbytes==0.49.0
13
 
14
  numpy>=1.24.0,<2.0
15
  scipy>=1.10.0
 
22
  openai==1.78.1
23
 
24
  fbxsdkpy==2020.1.post2