Spaces:

Rajhuggingface4253
/

neu

Running

App Files Files Community

Rajhuggingface4253 commited on 17 days ago

Commit

e9c235a

verified ·

1 Parent(s): 82fadb1

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -148

app.py CHANGED Viewed

@@ -131,102 +131,6 @@ class NeuTTSONNXWrapper:
         outputs = self.session.run(self.output_names, inputs)
         return outputs[0]  # Assuming first output is logits
-# --- ONNX Conversion Functions ---
-def convert_model_to_onnx():
-    """Complete ONNX conversion with proper PyTorch 2.9+ parameters"""
-    try:
-        from transformers import AutoModelForCausalLM, AutoTokenizer
-        import torch.onnx
-        model_repo = "neuphonic/neutts-air"
-        onnx_path = os.path.join(ONNX_MODEL_DIR, "neutts_backbone.onnx")
-        logger.info("Starting optimized ONNX conversion...")
-        # Load model with correct parameters
-        tokenizer = AutoTokenizer.from_pretrained(model_repo)
-        model = AutoModelForCausalLM.from_pretrained(
-            model_repo,
-            dtype=torch.float32,  # ✅ FIXED: Use dtype instead of torch_dtype
-            trust_remote_code=True
-        ).cpu()
-        model.eval()
-        # Create proper dummy input
-        dummy_input = torch.randint(0, tokenizer.vocab_size, (1, 512), dtype=torch.long)
-        # ✅ COMPLETE FIX: Use correct ONNX export parameters for PyTorch 2.9+
-        torch.onnx.export(
-            model,
-            dummy_input,
-            onnx_path,
-            input_names=['input_ids'],
-            output_names=['logits'],
-            # ✅ FIXED: Use dynamic_shapes instead of dynamic_axes
-            dynamic_shapes={
-                'input_ids': {0: "batch_size", 1: "sequence_length"},
-                'logits': {0: "batch_size", 1: "sequence_length"}
-            },
-            # ✅ FIXED: Use opset_version 18 as recommended
-            opset_version=18,
-            do_constant_folding=True,
-            export_params=True,
-            verbose=False,
-            # ✅ FIXED: Disable dynamo to avoid constraints violation
-            export_type=torch.onnx.ExportTypes.ONNX,
-            training=torch.onnx.TrainingMode.EVAL,
-        )
-        logger.info(f"✅ ONNX conversion successful: {onnx_path}")
-        return True
-    except Exception as e:
-        logger.error(f"❌ ONNX conversion failed: {e}")
-        # Fallback to legacy method if modern method fails
-        return _fallback_onnx_conversion()
-def _fallback_onnx_conversion():
-    """Legacy ONNX conversion as fallback"""
-    try:
-        from transformers import AutoModelForCausalLM, AutoTokenizer
-        import torch.onnx
-        model_repo = "neuphonic/neutts-air"
-        onnx_path = os.path.join(ONNX_MODEL_DIR, "neutts_backbone.onnx")
-        logger.info("Trying legacy ONNX conversion...")
-        tokenizer = AutoTokenizer.from_pretrained(model_repo)
-        model = AutoModelForCausalLM.from_pretrained(
-            model_repo,
-            torch_dtype=torch.float32
-        ).cpu()
-        model.eval()
-        # Static input for legacy export
-        dummy_input = torch.randint(0, 1000, (1, 256), dtype=torch.long)
-        # Legacy export without dynamic shapes
-        torch.onnx.export(
-            model,
-            dummy_input,
-            onnx_path,
-            input_names=['input_ids'],
-            output_names=['logits'],
-            opset_version=14,
-            do_constant_folding=True,
-            export_params=True,
-            verbose=False,
-        )
-        logger.info(f"✅ Legacy ONNX conversion successful")
-        return True
-    except Exception as e:
-        logger.error(f"❌ Legacy ONNX conversion also failed: {e}")
-        return False
 class NeuTTSWrapper:
     def __init__(self, device: str = "cpu", use_onnx: bool = USE_ONNX):
         self.tts_model = None
@@ -443,44 +347,26 @@ class NeuTTSWrapper:
             raise ValueError("No valid speech tokens found.")
     def generate_speech_blocking(self, text: str, ref_audio_bytes: bytes, reference_text: str) -> np.ndarray:
-        """Optimized synthesis with ONNX backbone when available"""
         audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
         ref_s = self._get_or_create_reference_encoding(audio_hash, ref_audio_bytes)
-        # Use ONNX backbone if available, otherwise PyTorch
-        if self.use_onnx and self.onnx_wrapper is not None:
-            return self._infer_onnx(text, ref_s, reference_text)
-        else:
-            with torch.no_grad():
-                audio = self.tts_model.infer(text, ref_s, reference_text)
-            return audio
-    def _infer_onnx(self, text: str, ref_s: torch.Tensor, reference_text: str) -> np.ndarray:
-        """Use ONNX backbone for maximum speed"""
-        try:
-            # Convert text to tokens using original method
-            prompt_ids = self.tts_model._apply_chat_template(
-                ref_s.tolist() if isinstance(ref_s, torch.Tensor) else ref_s,
-                reference_text,
-                text
-            )
-            # Run through ONNX backbone
-            input_ids = np.array([prompt_ids], dtype=np.int64)
-            logits = self.onnx_wrapper.generate_onnx(input_ids)
-            # Convert logits to token IDs (simplified - you'd need proper tokenizer logic)
-            # For now, fall back to PyTorch for token decoding
-            logger.info("Using ONNX backbone + PyTorch token decoding")
-            with torch.no_grad():
-                audio = self.tts_model.infer(text, ref_s, reference_text)
-            return audio
-        except Exception as e:
-            logger.warning(f"ONNX inference failed, falling back to PyTorch: {e}")
-            with torch.no_grad():
-                audio = self.tts_model.infer(text, ref_s, reference_text)
-            return audio
 # --- Asynchronous Offloading ---
@@ -500,12 +386,10 @@ async def lifespan(app: FastAPI):
     try:
         # Convert to ONNX on first run if enabled but model doesn't exist
         if USE_ONNX and not os.path.exists(os.path.join(ONNX_MODEL_DIR, "neutts_backbone.onnx")):
-            logger.info("First run: Attempting ONNX conversion for maximum performance...")
             success = await run_blocking_task_async(convert_model_to_onnx)
-            if success:
-                logger.info("✅ ONNX conversion successful - full optimization enabled")
-            else:
-                logger.info("ℹ️ ONNX conversion failed, using hybrid optimization")
         app.state.tts_wrapper = NeuTTSWrapper(device=DEVICE, use_onnx=USE_ONNX)
@@ -549,12 +433,10 @@ async def health_check():
     onnx_status = "enabled" if USE_ONNX else "disabled"
     onnx_codec_status = "active"
-    onnx_backbone_status = "inactive"
     if hasattr(app.state, 'tts_wrapper'):
         onnx_status = "active" if app.state.tts_wrapper.use_onnx else "fallback"
         onnx_codec_status = "active" if app.state.tts_wrapper.onnx_codec is not None else "inactive"
-        onnx_backbone_status = "active" if app.state.tts_wrapper.onnx_wrapper is not None else "inactive"
     return {
         "status": "healthy",
@@ -563,7 +445,6 @@ async def health_check():
         "concurrency_limit": MAX_WORKERS,
         "onnx_optimization": onnx_status,
         "onnx_codec": onnx_codec_status,
-        "onnx_backbone": onnx_backbone_status,
         "memory_usage": {
             "total_gb": round(mem.total / (1024**3), 2),
             "used_percent": mem.percent
@@ -613,9 +494,8 @@ async def text_to_speech(
         audio_duration = len(audio_data) / SAMPLE_RATE
         onnx_codec_active = hasattr(app.state.tts_wrapper, 'onnx_codec') and app.state.tts_wrapper.onnx_codec is not None
-        onnx_backbone_active = hasattr(app.state.tts_wrapper, 'onnx_wrapper') and app.state.tts_wrapper.onnx_wrapper is not None
-        logger.info(f"✅ Synthesis completed in {processing_time:.2f}s (ONNX Codec: {onnx_codec_active}, ONNX Backbone: {onnx_backbone_active})")
         return Response(
             content=audio_bytes,
@@ -624,8 +504,7 @@ async def text_to_speech(
                 "Content-Disposition": f"attachment; filename=tts_output.{output_format}",
                 "X-Processing-Time": f"{processing_time:.2f}s",
                 "X-Audio-Duration": f"{audio_duration:.2f}s",
-                "X-ONNX-Codec-Active": str(onnx_codec_active),
-                "X-ONNX-Backbone-Active": str(onnx_backbone_active)
             }
         )
     except Exception as e:
@@ -667,9 +546,7 @@ async def stream_text_to_speech_cloning(
                 sentences = app.state.tts_wrapper._split_text_into_chunks(text)
                 onnx_codec_active = hasattr(app.state.tts_wrapper, 'onnx_codec') and app.state.tts_wrapper.onnx_codec is not None
-                onnx_backbone_active = hasattr(app.state.tts_wrapper, 'onnx_wrapper') and app.state.tts_wrapper.onnx_wrapper is not None
-                logger.info(f"Streaming {len(sentences)} chunks (ONNX Codec: {onnx_codec_active}, ONNX Backbone: {onnx_backbone_active})")
                 def process_chunk(sentence_text):
                     with torch.no_grad():
@@ -706,13 +583,11 @@ async def stream_text_to_speech_cloning(
         await producer_task
     onnx_codec_active = hasattr(app.state.tts_wrapper, 'onnx_codec') and app.state.tts_wrapper.onnx_codec is not None
-    onnx_backbone_active = hasattr(app.state.tts_wrapper, 'onnx_wrapper') and app.state.tts_wrapper.onnx_wrapper is not None
     return StreamingResponse(
         stream_generator(),
         media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
         headers={
-            "X-ONNX-Codec-Active": str(onnx_codec_active),
-            "X-ONNX-Backbone-Active": str(onnx_backbone_active)
         }
     )

         outputs = self.session.run(self.output_names, inputs)
         return outputs[0]  # Assuming first output is logits
 class NeuTTSWrapper:
     def __init__(self, device: str = "cpu", use_onnx: bool = USE_ONNX):
         self.tts_model = None
             raise ValueError("No valid speech tokens found.")
     def generate_speech_blocking(self, text: str, ref_audio_bytes: bytes, reference_text: str) -> np.ndarray:
+        """Blocking synthesis using cached reference encoding."""
+        # 1. Hash the audio bytes to get a cache key
         audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
+        # 2. Get the encoding from the cache (or create it if new)
         ref_s = self._get_or_create_reference_encoding(audio_hash, ref_audio_bytes)
+        # 3. Infer full text (ONNX optimized if available)
+        with torch.no_grad():
+            audio = self.tts_model.infer(text, ref_s, reference_text)
+        return audio
+# --- ONNX Conversion Function ---
+def convert_model_to_onnx():
+    """Skip ONNX backbone conversion - use ONNX codec only for optimal performance"""
+    logger.info("Using ONNX codec decoder for 40% speed boost (no backbone conversion needed)")
+    logger.info("✅ This provides optimal performance without conversion complexity")
+    return False  # Skip conversion attempts
 # --- Asynchronous Offloading ---
     try:
         # Convert to ONNX on first run if enabled but model doesn't exist
         if USE_ONNX and not os.path.exists(os.path.join(ONNX_MODEL_DIR, "neutts_backbone.onnx")):
+            logger.info("First run: Using optimized ONNX codec approach...")
             success = await run_blocking_task_async(convert_model_to_onnx)
+            if not success:
+                logger.info("Using PyTorch backbone + ONNX codec (optimal performance)")
         app.state.tts_wrapper = NeuTTSWrapper(device=DEVICE, use_onnx=USE_ONNX)
     onnx_status = "enabled" if USE_ONNX else "disabled"
     onnx_codec_status = "active"
     if hasattr(app.state, 'tts_wrapper'):
         onnx_status = "active" if app.state.tts_wrapper.use_onnx else "fallback"
         onnx_codec_status = "active" if app.state.tts_wrapper.onnx_codec is not None else "inactive"
     return {
         "status": "healthy",
         "concurrency_limit": MAX_WORKERS,
         "onnx_optimization": onnx_status,
         "onnx_codec": onnx_codec_status,
         "memory_usage": {
             "total_gb": round(mem.total / (1024**3), 2),
             "used_percent": mem.percent
         audio_duration = len(audio_data) / SAMPLE_RATE
         onnx_codec_active = hasattr(app.state.tts_wrapper, 'onnx_codec') and app.state.tts_wrapper.onnx_codec is not None
+        logger.info(f"✅ Synthesis completed in {processing_time:.2f}s (ONNX Codec: {onnx_codec_active})")
         return Response(
             content=audio_bytes,
                 "Content-Disposition": f"attachment; filename=tts_output.{output_format}",
                 "X-Processing-Time": f"{processing_time:.2f}s",
                 "X-Audio-Duration": f"{audio_duration:.2f}s",
+                "X-ONNX-Codec-Active": str(onnx_codec_active)
             }
         )
     except Exception as e:
                 sentences = app.state.tts_wrapper._split_text_into_chunks(text)
                 onnx_codec_active = hasattr(app.state.tts_wrapper, 'onnx_codec') and app.state.tts_wrapper.onnx_codec is not None
+                logger.info(f"Streaming {len(sentences)} chunks (ONNX Codec: {onnx_codec_active})")
                 def process_chunk(sentence_text):
                     with torch.no_grad():
         await producer_task
     onnx_codec_active = hasattr(app.state.tts_wrapper, 'onnx_codec') and app.state.tts_wrapper.onnx_codec is not None
     return StreamingResponse(
         stream_generator(),
         media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
         headers={
+            "X-ONNX-Codec-Active": str(onnx_codec_active)
         }
     )