Spaces:

abdullahhameed111
/

Challenge_Task

Build error

App Files Files Community

Challenge_Task / app /utils.py

abdullah0101

Add full application code and deps

3859913 6 months ago

raw

history blame contribute delete

2.49 kB

	import asyncio
	import subprocess
	from pathlib import Path
	from typing import List

	import torchaudio
	from yt_dlp import YoutubeDL
	import webrtcvad

	from .config import AUDIO_CACHE

	# ---------------------------------------------------------------------------
	# ffmpeg helpers
	# ---------------------------------------------------------------------------

	def _run(cmd: List[str]) -> None:
	    """Run an external command and raise if it exits with a non-zero status.

	    Args:
	        cmd: Program and arguments as an argv list; it is executed directly,
	            without a shell.

	    Raises:
	        RuntimeError: If the exit status is non-zero. The message is the
	            command's captured stderr.
	    """
	    # Both streams are captured so the child's output never reaches our
	    # own stdout/stderr; only stderr is used, for the error message.
	    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	    if proc.returncode != 0:
	        # stderr may contain bytes that are not valid UTF-8 (e.g. file names
	        # or metadata echoed by the tool). Decode leniently so the real
	        # failure is reported instead of a UnicodeDecodeError.
	        raise RuntimeError(proc.stderr.decode(errors="replace"))

	# ---------------------------------------------------------------------------
	# Video → Audio
	# ---------------------------------------------------------------------------
	async def download_video(url: str, out_dir: Path) -> Path:
	    """Async wrapper around yt-dlp to pull remote video assets.

	    The blocking download runs in the event loop's default executor so the
	    loop stays responsive while yt-dlp works.

	    Args:
	        url: Address of the remote video to fetch.
	        out_dir: Directory that receives the file, named ``download.<ext>``.

	    Returns:
	        Path of the downloaded file inside ``out_dir``.

	    Raises:
	        RuntimeError: If no ``download.*`` file exists after yt-dlp returns.
	    """
	    ydl_opts = {
	        "quiet": True,
	        "no_warnings": True,
	        "outtmpl": str(out_dir / "download.%(ext)s"),
	        "format": "bestvideo+bestaudio/best / best",
	    }
	    loop = asyncio.get_running_loop()

	    def _job():
	        with YoutubeDL(ydl_opts) as ydl:
	            ydl.download([url])

	    await loop.run_in_executor(None, _job)

	    # Skip in-progress/bookkeeping files (presumably the ".part"/".ytdl"
	    # suffixes yt-dlp uses for unfinished downloads -- confirm against the
	    # yt-dlp docs) so a half-written file is never handed to ffmpeg.
	    candidates = [
	        path
	        for path in out_dir.glob("download.*")
	        if path.is_file() and path.suffix not in (".part", ".ytdl")
	    ]
	    if not candidates:
	        # A bare next() on an empty glob would raise StopIteration, which a
	        # coroutine turns into an opaque RuntimeError; raise a clear one.
	        raise RuntimeError(f"yt-dlp produced no output file in {out_dir} for {url}")
	    # A reused out_dir may still hold a download with another extension from
	    # an earlier call; the most recently modified file is the current one.
	    return max(candidates, key=lambda path: path.stat().st_mtime)

	async def extract_audio(video_path: Path, wav_path: Path, sr: int = 16000):
	    """Extract the audio track of a media file into a mono WAV via ffmpeg.

	    The ffmpeg invocation is blocking, so it is handed to the event loop's
	    default executor.

	    Args:
	        video_path: Input media file passed to ffmpeg with ``-i``.
	        wav_path: Output file; an existing file is overwritten (``-y``).
	        sr: Output sample rate in Hz (``-ar``).

	    Raises:
	        RuntimeError: Propagated from ``_run`` when ffmpeg exits non-zero.
	    """
	    input_opts = ["-y", "-i", str(video_path)]
	    # -vn: no video stream, -ac 1: single channel, -ar: resample to ``sr``.
	    output_opts = ["-vn", "-ac", "1", "-ar", str(sr), str(wav_path)]
	    argv = ["ffmpeg", *input_opts, *output_opts]
	    await asyncio.get_running_loop().run_in_executor(None, _run, argv)

	# ---------------------------------------------------------------------------
	# VAD trimming (WebRTC)
	# ---------------------------------------------------------------------------

	def _frame_gen(frame_ms, pcm16, sr):
	    """Yield consecutive fixed-duration frames from 16-bit PCM bytes.

	    Args:
	        frame_ms: Frame duration in milliseconds.
	        pcm16: Raw PCM audio as bytes, 2 bytes per sample.
	        sr: Sample rate in Hz.

	    Yields:
	        Byte slices of exactly one frame each. A trailing remainder shorter
	        than a full frame is dropped, because the VAD consumer only accepts
	        complete frames.

	    Raises:
	        ValueError: If the frame size works out to zero samples.
	    """
	    # Integer arithmetic keeps the frame a whole number of samples; the
	    # previous float computation could truncate to an odd byte count and
	    # split an int16 sample across two frames.
	    samples_per_frame = int(sr * frame_ms) // 1000
	    n = samples_per_frame * 2  # 2 bytes per int16 sample
	    if n <= 0:
	        raise ValueError("frame_ms and sr must yield at least one sample per frame")
	    # Stop before a short tail: only offsets with a full frame remaining.
	    for i in range(0, len(pcm16) - n + 1, n):
	        yield pcm16[i : i + n]

	def trim_silence(wav_path: Path, aggressiveness: int = 3) -> Path:
	    """Trim leading and trailing non-speech from a WAV file using WebRTC VAD.

	    The audio is cut into 30 ms frames, each frame is classified as speech
	    or not, and everything before the first and after the last speech frame
	    is discarded.

	    Args:
	        wav_path: WAV file to analyse. NOTE(review): webrtcvad is documented
	            to accept only 8, 16, 32 or 48 kHz audio -- confirm the callers
	            always resample first (``extract_audio`` defaults to 16 kHz).
	        aggressiveness: Value passed to ``webrtcvad.Vad``.

	    Returns:
	        Path of a new ``<stem>_trim.wav`` next to the input, or ``wav_path``
	        itself when no frame is classified as speech.
	    """
	    frame_ms = 30
	    waveform, sr = torchaudio.load(str(wav_path))

	    # The VAD works on one channel; averaging over the channel axis is a
	    # no-op for mono input and a simple downmix otherwise.
	    mono = waveform.mean(dim=0)
	    # Scale to int16, clamping first so a full-scale +1.0 sample does not
	    # wrap around to -32768.
	    pcm16 = mono.mul(32768).clamp(-32768, 32767).short().numpy().tobytes()

	    # Derive the frame size from the actual sample rate instead of assuming
	    # 480 samples (which is only right for 30 ms at 16 kHz).
	    frame_samples = sr * frame_ms // 1000
	    frame_bytes = frame_samples * 2

	    vad = webrtcvad.Vad(aggressiveness)
	    # Skip any short trailing frame: the VAD rejects incomplete frames.
	    voiced = [
	        vad.is_speech(frame, sr)
	        for frame in _frame_gen(frame_ms, pcm16, sr)
	        if len(frame) == frame_bytes
	    ]
	    if not any(voiced):
	        return wav_path

	    first = voiced.index(True)
	    last = len(voiced) - 1 - voiced[::-1].index(True)
	    # Slice the loaded tensor directly (all channels); torchaudio has no
	    # ``tensor`` constructor, so no array-to-tensor round trip is made.
	    kept = waveform[:, first * frame_samples : (last + 1) * frame_samples]
	    out = wav_path.with_name(wav_path.stem + "_trim.wav")
	    torchaudio.save(str(out), kept, sr)
	    return out