Spaces:

Jedi09
/

voice_to_text_gemini

Running

App Files Files Community

voice_to_text_gemini / app.py

Jedi09

Update app.py

29a8059 verified 14 days ago

raw

history blame

7.94 kB

	"""
	Ses Deşifre Pro - Turkish speech-to-text transcription.
	Professional interface for Hugging Face Spaces.
	Gradio 6.x compatible, with local AI summarization (mT5) and
	translation (NLLB) plus streaming output.
	"""

	import gradio as gr
	from faster_whisper import WhisperModel
	import tempfile
	import time
	import os
	# import requests # Artık gerek yok
	from transformers import pipeline
	import torch

	# ==================== CONFIG & MODELS ====================

	# 1. WHISPER MODEL (speech-to-text)
	MODEL_SIZE = "medium"
	model = None  # stays None when loading fails; transcribe() checks for this

	try:
	    print(f"📥 Whisper {MODEL_SIZE} modeli yükleniyor...")
	    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
	    print("✅ Whisper Modeli Hazır!")
	except Exception as e:
	    # Best-effort load: report the failure and leave `model` as None so the
	    # module still imports and the UI can show an error instead of crashing.
	    print(f"❌ Whisper Yükleme Hatası: {e}")

	# 2. LOCAL AI PIPELINES (lazy caches, filled on first use by the loaders)
	summarizer_pipe = None
	translator_pipe = None

	def load_summarizer():
	    """Return the shared summarization pipeline, creating it on first use.

	    The pipeline is cached in the module-level ``summarizer_pipe`` so the
	    model is constructed only once per process; later calls return the
	    cached object.

	    Returns:
	        The ``transformers`` summarization pipeline built from
	        ``ozcangundes/mt5-small-turkish-summarization``.
	    """
	    global summarizer_pipe
	    if summarizer_pipe is None:
	        print("📥 Özetleme Modeli (mT5-Small) yükleniyor...")
	        # device=-1 selects the CPU in the transformers pipeline API.
	        summarizer_pipe = pipeline(
	            "summarization",
	            model="ozcangundes/mt5-small-turkish-summarization",
	            device=-1,
	        )
	        print("✅ Özetleme Modeli Hazır!")
	    return summarizer_pipe

	def load_translator():
	    """Return the shared translation pipeline, creating it on first use.

	    The pipeline is cached in the module-level ``translator_pipe`` so the
	    model is constructed only once per process; later calls return the
	    cached object.

	    Returns:
	        The ``transformers`` translation pipeline built from
	        ``facebook/nllb-200-distilled-600M``.
	    """
	    global translator_pipe
	    if translator_pipe is None:
	        print("📥 Çeviri Modeli (NLLB-200) yükleniyor...")
	        # NLLB can be somewhat slow on CPU, but the quality is good.
	        # device=-1 selects the CPU in the transformers pipeline API.
	        translator_pipe = pipeline(
	            "translation",
	            model="facebook/nllb-200-distilled-600M",
	            device=-1,
	        )
	        print("✅ Çeviri Modeli Hazır!")
	    return translator_pipe

	# ==================== AI FUNCTIONS (LOCAL) ====================

	def summarize_locally(text: str, progress=gr.Progress()) -> str:
	    """Summarize a transcript with the local mT5 model.

	    Args:
	        text: Contents of the transcript box. Everything from the statistics
	            separator line onwards is ignored.
	        progress: Gradio progress tracker used to report loading and
	            inference steps.

	    Returns:
	        The generated summary, a "⚠️ ..." message when there is no usable
	        transcript, or a "❌ ..." message when the model call fails.
	    """
	    # The transcript box may hold a status message instead of a transcript:
	    # warnings contain "⚠️" and errors start with "❌". Neither is worth
	    # summarizing.
	    if not text or "⚠️" in text or text.lstrip().startswith("❌"):
	        return "⚠️ Önce geçerli bir metin oluşturun."

	    # Drop the statistics footer appended after the separator line.
	    clean_text = text.split("───────────────────────────────────")[0].strip()
	    if len(clean_text) < 50:
	        return "⚠️ Metin özetlemek için çok kısa."

	    try:
	        progress(0.2, desc="Özetleme modeli yükleniyor...")
	        pipe = load_summarizer()

	        progress(0.5, desc="Metin özetleniyor...")
	        # Bound the summary length; greedy decoding keeps the output deterministic.
	        result = pipe(clean_text, max_length=150, min_length=40, do_sample=False)

	        return result[0]['summary_text']

	    except Exception as e:
	        # Surface the failure in the UI rather than raising into Gradio.
	        return f"❌ Özetleme Hatası: {str(e)}"

	def translate_locally(text: str, target_language: str, progress=gr.Progress()) -> str:
	    """Translate a Turkish transcript with the local NLLB model.

	    Args:
	        text: Contents of the transcript box. Everything from the statistics
	            separator line onwards is ignored.
	        target_language: Turkish display name of the target language (a key
	            of the language map below). Unknown names fall back to English.
	        progress: Gradio progress tracker used to report loading and
	            inference steps.

	    Returns:
	        The translated text, a "⚠️ ..." message when there is nothing to
	        translate, or a "❌ ..." message when the model call fails.
	    """
	    # The transcript box may hold a status message instead of a transcript:
	    # warnings contain "⚠️" and errors start with "❌". Neither should be
	    # translated.
	    if not text or "⚠️" in text or text.lstrip().startswith("❌"):
	        return "⚠️ Çevrilecek metin yok."

	    # Drop the statistics footer appended after the separator line.
	    clean_text = text.split("───────────────────────────────────")[0].strip()
	    if not clean_text:
	        # Nothing left once whitespace and the footer are removed.
	        return "⚠️ Çevrilecek metin yok."

	    # NLLB language codes keyed by the names shown in the UI.
	    lang_map = {
	        "İngilizce": "eng_Latn",
	        "Almanca": "deu_Latn",
	        "Fransızca": "fra_Latn",
	        "Türkçe": "tur_Latn"
	    }
	    src_lang = "tur_Latn"  # the input is always treated as Turkish
	    tgt_lang = lang_map.get(target_language, "eng_Latn")

	    try:
	        progress(0.2, desc="Çeviri modeli yükleniyor...")
	        pipe = load_translator()

	        progress(0.5, desc=f"Çeviriliyor ({target_language})...")
	        # The NLLB pipeline needs both src_lang and tgt_lang to be given.
	        result = pipe(clean_text, src_lang=src_lang, tgt_lang=tgt_lang, max_length=512)

	        return result[0]['translation_text']

	    except Exception as e:
	        # Surface the failure in the UI rather than raising into Gradio.
	        return f"❌ Çeviri Hatası: {str(e)}"


	# ==================== TRANSCRIPTION (WHISPER) ====================

	def transcribe(audio_path: str, progress=gr.Progress()):
	    """Transcribe a Turkish audio file, streaming partial text as it is decoded.

	    Args:
	        audio_path: Path of the audio file, or None when nothing was provided.
	        progress: Gradio progress tracker used to report decoding progress.

	    Yields:
	        ``(text, file_path)`` tuples. ``file_path`` is None for partial
	        results and for warning/error messages. On success the last tuple
	        holds the full transcript followed by a statistics footer, together
	        with the path of a UTF-8 ``.txt`` file containing the transcript.
	    """
	    if model is None:
	        yield "❌ Hata: Whisper modeli yüklenemedi.", None
	        return

	    if audio_path is None:
	        yield "⚠️ Lütfen bir ses dosyası yükleyin.", None
	        return

	    try:
	        start_time = time.time()
	        progress(0, desc="Ses işleniyor...")

	        segments, info = model.transcribe(
	            audio_path,
	            language="tr",
	            beam_size=1,
	            vad_filter=True,
	            word_timestamps=False
	        )

	        duration = info.duration
	        full_text = ""

	        for segment in segments:
	            full_text += segment.text + " "
	            if duration > 0:
	                # Cap below 1.0 so the bar only completes after the file is saved.
	                prog = min(segment.end / duration, 0.99)
	                progress(prog, desc=f"Dönüştürülüyor... ({int(segment.end)}/{int(duration)} sn)")
	            # Stream the text gathered so far; no download file yet.
	            yield full_text.strip(), None

	        elapsed = time.time() - start_time
	        final_result = full_text.strip()

	        if not final_result:
	            yield "⚠️ Ses anlaşılamadı veya sessiz.", None
	            return

	        # Save the transcript to a file the UI can offer for download.
	        progress(0.99, desc="Dosya kaydediliyor...")
	        # delete=False keeps the file on disk after the handle is closed; the
	        # with-block guarantees the handle is closed even if the write fails.
	        with tempfile.NamedTemporaryFile(
	            mode='w', suffix='.txt', delete=False, encoding='utf-8'
	        ) as txt_file:
	            txt_file.write(final_result)

	        # Guard against a zero elapsed time on coarse clocks, which would
	        # otherwise discard a finished transcript with a ZeroDivisionError.
	        speed = duration / elapsed if elapsed > 0 else 0.0
	        stats = f"\n\n───────────────────────────────────\n📊 İstatistikler\n• Süre: {duration:.1f} sn\n• İşlem: {elapsed:.1f} sn\n• Hız: {speed:.1f}x\n───────────────────────────────────"

	        yield final_result + stats, txt_file.name

	    except Exception as e:
	        # Surface the failure in the UI rather than raising into Gradio.
	        yield f"❌ Transkripsiyon Hatası: {str(e)}", None

	# ==================== UI (GRADIO) ====================

	with gr.Blocks(title="Ses Deşifre Pro (Local AI)") as demo:

	    # Page header: CSS overrides plus the banner markup.
	    gr.HTML("""
	<style>
	footer { display: none !important; }
	.gradio-container { max-width: 900px !important; margin: auto !important; }
	</style>
	<div style="text-align: center; padding: 30px; background: linear-gradient(135deg, #10b981 0%, #059669 100%); border-radius: 20px; margin-bottom: 20px; color: white;">
	<h1 style="font-size: 2.2rem; margin: 0;">🎙️ Ses Deşifre & Local AI</h1>
	<p style="opacity: 0.9;">%100 Çevrimdışı • Token Yok • Limit Yok</p>
	</div>
	""")

	    # Input area: audio upload / microphone and the start button.
	    with gr.Row():
	        with gr.Column():
	            audio_input = gr.Audio(
	                label="Ses Dosyası",
	                type="filepath",
	                sources=["upload", "microphone"],
	            )
	            submit_btn = gr.Button("🚀 Başlat", variant="primary", size="lg")

	    # Output area: streamed transcript and the downloadable text file.
	    with gr.Row():
	        with gr.Column():
	            output_text = gr.Textbox(
	                label="Deşifre Metni",
	                placeholder="Sonuçlar burada görünecek...",
	                lines=10,
	                interactive=False,
	            )
	            download_file = gr.File(label="Metni İndir (.txt)")

	    # --- LOCAL AI TOOLS ---
	    gr.HTML("<h3 style='margin-top: 20px; border-bottom: 1px solid #ddd; padding-bottom: 10px;'>🧠 Yerel Yapay Zeka (CPU)</h3>")

	    with gr.Tabs():
	        # Summarization tab
	        with gr.TabItem("✨ Özetle (mT5)"):
	            summary_btn = gr.Button("📝 Metni Özetle")
	            summary_output = gr.Textbox(label="Özet Sonucu", lines=6)

	        # Translation tab
	        with gr.TabItem("🌍 Çevir (NLLB)"):
	            with gr.Row():
	                target_lang = gr.Dropdown(
	                    ["İngilizce", "Almanca", "Fransızca"],
	                    label="Hedef Dil",
	                    value="İngilizce",
	                )
	                translate_btn = gr.Button("A Çevir")
	            translate_output = gr.Textbox(label="Çeviri Sonucu", lines=6)

	    # --- EVENT WIRING ---
	    # transcribe is a generator, so the transcript box updates as it yields.
	    submit_btn.click(
	        transcribe,
	        inputs=[audio_input],
	        outputs=[output_text, download_file],
	    )

	    # Both AI tools read the transcript box as their input.
	    summary_btn.click(
	        summarize_locally,
	        inputs=[output_text],
	        outputs=summary_output,
	    )
	    translate_btn.click(
	        translate_locally,
	        inputs=[output_text, target_lang],
	        outputs=translate_output,
	    )

	# Start the app only when run as a script; no public share link is requested.
	if __name__ == "__main__":
	    demo.launch(share=False)