Spaces:

Zenkad
/

Zenkapremiuim

Sleeping

App Files Community

Zenkad committed 10 days ago

Commit

fec4279

verified ·

1 Parent(s): 41e7713

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -18

app.py CHANGED Viewed

@@ -7,8 +7,7 @@ from transformers import (
 )
 import pdfplumber
-# ---- Modeller ----
 CORE_MODEL_NAME = "TURKCELL/Turkcell-LLM-7b-v1"  # Ana sohbet / QA beyni
 SUMM_MODEL_NAME = "mukayese/mt5-base-turkish-summarization"  # Özet beyni
@@ -23,7 +22,6 @@ core_tokenizer = AutoTokenizer.from_pretrained(CORE_MODEL_NAME)
 core_model = AutoModelForCausalLM.from_pretrained(
     CORE_MODEL_NAME,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None,
 )
 if device == "cpu":
     core_model.to(device)
@@ -32,9 +30,7 @@ if device == "cpu":
 # ---- Yardımcı fonksiyonlar ----
 def extract_pdf_text(pdf_file) -> str:
-    """
-    Yüklenen PDF dosyasından düz metin çıkarır.
-    """
     if pdf_file is None:
         return ""
@@ -49,13 +45,10 @@ def extract_pdf_text(pdf_file) -> str:
 def summarize_text(text: str, max_input_chars: int = 6000) -> str:
-    """
-    PDF metnini kısaltarak mT5 ile özetler.
-    """
     if not text:
         return "PDF'ten metin çıkarılamadı veya dosya boş görünüyor."
-    # Çok uzun metni kırp (MVP için basit truncation)
     text = text[:max_input_chars]
     inputs = summ_tokenizer(
@@ -78,16 +71,13 @@ def summarize_text(text: str, max_input_chars: int = 6000) -> str:
 def answer_question_from_text(text: str, question: str, max_context_chars: int = 4000) -> str:
-    """
-    PDF metni + kullanıcının sorusuna göre, Turkcell-LLM ile cevap üretir.
-    """
     if not text:
         return "Önce geçerli bir PDF yüklemelisin."
     if not question:
         return "Lütfen PDF hakkında bir soru yaz."
-    # Konteksti çok büyütmemek için basit truncation
     context = text[:max_context_chars]
     prompt = (
@@ -118,7 +108,6 @@ def answer_question_from_text(text: str, question: str, max_context_chars: int =
     full_answer = core_tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # Prompt'u cevaptan ayırmak için basit kesme
     if "Cevap:" in full_answer:
         answer = full_answer.split("Cevap:", 1)[-1].strip()
     else:
@@ -127,8 +116,6 @@ def answer_question_from_text(text: str, question: str, max_context_chars: int =
     return answer
-# ---- Gradio Arayüzü ----
 def summarize_pdf(pdf_file):
     text = extract_pdf_text(pdf_file)
     if not text:
@@ -141,6 +128,8 @@ def qa_on_pdf(pdf_file, question):
     return answer_question_from_text(text, question)
 with gr.Blocks() as demo:
     gr.Markdown(
         """
@@ -186,5 +175,6 @@ with gr.Blocks() as demo:
         outputs=[answer_output],
     )
 if __name__ == "__main__":
-    demo.launch(

 )
 import pdfplumber
+# ---- Model isimleri ----
 CORE_MODEL_NAME = "TURKCELL/Turkcell-LLM-7b-v1"  # Ana sohbet / QA beyni
 SUMM_MODEL_NAME = "mukayese/mt5-base-turkish-summarization"  # Özet beyni
 core_model = AutoModelForCausalLM.from_pretrained(
     CORE_MODEL_NAME,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 )
 if device == "cpu":
     core_model.to(device)
 # ---- Yardımcı fonksiyonlar ----
 def extract_pdf_text(pdf_file) -> str:
+    """PDF dosyasından düz metin çıkar."""
     if pdf_file is None:
         return ""
 def summarize_text(text: str, max_input_chars: int = 6000) -> str:
+    """Türkçe özet üret."""
     if not text:
         return "PDF'ten metin çıkarılamadı veya dosya boş görünüyor."
     text = text[:max_input_chars]
     inputs = summ_tokenizer(
 def answer_question_from_text(text: str, question: str, max_context_chars: int = 4000) -> str:
+    """PDF metnine göre soru cevapla."""
     if not text:
         return "Önce geçerli bir PDF yüklemelisin."
     if not question:
         return "Lütfen PDF hakkında bir soru yaz."
     context = text[:max_context_chars]
     prompt = (
     full_answer = core_tokenizer.decode(output_ids[0], skip_special_tokens=True)
     if "Cevap:" in full_answer:
         answer = full_answer.split("Cevap:", 1)[-1].strip()
     else:
     return answer
 def summarize_pdf(pdf_file):
     text = extract_pdf_text(pdf_file)
     if not text:
     return answer_question_from_text(text, question)
+# ---- Gradio arayüzü ----
 with gr.Blocks() as demo:
     gr.Markdown(
         """
         outputs=[answer_output],
     )
 if __name__ == "__main__":
+    demo.launch()