Spaces:
Running
Running
Jaiwincr7
FINAL FIX: Removed redundant 'bytes()' conversion in app.py to correct PDF data type mismatch.
112effb
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_huggingface import HuggingFacePipeline | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
| import torch | |
| from fpdf import FPDF | |
| import re | |
# --- MODEL SETUP ---
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Load model and tokenizer once (runs when this module is imported) so every
# call to the generation function reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Generation settings.
# NOTE: these must be bound to the Transformers pipeline itself. When
# HuggingFacePipeline is handed a prebuilt pipeline, its `model_kwargs` field
# is only metadata (it is meant for model loading via `from_model_id`) and is
# not forwarded to `generate()`, so settings placed there are silently ignored.
GENERATION_KWARGS = {
    "max_new_tokens": 4096,
    "do_sample": True,
    "temperature": 0.2,
    "repetition_penalty": 1.05,
    "eos_token_id": tokenizer.eos_token_id,
}

# Wrap the Transformers pipeline for use in LangChain chains.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **GENERATION_KWARGS,
)
llm = HuggingFacePipeline(
    pipeline=pipe,
    # Recorded for introspection/tracing only; the effective values are the
    # ones bound to `pipe` above.
    pipeline_kwargs=GENERATION_KWARGS,
)
# Prompt template used by test_case(): a strict system instruction that pins
# the output format (exactly ten numbered one-sentence tests, no extra text)
# followed by a user message carrying the code under test in `{code}`.
test_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # The ranges below use an en dash; the previous text contained a
            # mis-encoded character ("1β5") in its place.
            """You are an expert QA engineer.
STRICTLY follow these rules for your output:
- Generate EXACTLY 10 numbered test cases (1–5 functional, 6–10 edge cases).
- Output ONLY the numbered list.
- DO NOT include explanations, headers, filler text, or markdown.
- Each test MUST be a single, concise sentence.
- Begin your response immediately with '1. '""",
        ),
        (
            "user",
            "Generate test cases for the following code:\n{code}",
        ),
    ]
)
# --- TEST CASE GENERATION FUNCTION ---
def _latin1_safe(text):
    """Return *text* with every character outside latin-1 replaced by '?'.

    The built-in (core) PDF fonts can only encode latin-1, so text rendered
    with them has to be sanitised first.
    """
    return text.encode("latin-1", "replace").decode("latin-1")


def _pdf_to_bytes(pdf):
    """Serialise *pdf* and return the document as an immutable ``bytes``.

    fpdf2 returns a ``bytearray`` from ``output()``, while the legacy PyFPDF
    package returns a latin-1 ``str``. Both are normalised here so callers
    always receive ``bytes``.
    """
    raw = pdf.output(dest="S")
    if isinstance(raw, str):
        return raw.encode("latin-1", "replace")
    return bytes(raw)


def _error_pdf(message):
    """Build a one-page PDF containing *message* and return it as ``bytes``."""
    error_pdf = FPDF()
    error_pdf.add_page()
    error_pdf.set_font("Arial", size=12)
    # Core font: the message (which may embed an arbitrary exception text)
    # must be restricted to latin-1 or rendering itself would fail.
    error_pdf.multi_cell(0, 10, txt=_latin1_safe(message))
    return _pdf_to_bytes(error_pdf)


def test_case(code):
    """Generate test cases for *code* with the LLM and return them as a PDF.

    The prompt/LLM chain is invoked with the given source code, fenced
    markdown blocks and stray fence markers are stripped from the answer, and
    the remaining text is rendered into a single-section PDF.

    Args:
        code: Source code (string) for which test cases should be generated.

    Returns:
        bytes: The PDF document. If rendering or serialisation fails, or the
        result is implausibly small, a fallback PDF describing the error is
        returned instead.
    """
    test_chain = test_prompt | llm | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})
    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Aggressive cleaning: drop complete fenced blocks first, then any
    # unmatched fence marker that is left over.
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = re.sub(r"```", "", test_cases)
    test_cases = test_cases.strip()

    # Guardrail: if the LLM returned nothing usable, emit a known message.
    if not test_cases:
        test_cases = "Error: Test case generation failed or returned empty content."
    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

    safe_text = test_cases
    pdf = FPDF()
    pdf.add_page()

    # Prefer a Unicode-capable TrueType font (needs the 'fonts-dejavu'
    # package in the image). Font loading can fail in several library- and
    # version-specific ways, hence the broad catch at this boundary.
    try:
        pdf.add_font("DejaVuSans", style="", fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
        pdf.set_font("DejaVuSans", size=12)
    except Exception as e:
        print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")
        pdf.set_font("Arial", size=12)
        # The fallback is a core font, which cannot encode non-latin-1 text.
        safe_text = _latin1_safe(safe_text)

    try:
        # Title, then content.
        pdf.multi_cell(0, 10, txt="--- Generated Test Cases ---", align='C')
        # fpdf2 leaves the cursor at the right edge after multi_cell(); move
        # back to the left margin so the next full-width cell has room.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, txt=safe_text)
        pdf_bytes = _pdf_to_bytes(pdf)
    except Exception as e:
        print(f"[FATAL LOG] PDF output failed with error: {e}")
        return _error_pdf(f"FATAL ERROR: PDF generation crashed. Reason: {e}")

    print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))

    # Even a blank document is larger than this; anything smaller means the
    # generation silently failed.
    if len(pdf_bytes) < 100:
        print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
        return _error_pdf("ERROR: PDF generation failed to create content. Check logs.")

    return pdf_bytes