Spaces:
Running
Running
File size: 4,473 Bytes
dad3b3d 4ac9466 dad3b3d 4ac9466 dad3b3d 4ac9466 dad3b3d 9e1f69d dad3b3d 4ac9466 83fb250 4ac9466 dad3b3d 4ac9466 83fb250 85b1c76 4ac9466 85b1c76 4ac9466 85b1c76 4ac9466 05f4426 dad3b3d 85b1c76 dad3b3d 85b1c76 05f4426 4ac9466 83fb250 85b1c76 83fb250 05f4426 dad3b3d 85b1c76 4ac9466 85b1c76 05f4426 83fb250 85b1c76 05f4426 4ac9466 85b1c76 05f4426 4ac9466 05f4426 85b1c76 05f4426 85b1c76 05f4426 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from fpdf import FPDF
import re
# --- MODEL SETUP ---
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Load model and tokenizer once (runs when this module is imported).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Generation settings applied to every call made through `pipe`.
generation_kwargs = {
    "max_new_tokens": 4096,
    "do_sample": True,
    "temperature": 0.2,
    "repetition_penalty": 1.05,
    "eos_token_id": tokenizer.eos_token_id,
}

# Wrap the model in a Transformers text-generation pipeline.
# The generation settings are given to the pipeline itself: when
# HuggingFacePipeline is constructed from an existing pipeline object, its
# `model_kwargs` / `pipeline_kwargs` fields are stored as metadata only and
# are not forwarded to generate(), so passing the settings there alone would
# leave the model running with its default generation configuration.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_kwargs,
)

# LangChain wrapper; `pipeline_kwargs` is recorded for identification/logging.
llm = HuggingFacePipeline(
    pipeline=pipe,
    pipeline_kwargs=generation_kwargs,
)
# Define the prompt template (used by the test_case function).
# The system message constrains the output format (exactly ten numbered,
# single-sentence test cases and nothing else); the user message carries the
# code under test through the `{code}` placeholder.
# NOTE: the numeric ranges use plain ASCII hyphens so that the instruction
# cannot be corrupted by an encoding round-trip.
test_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert QA engineer.
STRICTLY follow these rules for your output:
- Generate EXACTLY 10 numbered test cases (1-5 functional, 6-10 edge cases).
- Output ONLY the numbered list.
- DO NOT include explanations, headers, filler text, or markdown.
- Each test MUST be a single, concise sentence.
- Begin your response immediately with '1. '""",
        ),
        (
            "user",
            "Generate test cases for the following code:\n{code}",
        ),
    ]
)
# --- TEST CASE GENERATION FUNCTION ---
def _latin1_safe(text):
    """Return *text* with every non-latin-1 character replaced by '?'."""
    return text.encode('latin-1', 'replace').decode('latin-1')


def _pdf_to_bytes(pdf):
    """Serialise *pdf* to ``bytes`` whichever FPDF flavour is installed."""
    # dest='S' is kept on purpose: legacy PyFPDF needs it to return the
    # document instead of writing it out, and fpdf2 still accepts it.
    raw = pdf.output(dest='S')
    if isinstance(raw, str):
        # Legacy PyFPDF hands back a latin-1 text string.
        return raw.encode('latin-1', 'replace')
    # fpdf2 hands back a bytearray (no .encode method); normalise to bytes.
    return bytes(raw)


def _error_pdf_bytes(message):
    """Build a one-page PDF containing *message* and return it as bytes."""
    error_pdf = FPDF()
    error_pdf.add_page()
    error_pdf.set_font("Arial", size=12)
    # Core fonts only cover latin-1, and the message may embed arbitrary
    # exception text, so sanitise it before rendering.
    error_pdf.multi_cell(0, 10, txt=_latin1_safe(message))
    return _pdf_to_bytes(error_pdf)


def test_case(code: str) -> bytes:
    """Generate test cases for *code* with the LLM and render them as a PDF.

    The prompt/LLM/parser chain is invoked with ``code``, fenced markdown
    blocks and stray fence markers are stripped from the reply, and the
    remaining text is written into a single-page PDF under a fixed title.

    Args:
        code: Source code the test cases should be generated for.

    Returns:
        The PDF document as ``bytes``. If the reply is empty after cleaning,
        the PDF contains a fixed error sentence instead. If serialising the
        PDF fails or yields fewer than 100 bytes, a fallback PDF describing
        the failure is returned.
    """
    test_chain = test_prompt | llm | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})
    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Aggressive cleaning: drop fenced markdown blocks, then any lone fences.
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = re.sub(r"```", "", test_cases)
    test_cases = test_cases.strip()

    # Guardrail: if nothing survives the cleaning, emit a known message.
    if not test_cases:
        test_cases = "Error: Test case generation failed or returned empty content."
    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

    safe_text = test_cases
    pdf = FPDF()
    pdf.add_page()

    # Prefer a Unicode-capable font so the text can be rendered unchanged.
    try:
        # Requires the 'fonts-dejavu' package to be installed in the image.
        pdf.add_font("DejaVuSans", style="", fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
        pdf.set_font("DejaVuSans", size=12)
    except Exception as e:
        # Best-effort fallback to a core font. Core fonts cannot encode
        # characters outside latin-1, so the text is sanitised to keep
        # multi_cell() from raising on Unicode model output.
        print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")
        pdf.set_font("Arial", size=12)
        safe_text = _latin1_safe(test_cases)

    # Title followed by the generated content.
    pdf.multi_cell(0, 10, txt="--- Generated Test Cases ---", align='C')
    pdf.multi_cell(0, 10, txt=safe_text)

    try:
        pdf_bytes = _pdf_to_bytes(pdf)
        print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))

        # Anything this small cannot be a real document; report it instead.
        if len(pdf_bytes) < 100:
            print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
            return _error_pdf_bytes("ERROR: PDF generation failed to create content. Check logs.")

        return pdf_bytes
    except Exception as e:
        print(f"[FATAL LOG] PDF output failed with error: {e}")
        # Last resort: a fresh PDF that carries the failure reason.
        return _error_pdf_bytes(f"FATAL ERROR: PDF generation crashed. Reason: {e}")