# Code-Test-generator / pdf_utils.py
# Jaiwincr7
# FINAL DOCKERFIX: Ensuring Dockerfile copies pdf_utils.py instead of the nonexistent main.py.
# ccdddd6
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from fpdf import FPDF
import re
# --- MODEL SETUP ---
# Hugging Face checkpoint loaded once at import time and shared by every call.
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Text-generation settings for the model.
_generation_kwargs = {
    "max_new_tokens": 2000,
    "do_sample": True,
    "temperature": 0.2,
    "repetition_penalty": 1.05,
    "eos_token_id": tokenizer.eos_token_id,
}

# The settings are handed to transformers.pipeline() so they are applied on
# every generate call.
# NOTE(review): per the LangChain HuggingFacePipeline docs, a pre-built pipeline
# carries its own generation settings and `model_kwargs` is documented for
# model loading only -- confirm against the installed langchain_huggingface
# version.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **_generation_kwargs,
)

# `model_kwargs` is still passed so the parameters recorded on `llm` stay the
# same as before for any code that inspects them.
llm = HuggingFacePipeline(
    pipeline=pipe,
    model_kwargs=_generation_kwargs,
)
# --- PROMPT ---
# Instruction template sent to the model; the {code} placeholder is filled with
# the source code passed to test_case(). The numeric ranges use en dashes
# (previously stored as mis-decoded "â€“" byte sequences).
test_prompt = PromptTemplate.from_template(
    """You are an expert QA engineer.
Generate EXACTLY 10 numbered test cases:
1–5 functional
6–10 edge cases
Rules:
- Output ONLY the numbered list
- Each test must be one sentence
- No explanations, no markdown
Code:
{code}
Test cases:
"""
)
# --- 🔒 CRITICAL FIX: SAFE TEXT WRAPPING FOR PDF ---
def wrap_text_for_pdf(text: str, max_chars: int = 90) -> str:
    """Hard-wrap *text* so that no output line exceeds *max_chars* characters.

    The input is split on newline characters only. Lines that already fit are
    kept as they are (including empty lines); longer lines are cut into
    consecutive chunks of exactly *max_chars* characters, with the remainder
    in the last chunk. Cuts are purely positional and may fall inside a word.

    Args:
        text: The text to wrap.
        max_chars: Maximum number of characters allowed per output line.
            Must be at least 1.

    Returns:
        The wrapped text, with lines joined by newline characters.

    Raises:
        ValueError: If *max_chars* is less than 1. Without this check a
            negative value would silently discard every non-empty line.
    """
    if max_chars < 1:
        raise ValueError(f"max_chars must be >= 1, got {max_chars}")

    wrapped_lines = []
    for line in text.split("\n"):
        chunks = [line[i:i + max_chars] for i in range(0, len(line), max_chars)]
        # An empty line yields no chunks; keep it so blank lines survive.
        wrapped_lines.extend(chunks or [line])
    return "\n".join(wrapped_lines)
# --- TEST CASE GENERATION ---
def _latin1_safe(text):
    """Return *text* with every character outside Latin-1 replaced by "?"."""
    return text.encode("latin-1", errors="replace").decode("latin-1")


def _render_test_case_pdf(body_text):
    """Render the heading and *body_text* into a PDF and return it as bytes."""
    pdf = FPDF()
    pdf.add_page()
    try:
        pdf.add_font(
            "DejaVuSans",
            style="",
            fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        )
        pdf.set_font("DejaVuSans", size=10)
    except Exception as e:
        # Deliberate best-effort fallback when the TrueType font cannot be used.
        print(f"[LOG] Font fallback: {e}")
        pdf.set_font("Arial", size=10)
        # Built-in core fonts only cover Latin-1; replace anything else so the
        # render below cannot fail on an unencodable character.
        body_text = _latin1_safe(body_text)

    pdf.multi_cell(0, 8, txt="--- Generated Test Cases ---", align="C")
    pdf.ln(2)
    pdf.multi_cell(0, 8, txt=body_text)

    pdf_bytes = bytes(pdf.output(dest="S"))
    print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))
    if len(pdf_bytes) < 100:
        raise ValueError("PDF too small")
    return pdf_bytes


def test_case(code):
    """Generate test cases for *code* with the LLM and return them as a PDF.

    The prompt/LLM/parser chain is invoked with *code*, fenced markdown blocks
    and stray fence markers are removed from the reply, the text is hard-wrapped
    with wrap_text_for_pdf(), and the result is rendered to a PDF. If rendering
    fails for any reason, a short PDF describing the failure is returned
    instead.

    Args:
        code: Source code substituted into the prompt's ``{code}`` placeholder.

    Returns:
        The PDF document as ``bytes``.
    """
    test_chain = test_prompt | llm | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})
    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Clean markdown: drop complete fenced blocks first, then any unmatched
    # fence marker that is left over.
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = re.sub(r"```", "", test_cases).strip()
    if not test_cases:
        test_cases = "Error: Test case generation failed."
    print("\n[LOG] 2. Cleaned Text:", test_cases)

    # Wrap before rendering so no single line is longer than the wrap limit.
    safe_text = wrap_text_for_pdf(test_cases)

    try:
        # Layout runs inside the try so a rendering error also reaches the
        # error-PDF path below instead of escaping to the caller.
        return _render_test_case_pdf(safe_text)
    except Exception as e:
        print(f"[FATAL] PDF failed: {e}")
        error_pdf = FPDF()
        error_pdf.add_page()
        error_pdf.set_font("Arial", size=12)
        # The exception text may itself contain characters the core font
        # cannot encode, so sanitise it before rendering.
        error_pdf.multi_cell(
            0, 10, txt=_latin1_safe(f"PDF generation failed: {e}")
        )
        return bytes(error_pdf.output(dest="S"))