Spaces:

jaiwinrc7
/

Code-Test-generator

Running

Code-Test-generator / pdf_utils.py

Jaiwincr7

FINAL FIX: Removed redundant 'bytes()' conversion in app.py to correct PDF data type mismatch.

112effb 3 days ago

4.47 kB

	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_huggingface import HuggingFacePipeline
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	import torch
	from fpdf import FPDF
	import re

	# --- MODEL SETUP ---
	# Everything in this section executes once, when this module is first imported.
	model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

	# Load the tokenizer and the float16 model weights a single time so every
	# request reuses the same objects.
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    dtype=torch.float16,
	    device_map="auto",
	    offload_folder="./offload",
	)

	# Generation settings for the text-generation pipeline.
	_generation_kwargs = {
	    "max_new_tokens": 4096,
	    "do_sample": True,
	    "temperature": 0.2,
	    "repetition_penalty": 1.05,
	    "eos_token_id": tokenizer.eos_token_id,
	}

	# Bind the generation settings on the Transformers pipeline itself so they
	# are applied on every call.
	# NOTE(review): HuggingFacePipeline appears to use `model_kwargs` only when it
	# builds the model through `from_model_id`; with a pre-built `pipeline=` they
	# are not forwarded to generation - confirm against the installed
	# langchain-huggingface version.
	# NOTE(review): text-generation pipelines return prompt + completion by
	# default; consider `return_full_text=False` if the prompt echo is unwanted.
	pipe = pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    **_generation_kwargs,
	)

	# `model_kwargs` is still supplied so the wrapper keeps exposing the same
	# settings it did before.
	llm = HuggingFacePipeline(
	    pipeline=pipe,
	    model_kwargs=dict(_generation_kwargs),
	)

	# Chat prompt consumed by test_case(): the system message pins the output
	# format, the user message carries the code under test in the {code} slot.
	test_prompt = ChatPromptTemplate.from_messages([
	    ("system", """You are an expert QA engineer.
	STRICTLY follow these rules for your output:
	- Generate EXACTLY 10 numbered test cases (1–5 functional, 6–10 edge cases).
	- Output ONLY the numbered list.
	- DO NOT include explanations, headers, filler text, or markdown.
	- Each test MUST be a single, concise sentence.
	- Begin your response immediately with '1. '"""),
	    ("user", "Generate test cases for the following code:\n{code}"),
	])

	# --- TEST CASE GENERATION FUNCTION ---
	def _latin1_safe(text):
	    """Return *text* with every character outside latin-1 replaced by '?'."""
	    return text.encode("latin-1", "replace").decode("latin-1")


	def _pdf_to_bytes(pdf):
	    """Serialize *pdf* to ``bytes`` whichever FPDF variant is installed."""
	    raw = pdf.output(dest="S")
	    if isinstance(raw, str):
	        # A text result must be encoded; 'replace' guards against any stray
	        # character that does not fit latin-1.
	        return raw.encode("latin-1", "replace")
	    # A bytes/bytearray result has no .encode(); just normalise the type.
	    return bytes(raw)


	def _error_pdf(message):
	    """Build a one-page PDF that shows *message* using the Arial core font."""
	    error_pdf = FPDF()
	    error_pdf.add_page()
	    error_pdf.set_font("Arial", size=12)
	    # Core fonts cannot render arbitrary Unicode, so sanitise the message first.
	    error_pdf.multi_cell(0, 10, txt=_latin1_safe(message))
	    return _pdf_to_bytes(error_pdf)


	def test_case(code):
	    """Generate test cases for *code* with the LLM and return them as a PDF.

	    Args:
	        code: Source code to be inserted into the ``{code}`` slot of
	            ``test_prompt``.

	    Returns:
	        bytes: The rendered PDF. If rendering fails, or the result is
	        implausibly small, a fallback PDF describing the error is returned
	        instead.

	    Progress and failures are reported with ``print`` log lines.
	    """
	    # Build and invoke the chain: prompt -> LLM -> plain string.
	    test_chain = test_prompt | llm | StrOutputParser()
	    test_cases = test_chain.invoke({"code": code})

	    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

	    # Aggressive cleaning: drop whole fenced blocks (fences and their
	    # contents), then any stray fence marker that is left over.
	    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
	    test_cases = re.sub(r"```", "", test_cases)
	    test_cases = test_cases.strip()

	    # Guardrail: never render an empty document.
	    if not test_cases:
	        test_cases = "Error: Test case generation failed or returned empty content."

	    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

	    safe_text = test_cases

	    pdf = FPDF()
	    pdf.add_page()

	    # Prefer a Unicode-capable font (DejaVuSans) so the text renders unchanged.
	    try:
	        # Requires 'fonts-dejavu' package installed in Dockerfile
	        pdf.add_font("DejaVuSans", style="", fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
	        pdf.set_font("DejaVuSans", size=12)
	    except Exception as e:
	        print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")
	        pdf.set_font("Arial", size=12)
	        # The core font only covers latin-1; replace anything else so the
	        # rendering below does not fail on a Unicode character.
	        safe_text = _latin1_safe(test_cases)

	    try:
	        # Title, then the generated content.
	        pdf.multi_cell(0, 10, txt="--- Generated Test Cases ---", align='C')
	        pdf.multi_cell(0, 10, txt=safe_text)
	        pdf_bytes = _pdf_to_bytes(pdf)
	    except Exception as e:
	        print(f"[FATAL LOG] PDF output failed with error: {e}")
	        # Fatal fallback: a fresh PDF that carries the failure reason.
	        return _error_pdf(f"FATAL ERROR: PDF generation crashed. Reason: {e}")

	    print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))

	    # Anything under 100 bytes is treated as a failed render.
	    if len(pdf_bytes) < 100:
	        print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
	        return _error_pdf("ERROR: PDF generation failed to create content. Check logs.")

	    return pdf_bytes