File size: 4,473 Bytes
dad3b3d
4ac9466
dad3b3d
 
 
 
 
 
4ac9466
dad3b3d
 
4ac9466
dad3b3d
 
 
 
 
 
 
 
 
 
 
 
 
9e1f69d
dad3b3d
 
 
 
 
 
4ac9466
 
83fb250
4ac9466
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dad3b3d
4ac9466
83fb250
85b1c76
4ac9466
85b1c76
 
4ac9466
85b1c76
 
4ac9466
05f4426
 
 
dad3b3d
85b1c76
dad3b3d
85b1c76
 
 
05f4426
4ac9466
83fb250
85b1c76
83fb250
05f4426
dad3b3d
85b1c76
 
4ac9466
85b1c76
 
 
 
 
 
 
05f4426
 
83fb250
85b1c76
05f4426
 
4ac9466
 
85b1c76
 
05f4426
4ac9466
05f4426
 
 
85b1c76
05f4426
 
 
 
 
 
 
 
 
 
85b1c76
05f4426
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from fpdf import FPDF
import re

# --- MODEL SETUP ---
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Load model and tokenizer once (runs when this module is imported).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Generation settings for every call made through `pipe` / `llm`.
# They are handed to the Transformers pipeline itself: when a ready-made
# pipeline is wrapped, HuggingFacePipeline's `model_kwargs` is only stored on
# the wrapper and is not forwarded to generation, so settings placed there
# would silently have no effect.
_GENERATION_KWARGS = {
    "max_new_tokens": 4096,
    "do_sample": True,
    "temperature": 0.2,
    "repetition_penalty": 1.05,
    "eos_token_id": tokenizer.eos_token_id,
}

# Wrap the Transformers pipeline for use in LangChain chains.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **_GENERATION_KWARGS,
)
llm = HuggingFacePipeline(
    pipeline=pipe,
    # Recorded on the wrapper so the effective settings stay inspectable.
    pipeline_kwargs=_GENERATION_KWARGS,
)

# Chat prompt consumed by test_case(): a fixed system message carrying the
# output rules, followed by a user message that embeds the submitted source
# through the {code} placeholder.
test_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # One literal per line of the system message; adjacent literals
            # are joined by the parser into a single string.
            "You are an expert QA engineer.\n"
            "STRICTLY follow these rules for your output:\n"
            "- Generate EXACTLY 10 numbered test cases (1–5 functional, 6–10 edge cases).\n"
            "- Output ONLY the numbered list.\n"
            "- DO NOT include explanations, headers, filler text, or markdown.\n"
            "- Each test MUST be a single, concise sentence.\n"
            "- Begin your response immediately with '1. '",
        ),
        ("user", "Generate test cases for the following code:\n{code}"),
    ]
)

# --- TEST CASE GENERATION FUNCTION ---
def _latin1_safe(text):
    """Return ``text`` with every character outside Latin-1 replaced by ``?``."""
    return text.encode("latin-1", "replace").decode("latin-1")


def _pdf_to_bytes(pdf):
    """Serialise ``pdf`` to ``bytes`` whichever FPDF flavour is installed."""
    raw = pdf.output(dest='S')
    # Legacy PyFPDF hands back a str here, fpdf2 a bytearray; calling
    # .encode() on the latter would raise AttributeError.
    if isinstance(raw, str):
        return raw.encode('latin-1', 'replace')
    return bytes(raw)


def _error_pdf_bytes(message):
    """Build a one-page PDF that only carries ``message`` and return its bytes."""
    error_pdf = FPDF()
    error_pdf.add_page()
    error_pdf.set_font("Arial", size=12)
    # Core fonts only cover Latin-1, and the message may embed arbitrary
    # exception text, so sanitise it before rendering.
    error_pdf.multi_cell(0, 10, txt=_latin1_safe(message))
    return _pdf_to_bytes(error_pdf)


def test_case(code):
    """Generate test cases for ``code`` with the LLM and render them as a PDF.

    Args:
        code: Source code text inserted into the ``{code}`` slot of
            ``test_prompt``.

    Returns:
        bytes: The PDF document. If rendering fails, or the result is
        implausibly small, a one-page PDF describing the error is returned
        instead.
    """
    # skip_prompt=True asks the HuggingFacePipeline wrapper to drop the echoed
    # prompt, so the PDF holds only the generated list and not the system
    # prompt plus the submitted code. Wrapper versions that do not know the
    # flag are expected to ignore it.
    test_chain = test_prompt | llm.bind(skip_prompt=True) | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})

    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Aggressive cleaning: drop fenced markdown blocks, then stray fence markers.
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = re.sub(r"```", "", test_cases)
    test_cases = test_cases.strip()

    # Guardrail: if nothing survived the cleaning, emit a known message.
    if not test_cases:
        test_cases = "Error: Test case generation failed or returned empty content."

    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

    pdf = FPDF()
    pdf.add_page()

    # Prefer a Unicode-capable font (DejaVuSans).
    try:
        # Requires 'fonts-dejavu' package installed in Dockerfile
        pdf.add_font("DejaVuSans", style="", fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
        pdf.set_font("DejaVuSans", size=12)
    except Exception as e:
        # Deliberate best-effort fallback: any font-loading problem lands here.
        print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")
        pdf.set_font("Arial", size=12)
        # The core Arial font cannot encode characters outside Latin-1;
        # replace them up front instead of failing while rendering.
        test_cases = _latin1_safe(test_cases)

    try:
        # Title, then the generated content.
        pdf.multi_cell(0, 10, txt="--- Generated Test Cases ---", align='C')
        # fpdf2 leaves the cursor at the right edge after multi_cell, which
        # makes a following full-width multi_cell fail for lack of horizontal
        # space; moving back to the left margin is a no-op on legacy PyFPDF.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, txt=test_cases)
        pdf_bytes = _pdf_to_bytes(pdf)
    except Exception as e:
        print(f"[FATAL LOG] PDF output failed with error: {e}")
        return _error_pdf_bytes(f"FATAL ERROR: PDF generation crashed. Reason: {e}")

    print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))

    # Sanity check: anything this small cannot be a real document.
    if len(pdf_bytes) < 100:
        print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
        return _error_pdf_bytes("ERROR: PDF generation failed to create content. Check logs.")

    return pdf_bytes