import re
from threading import Thread

import torch
from fpdf import FPDF
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

# --- MODEL SETUP ---
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Load model and tokenizer once (runs when main.py is imported)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Wrap the Transformers pipeline for LangChain.
# Generation arguments go in pipeline_kwargs so they are forwarded to the
# pipeline at call time (model_kwargs is only used when the model is built
# via from_model_id and would be silently ignored here).
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(
    pipeline=pipe,
    pipeline_kwargs={
        "max_new_tokens": 4096,
        "do_sample": True,
        "temperature": 0.2,
        "repetition_penalty": 1.05,
        "eos_token_id": tokenizer.eos_token_id,
    },
)

# Define the prompt template (used by the test_case function)
test_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert QA engineer. STRICTLY follow these rules for your output:
- Generate EXACTLY 10 numbered test cases (1–5 functional, 6–10 edge cases).
- Output ONLY the numbered list.
- DO NOT include explanations, headers, filler text, or markdown.
- Each test MUST be a single, concise sentence.
- Begin your response immediately with '1. '""",
        ),
        (
            "user",
            "Generate test cases for the following code:\n{code}",
        ),
    ]
)


# --- TEST CASE GENERATION FUNCTION ---
def test_case(code):
    # Build and invoke the chain inside the function (avoids the earlier UnboundLocalError)
    test_chain = test_prompt | llm | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})
    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Aggressive cleaning: strip fenced markdown blocks and stray backtick markers
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = re.sub(r"```", "", test_cases)
    test_cases = test_cases.strip()

    # Guardrail: if the LLM returns nothing, force a known output
    if not test_cases:
        test_cases = "Error: Test case generation failed or returned empty content."

    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

    # Pass the clean UTF-8 string directly
    safe_text = test_cases

    pdf = FPDF()
    pdf.add_page()

    # FPDF fix: add and use a Unicode-compatible font (DejaVuSans)
    try:
        # Requires the 'fonts-dejavu' package installed in the Dockerfile
        pdf.add_font("DejaVuSans", style="", fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
        pdf.set_font("DejaVuSans", size=12)
    except Exception as e:
        print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")
        pdf.set_font("Arial", size=12)

    # Title and content cells
    pdf.multi_cell(0, 10, txt="--- Generated Test Cases ---", align='C')
    pdf.multi_cell(0, 10, txt=safe_text)

    # Get the PDF contents as bytes. fpdf2's output() returns a bytearray,
    # while legacy PyFPDF's output(dest='S') returns a latin-1 str, so handle both.
    try:
        raw_output = pdf.output(dest='S')
        if isinstance(raw_output, str):
            # Use 'replace' to safely handle any non-latin-1 characters
            pdf_bytes = raw_output.encode('latin-1', 'replace')
        else:
            pdf_bytes = bytes(raw_output)
        print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))
        # Guardrail: a near-empty file (under ~100 bytes) means generation likely failed
        if len(pdf_bytes) < 100:
            print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
            # Fallback PDF: create a new PDF carrying the error message
            error_pdf = FPDF()
            error_pdf.add_page()
            error_pdf.set_font("Arial", size=12)
            error_pdf.multi_cell(0, 10, txt="ERROR: PDF generation failed to create content. Check logs.")
            fallback = error_pdf.output(dest='S')
            return fallback.encode('latin-1') if isinstance(fallback, str) else bytes(fallback)

        return pdf_bytes

    except Exception as e:
        print(f"[FATAL LOG] PDF output failed with error: {e}")
        # Fatal fallback PDF: create a new PDF carrying the fatal error message
        error_pdf = FPDF()
        error_pdf.add_page()
        error_pdf.set_font("Arial", size=12)
        error_pdf.multi_cell(0, 10, txt=f"FATAL ERROR: PDF generation crashed. Reason: {e}")
        fallback = error_pdf.output(dest='S')
        return fallback.encode('latin-1') if isinstance(fallback, str) else bytes(fallback)


# --- CODE GENERATION MODEL (fine-tuned Qwen2.5-Coder) ---
MODEL_PATH = "jaiwinrc7/Qwen2.5-Coder-0.5B-finetunned-merged"


def load_model_and_tokenizer():
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="cpu",
        trust_remote_code=True,
        torch_dtype=torch.float32,  # float32 is faster than float16 on CPU
    )
    model.eval()
    return tokenizer, model


def build_prompt(lang, task):
    # Keep the prompt simple for speed
    return f"""You are a coding assistant.
Write {lang} code for the following task:

{task}

Code:
"""


def generate_code_stream(lang, user_input, tokenizer, model):
    prompt = build_prompt(lang, user_input)
    inputs = tokenizer(prompt, return_tensors="pt")

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=250,
        do_sample=False,  # greedy decoding; no sampling temperature needed
        use_cache=True,
        streamer=streamer,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a background thread so tokens can be consumed as they arrive
    thread = Thread(
        target=model.generate,
        kwargs=generation_kwargs,
    )
    thread.start()

    # Yield tokens as they arrive
    for token in streamer:
        yield token
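

# --- USAGE SKETCH (illustrative only) ---
# A minimal example of how the two entry points above might be exercised.
# The __main__ guard, the sample inputs, and the output file name below are
# hypothetical additions, not part of the original application flow, which is
# expected to import test_case() and generate_code_stream() from a web layer.
if __name__ == "__main__":
    sample_code = "def add(a, b):\n    return a + b"

    # Generate the test-case PDF and write the returned bytes to disk.
    with open("test_cases.pdf", "wb") as f:
        f.write(test_case(sample_code))

    # Stream code generation from the fine-tuned Qwen model, printing tokens live.
    qwen_tokenizer, qwen_model = load_model_and_tokenizer()
    for token in generate_code_stream("Python", "Reverse a string.", qwen_tokenizer, qwen_model):
        print(token, end="", flush=True)
    print()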