Jaiwincr7 committed on
Commit
47d6bb4
·
1 Parent(s): 112effb

Fix circular imports, Docker Streamlit app

Browse files
Files changed (1) hide show
  1. merged.py +122 -0
merged.py CHANGED
@@ -5,7 +5,129 @@ from transformers import (
5
  TextIteratorStreamer
6
  )
7
  from threading import Thread
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  MODEL_PATH = "jaiwinrc7/Qwen2.5-Coder-0.5B-finetunned-merged"
10
 
11
 
 
5
  TextIteratorStreamer
6
  )
7
  from threading import Thread
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.prompts import ChatPromptTemplate
10
+ from langchain_huggingface import HuggingFacePipeline
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
12
+ import torch
13
+ from fpdf import FPDF
14
+ import re
15
+
16
# --- MODEL SETUP ---
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Load the model and tokenizer once, as a side effect of importing this module.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Generation settings applied to every call made through `llm`.
generation_kwargs = {
    "max_new_tokens": 4096,
    "do_sample": True,
    "temperature": 0.2,
    "repetition_penalty": 1.05,
    "eos_token_id": tokenizer.eos_token_id,
}

# Wrap the Transformers pipeline for LangChain.
# The generation settings are bound to the pipeline itself: when a ready-made
# pipeline is handed to HuggingFacePipeline, its `model_kwargs` field is only
# recorded, not forwarded to generation, so passing them there left the
# pipeline running with library defaults.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_kwargs,
)
llm = HuggingFacePipeline(pipeline=pipe)
40
+
41
# Prompt template consumed by test_case(): a fixed system message with the
# output rules, followed by a user message carrying the code under test in
# the `{code}` placeholder.
_QA_SYSTEM_MESSAGE = """You are an expert QA engineer.
STRICTLY follow these rules for your output:
- Generate EXACTLY 10 numbered test cases (1–5 functional, 6–10 edge cases).
- Output ONLY the numbered list.
- DO NOT include explanations, headers, filler text, or markdown.
- Each test MUST be a single, concise sentence.
- Begin your response immediately with '1. '"""

_QA_USER_MESSAGE = "Generate test cases for the following code:\n{code}"

test_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _QA_SYSTEM_MESSAGE),
        ("user", _QA_USER_MESSAGE),
    ]
)
60
+
61
+ # --- TEST CASE GENERATION FUNCTION ---
62
# TrueType font used for Unicode output; the original code notes that it
# requires the 'fonts-dejavu' package installed in the Dockerfile.
_UNICODE_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"


def _to_latin1(text):
    """Return ``text`` with every non-latin-1 character replaced by '?'."""
    return text.encode("latin-1", "replace").decode("latin-1")


def _pdf_to_bytes(pdf):
    """Serialise ``pdf`` to an immutable ``bytes`` object."""
    raw = pdf.output(dest='S')
    if isinstance(raw, str):
        # Legacy PyFPDF hands back a latin-1 text string.
        return raw.encode('latin-1', 'replace')
    # fpdf2 hands back a bytearray, which has no .encode().
    return bytes(raw)


def _build_pdf(body, title=None, try_unicode_font=True):
    """Render ``body`` (optionally under a centred ``title``) to PDF bytes."""
    pdf = FPDF()
    pdf.add_page()

    unicode_ready = False
    if try_unicode_font:
        try:
            pdf.add_font("DejaVuSans", style="", fname=_UNICODE_FONT_PATH)
            pdf.set_font("DejaVuSans", size=12)
            unicode_ready = True
        except Exception as e:
            print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")

    if not unicode_ready:
        pdf.set_font("Arial", size=12)
        # The built-in fonts only cover latin-1; anything else would make
        # multi_cell raise, so degrade those characters to '?'.
        body = _to_latin1(body)

    if title is not None:
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, title, align='C')

    # Reset the cursor to the left margin: fpdf2 can leave it at the right
    # edge after a multi_cell, leaving no width for the next full-width cell.
    pdf.set_x(pdf.l_margin)
    pdf.multi_cell(0, 10, body)
    return _pdf_to_bytes(pdf)


def test_case(code):
    """Generate test cases for ``code`` with the LLM and return them as a PDF.

    The module-level ``test_prompt`` and ``llm`` are chained, the raw answer is
    stripped of fenced markdown blocks and stray fence markers, and the result
    is rendered into a one-page-or-more PDF.

    Args:
        code: Source code (text) to generate test cases for.

    Returns:
        bytes: The PDF document. If rendering fails, a PDF containing an
        error message is returned instead.
    """
    test_chain = test_prompt | llm | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})

    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Aggressive cleaning: drop whole fenced blocks, then any leftover markers.
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = test_cases.replace("```", "").strip()

    # Guardrail: never render an empty document.
    if not test_cases:
        test_cases = "Error: Test case generation failed or returned empty content."

    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

    try:
        pdf_bytes = _build_pdf(test_cases, title="--- Generated Test Cases ---")

        print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))

        # Anything this small cannot be a valid document with content.
        if len(pdf_bytes) < 100:
            print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
            return _build_pdf(
                "ERROR: PDF generation failed to create content. Check logs.",
                try_unicode_font=False,
            )

        return pdf_bytes

    except Exception as e:
        print(f"[FATAL LOG] PDF output failed with error: {e}")
        # Use only the built-in font here so a broken Unicode font cannot
        # take down the fallback as well.
        return _build_pdf(
            f"FATAL ERROR: PDF generation crashed. Reason: {e}",
            try_unicode_font=False,
        )
129
+
130
+
131
  MODEL_PATH = "jaiwinrc7/Qwen2.5-Coder-0.5B-finetunned-merged"
132
 
133