tuan3335 committed on
Commit 8103d43 · 1 Parent(s): 010f542

refactor: use vanilla transformers for Qwen, add accelerate to requirements.txt

Files changed (2)
  1. agent.py +17 -55
  2. requirements.txt +1 -0
agent.py CHANGED
@@ -20,8 +20,7 @@ from typing_extensions import TypedDict
 from pydantic import BaseModel, Field
 
 # LangChain HuggingFace Integration
-from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace, HuggingFaceEndpoint
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from utils import (
     process_question_with_tools,
@@ -56,64 +55,36 @@ class AIBrain:
     def __init__(self):
         self.model_name = "Qwen/Qwen3-8B"
 
-        print("🧠 Initializing Qwen3-8B with LangChain HuggingFace...")
-
-        # Load tokenizer with thinking disabled
+        print("🧠 Initializing Qwen3-8B with vanilla transformers...")
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-
-        # Create text generation pipeline with Qwen3
-        self.hf_pipeline = pipeline(
-            "text-generation",
-            model=self.model_name,
-            tokenizer=self.tokenizer,
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
             torch_dtype="auto",
-            device_map="auto",
-            max_new_tokens=2048,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-            pad_token_id=self.tokenizer.eos_token_id if self.tokenizer.eos_token_id else self.tokenizer.pad_token_id
+            device_map="auto"
         )
-
-        # Wrap with LangChain HuggingFacePipeline
-        self.llm = HuggingFacePipeline(pipeline=self.hf_pipeline)
-
-        # Create ChatHuggingFace for chat interface
-        self.chat_model = ChatHuggingFace(llm=self.llm)
-
-        print("✅ Qwen3 AI Brain with LangChain HuggingFace initialized")
+        print("✅ Qwen3 AI Brain with transformers is ready")
 
     def _generate_with_qwen3(self, prompt: str, max_tokens: int = 2048) -> str:
-        """Generate text with Qwen3 via LangChain - thinking disabled"""
+        """Generate text with Qwen3 using vanilla transformers, thinking mode disabled"""
         try:
-            # Prepare messages for chat template with thinking DISABLED
             messages = [{"role": "user", "content": prompt}]
-
-            # Apply chat template with enable_thinking=False
            text = self.tokenizer.apply_chat_template(
                 messages,
                 tokenize=False,
                 add_generation_prompt=True,
-                enable_thinking=False  # CRITICAL: Disable thinking mode
+                enable_thinking=False
             )
-
-            # Use LangChain HuggingFace pipeline for generation
-            response = self.llm.invoke(text)
-
-            # Clean up response - remove input prompt
-            if text in response:
-                response = response.replace(text, "").strip()
-
+            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+            generated_ids = self.model.generate(
+                **model_inputs,
+                max_new_tokens=max_tokens
+            )
+            output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+            response = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
             return response
-
         except Exception as e:
             print(f"⚠️ Qwen3 generation error: {str(e)}")
-            # Fallback to direct pipeline call
-            try:
-                result = self.hf_pipeline(prompt, max_new_tokens=max_tokens)
-                return result[0]['generated_text'].replace(prompt, "").strip()
-            except Exception as e2:
-                return f"AI generation failed: {str(e2)}"
+            return f"AI generation failed: {str(e)}"
 
     def analyze_question(self, question: str, task_id: str = "") -> Dict[str, Any]:
         """Analyze question type using Qwen3 with strict JSON output"""
@@ -381,13 +352,4 @@ if __name__ == "__main__":
 
     print(f"\n{'-'*60}")
 
-    print("\n✅ All tests completed!")
-
-    # Initialize Qwen3 with thinking mode disabled
-    primary_brain = HuggingFaceEndpoint(
-        repo_id=primary_model,
-        temperature=0.7,
-        max_new_tokens=300,
-        huggingfacehub_api_token=os.getenv("HF_API_KEY"),
-        model_kwargs={"enable_thinking": False, "thinking_prompt": "/no_thinking"}
-    )
+    print("\n✅ All tests completed!")
requirements.txt CHANGED
@@ -6,6 +6,7 @@ langgraph>=0.2.0
 
 # HuggingFace Core
 transformers>=4.51.0
+accelerate>=0.28.0
 
 # Tool Dependencies
 groq>=0.11.0
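A note on the new pin: passing device_map="auto" to from_pretrained dispatches the model across available devices via accelerate, and transformers refuses to load a model with a device_map when accelerate is missing, so the refactored loader in agent.py needs this dependency installed.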