refactor: use plain transformers for Qwen, add accelerate to requirements.txt
- agent.py +17 -55
- requirements.txt +1 -0
agent.py
CHANGED
@@ -20,8 +20,7 @@ from typing_extensions import TypedDict
 from pydantic import BaseModel, Field

 # LangChain HuggingFace Integration
-from
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer

 from utils import (
     process_question_with_tools,
@@ -56,64 +55,36 @@ class AIBrain:
     def __init__(self):
         self.model_name = "Qwen/Qwen3-8B"

-        print("🧠 Initializing Qwen3-8B
-
-        # Load tokenizer with thinking disabled
+        print("🧠 Initializing Qwen3-8B with plain transformers...")
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-
-
-        self.hf_pipeline = pipeline(
-            "text-generation",
-            model=self.model_name,
-            tokenizer=self.tokenizer,
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
             torch_dtype="auto",
-            device_map="auto"
-            max_new_tokens=2048,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True,
-            pad_token_id=self.tokenizer.eos_token_id if self.tokenizer.eos_token_id else self.tokenizer.pad_token_id
+            device_map="auto"
         )
-
-        # Wrap with LangChain HuggingFacePipeline
-        self.llm = HuggingFacePipeline(pipeline=self.hf_pipeline)
-
-        # Create ChatHuggingFace for chat interface
-        self.chat_model = ChatHuggingFace(llm=self.llm)
-
-        print("✅ Qwen3 AI Brain with LangChain HuggingFace initialized")
+        print("✅ Qwen3 AI Brain with plain transformers is ready")

     def _generate_with_qwen3(self, prompt: str, max_tokens: int = 2048) -> str:
-        """
+        """Generate text with Qwen3 using plain transformers, thinking mode disabled"""
         try:
-            # Prepare messages for chat template with thinking DISABLED
             messages = [{"role": "user", "content": prompt}]
-
-            # Apply chat template with enable_thinking=False
             text = self.tokenizer.apply_chat_template(
                 messages,
                 tokenize=False,
                 add_generation_prompt=True,
-                enable_thinking=False
+                enable_thinking=False
             )
-
-
-
-
-
-
-
-
+            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+            generated_ids = self.model.generate(
+                **model_inputs,
+                max_new_tokens=max_tokens
+            )
+            output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+            response = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
             return response
-
         except Exception as e:
             print(f"⚠️ Qwen3 generation error: {str(e)}")
-
-            try:
-                result = self.hf_pipeline(prompt, max_new_tokens=max_tokens)
-                return result[0]['generated_text'].replace(prompt, "").strip()
-            except Exception as e2:
-                return f"AI generation failed: {str(e2)}"
+            return f"AI generation failed: {str(e)}"

     def analyze_question(self, question: str, task_id: str = "") -> Dict[str, Any]:
         """Analyze question type using Qwen3 with strict JSON output"""
@@ -381,13 +352,4 @@ if __name__ == "__main__":

     print(f"\n{'-'*60}")

-    print("\n✅ All tests completed!")
-
-    # Initialize Qwen3 with thinking mode disabled
-    primary_brain = HuggingFaceEndpoint(
-        repo_id=primary_model,
-        temperature=0.7,
-        max_new_tokens=300,
-        huggingfacehub_api_token=os.getenv("HF_API_KEY"),
-        model_kwargs={"enable_thinking": False, "thinking_prompt": "/no_thinking"}
-    )
+    print("\n✅ All tests completed!")
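For reference, the refactored generation path can be exercised on its own. A minimal sketch of the same flow the new __init__ and _generate_with_qwen3 follow, assuming a machine with enough memory for the 8B model (roughly 16 GB of GPU memory in bf16, or accelerate-managed offload) and an example prompt of my choosing:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",   # pick bf16/fp16 automatically where supported
    device_map="auto",    # requires accelerate; places weights on available devices
)

# Build the chat prompt with Qwen3's thinking mode disabled, as in agent.py
messages = [{"role": "user", "content": "What is the capital of France?"}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)

inputs = tokenizer([text], return_tensors="pt").to(model.device)
generated = model.generate(**inputs, max_new_tokens=256)

# Drop the prompt tokens and decode only the newly generated continuation
new_tokens = generated[0][len(inputs.input_ids[0]):]
print(tokenizer.decode(new_tokens, skip_special_tokens=True).strip())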
requirements.txt
CHANGED
@@ -6,6 +6,7 @@ langgraph>=0.2.0

 # HuggingFace Core
 transformers>=4.51.0
+accelerate>=0.28.0

 # Tool Dependencies
 groq>=0.11.0
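The accelerate pin backs the device_map="auto" call added in agent.py: transformers will not dispatch a model across devices unless accelerate is installed. A quick environment check, assuming the pins above:

# Confirm both packages import and report versions matching the pins
import accelerate
import transformers

print("transformers:", transformers.__version__)  # expected >= 4.51.0
print("accelerate:", accelerate.__version__)      # expected >= 0.28.0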