tuan3335 committed on
Commit
8cf4282
·
1 Parent(s): c282f35

fix: explicitly disable thinking mode in Qwen InferenceClient calls for both answer and wiki query optimization

Browse files
Files changed (2) hide show
  1. agent.py +2 -1
  2. utils/wiki_tool.py +2 -1
agent.py CHANGED
@@ -71,7 +71,8 @@ class AIBrain:
71
  completion = self.client.chat.completions.create(
72
  model=self.model_name,
73
  messages=messages,
74
- max_tokens=max_tokens
 
75
  )
76
  return completion.choices[0].message.content
77
  except Exception as e:
 
71
  completion = self.client.chat.completions.create(
72
  model=self.model_name,
73
  messages=messages,
74
+ max_tokens=max_tokens,
75
+ enable_thinking=False
76
  )
77
  return completion.choices[0].message.content
78
  except Exception as e:
utils/wiki_tool.py CHANGED
@@ -133,7 +133,8 @@ Question: {question}
133
  completion = ai_client.chat.completions.create(
134
  model="Qwen/Qwen3-8B",
135
  messages=[{"role": "user", "content": prompt}],
136
- max_tokens=32
 
137
  )
138
  query = completion.choices[0].message.content.strip()
139
  # Nếu AI trả về rỗng, fallback
 
133
  completion = ai_client.chat.completions.create(
134
  model="Qwen/Qwen3-8B",
135
  messages=[{"role": "user", "content": prompt}],
136
+ max_tokens=32,
137
+ enable_thinking=False
138
  )
139
  query = completion.choices[0].message.content.strip()
140
  # Nếu AI trả về rỗng, fallback