JunHowie committed on Aug 22

Commit

6c8c5f2

verified ·

1 Parent(s): 5abec93

Upload folder using huggingface_hub

Browse files

Files changed (22) hide show

.gitattributes +1 -0
.ipynb_checkpoints/README-checkpoint.md +239 -0
.mdl +0 -0
.msc +0 -0
.mv +1 -0
LICENSE +57 -0
README.md +239 -0
added_tokens.json +32 -0
chat_template.jinja +88 -0
config.json +36 -0
configuration.json +1 -0
generation_config.json +12 -0
merges.txt +0 -0
model-00001-of-00005.safetensors +3 -0
model-00002-of-00005.safetensors +3 -0
model-00003-of-00005.safetensors +3 -0
model-00004-of-00005.safetensors +3 -0
model-00005-of-00005.safetensors +3 -0
model.safetensors.index.json +0 -0
tokenizer.json +3 -0
tokenizer_config.json +272 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

.ipynb_checkpoints/README-checkpoint.md ADDED Viewed

	@@ -0,0 +1,239 @@

+---
+library_name: transformers
+license_link: LICENSE
+pipeline_tag: text-generation
+tags:
+- AWQ
+- 量化修复
+- vLLM
+base_model:
+  - Kwaipilot/KAT-V1-40B
+base_model_relation: quantized
+---
+# KAT-V1-40B-AWQ
+Base model: [Kwaipilot/KAT-V1-40B](https://huggingface.co/Kwaipilot/KAT-V1-40B)
+### 【vLLM Single Node with 4 GPUs Startup Command】
+```
+CONTEXT_LENGTH=32768
+vllm serve \
+    QuantTrio/KAT-V1-40B-AWQ \
+    --served-model-name KAT-V1-40B-AWQ \
+    --swap-space 16 \
+    --max-num-seqs 512 \
+    --max-model-len $CONTEXT_LENGTH \
+    --max-seq-len-to-capture $CONTEXT_LENGTH \
+    --gpu-memory-utilization 0.9 \
+    --tensor-parallel-size 4 \
+    --trust-remote-code \
+    --disable-log-requests \
+    --host 0.0.0.0 \
+    --port 8000
+```
+### 【Dependencies】
+```
+vllm==0.10.0
+```
+### 【Model Update Date】
+```
+2025-07-31
+1. first commit
+```
+### 【Model Files】
+| File Size   | Last Updated       |
+|--------|--------------|
+| `22GB` | `2025-07-31` |
+### 【Model Download】
+```python
+from huggingface_hub import snapshot_download
+snapshot_download('QuantTrio/KAT-V1-40B-AWQ', cache_dir="your_local_path")
+```
+### 【Overview】
+<div align="center">
+  <img src="https://raw.githubusercontent.com/Anditty/OASIS/refs/heads/main/Group.svg" width="60%" alt="Kwaipilot" />
+</div>
+<hr>
+<div align="center" style="line-height: 1;">
+  <a href="https://huggingface.co/Kwaipilot/KAT-V1-40B" target="_blank">
+    <img alt="Hugging Face" src="https://img.shields.io/badge/HuggingFace-fcd022?style=for-the-badge&logo=huggingface&logoColor=000&labelColor"/>
+  </a>
+  <a href="https://arxiv.org/pdf/2507.08297" target="_blank">
+    <img alt="arXiv" src="https://img.shields.io/badge/arXiv-2507.08297-b31b1b.svg?style=for-the-badge"/>
+  </a>
+</div>
+# News
+- Kwaipilot-AutoThink ranks first among all open-source models on [LiveCodeBench Pro](https://livecodebenchpro.com/), a challenging benchmark explicitly designed to prevent data leakage, and even surpasses strong proprietary systems such as Seed and o3-mini.
+***
+# Introduction
+**KAT (Kwaipilot-AutoThink)** is an open-source large-language model that mitigates *over-thinking* by learning **when** to produce explicit chain-of-thought and **when** to answer directly.
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/61ee40a269351366e29972ad/zdnsvBmv6hWIC2Qxxy1fD.png)
+Its development follows a concise two-stage training pipeline:
+<table>
+  <thead>
+    <tr>
+      <th style="text-align:left; width:18%;">Stage</th>
+      <th style="text-align:left;">Core Idea</th>
+      <th style="text-align:left;">Key Techniques</th>
+      <th style="text-align:left;">Outcome</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><strong>1. Pre-training</strong></td>
+      <td>Inject knowledge while separating “reasoning” from “direct answering”.</td>
+      <td>
+        <em>Dual-regime data</em><br>
+        • <strong>Think-off</strong> queries labeled via a custom tagging system.<br>
+        • <strong>Think-on</strong> queries generated by a multi-agent solver.<br><br>
+        <em>Knowledge Distillation&nbsp;+&nbsp;Multi-Token Prediction</em> for fine-grained utility.
+      </td>
+      <td>Base model attains strong factual and reasoning skills without full-scale pre-training costs.</td>
+    </tr>
+    <tr>
+      <td><strong>2. Post-training</strong></td>
+      <td>Make reasoning optional and efficient.</td>
+      <td>
+        <em>Cold-start AutoThink</em> — majority vote sets the initial thinking mode.<br>
+        <em>Step-SRPO</em> — intermediate supervision rewards correct <strong>mode selection</strong> and <strong>answer accuracy</strong> under that mode.
+      </td>
+      <td>Model triggers CoT only when beneficial, reducing token use and speeding inference.</td>
+    </tr>
+  </tbody>
+</table>
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/61ee40a269351366e29972ad/cwFAEh7Rl3f4FU46z8gBZ.png)
+***
+# Data Format
+KAT produces responses in a **structured template** that makes the reasoning path explicit and machine-parsable.
+Two modes are supported:
+![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/61ee40a269351366e29972ad/H8iAvQMMT02nyvlYnI5q1.jpeg)
+## Special Tokens
+| Token | Description |
+|-------|-------------|
+| `<judge>` | Analyzes the input to decide whether explicit reasoning is needed. |
+| `<think_on>` / `<think_off>` | Indicates whether reasoning is **activated** (“on”) or **skipped** (“off”). |
+| `<think>` | Marks the start of the chain-of-thought segment when `think_on` is chosen. |
+| `<answer>` | Marks the start of the final user-facing answer. |
+***
+# 🔧 Quick Start
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+model_name = "Kwaipilot/KAT-V1-40B"
+# load the tokenizer and the model
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+# prepare the model input
+prompt = "Give me a short introduction to large language model."
+messages = [
+    {"role": "user", "content": prompt}
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+# conduct text completion
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=65536,
+    temperature=0.6,
+    top_p=0.95,
+)
+output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
+print("prompt:\n", prompt)
+print("content:\n", content)
+"""
+prompt:
+Give me a short introduction to large language model.
+content:
+<judge>
+The user's request is to provide a concise factual introduction to large language models, which involves retrieving and summarizing basic information. This task is straightforward as it only requires recalling and presenting well-known details without deeper analysis. No complex reasoning is needed here—just a simple explanation will suffice.
+</judge>
+<think_off>
+<answer>
+A **Large Language Model (LLM)** is an advanced AI system trained on vast amounts of text data to understand, generate, and process human-like language. Here’s a concise introduction:
+### Key Points:
+1. **Training**: Trained on diverse text sources (books, websites, etc.) using deep learning.
+2. **Capabilities**:
+   - Answer questions, generate text, summarize content, translate languages.
+   - Understand context, sentiment, and nuances in language.
+3. **Architecture**: Often based on **transformer models** (e.g., BERT, GPT, LLaMA).
+4. **Scale**: Billions of parameters, requiring massive computational resources.
+5. **Applications**: Chatbots, content creation, coding assistance, research, and more.
+### Examples:
+- **OpenAI’s GPT-4**: Powers ChatGPT.
+- **Google’s Gemini**: Used in Bard.
+- **Meta’s LLaMA**: Open-source alternative.
+### Challenges:
+- **Bias**: Can reflect biases in training data.
+- **Accuracy**: May hallucinate "facts" not grounded in reality.
+- **Ethics**: Raises concerns about misinformation and job displacement.
+LLMs represent a leap forward in natural language processing, enabling machines to interact with humans in increasingly sophisticated ways. 🌐🤖
+</answer>
+"""
+```
+***
+# Future Releases
+Looking ahead, we will publish a companion paper that fully documents the **AutoThink training framework**, covering:
+* Cold-start initialization procedures
+* Reinforcement-learning (Step-SRPO) strategies
+* Data curation and reward design details
+At the same time, we will open-source:
+* **Training resources** – the curated dual-regime datasets and RL codebase
+* **Model suite** – checkpoints at 1.5B, 7B, and 13B parameters, all trained with AutoThink gating

.mdl ADDED Viewed

Binary file (44 Bytes). View file

.msc ADDED Viewed

Binary file (1.29 kB). View file

.mv ADDED Viewed

	@@ -0,0 +1 @@


1	+ Revision:master,CreatedAt:1753930441

LICENSE ADDED Viewed

	@@ -0,0 +1,57 @@

+MODEL LICENSE AGREEMENT
+By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Model Works, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.
+1.   	DEFINITIONS.
+a.    	“Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of the Model Works or any portion or element thereof set forth herein.
+b.   	“Materials” shall mean, collectively, Us proprietary the Model and Documentation (and any portion thereof) as made available by Us under this Agreement.
+c.  	“Model” shall mean the large language models, image/video/audio/3D generation models, and multimodal large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us .
+d.   	“Output” shall mean the information and/or content output of Model or a Model Derivative that results from operating or otherwise using Model or a Model Derivative.
+e.    	“Model Derivatives” shall mean all: (i) modifications to the Model or any Model Derivative; (ii) works based on the Model or any Model Derivative; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of the Model or any Model Derivative, to that model in order to cause that model to perform similarly to the Model or a Model Derivative, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs or a Model Derivative for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
+f.    	“Model Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
+g.   	“Licensor” , “We” or “Us” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License, including the persons or entities that may have rights in the Model and/or distributing the Model.
+h.   	“Licensee”,  “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Model Works for any purpose and in any field of use.
+i.    	“Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
+2.   	LICENSE CONTENT.
+a.    	We grant You a non-exclusive, worldwide, non-transferable and royalty-free limited license under the intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
+b.   	You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Model Works, provided that You meet all of the following conditions:
+ (i)	You must provide all such Third Party recipients of the Model Works or products or services using them the source of the Model and a copy of this Agreement;
+(ii)    You must cause any modified documents to carry prominent notices stating that You changed the documents;
+(iii)	You may add Your own copyright statement to Your modifications and, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement.
+3.   	LICENSE RESTRICTIONS.
+a.    	Your use of the Model Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and the restrictions set forth in Attachment A . You must include the use restrictions referenced in these Sections 3(a) and 3(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Model Works and You must provide notice to subsequent users to whom You distribute that Model Works are subject to the use restrictions in these Sections 3(a) and 3(b).
+b.   	You must not use the Model Works or any Output or results of the Model Works to improve any other large  model (other than Model or Model Derivatives thereof).
+4.   	INTELLECTUAL PROPERTY.
+a.    	We retain ownership of all intellectual property rights in and to the Model and derivatives. Conditioned upon compliance with the terms and conditions of this Agreement, with respect to any derivative works and modifications of the Materials that are made by You, You are and will be the owner of such derivative works and modifications.
+b.  	No trademark license is granted to use the trade names, trademarks, service marks, or product names of Us, except as required to fulfill notice requirements under this Agreement or as required for reasonable and customary use in describing and redistributing the Materials.
+c.    	If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed.
+5.   	DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
+a. THE MODEL WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED  "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. WE MAKE NO WARRANTY AND ASSUME NO RESPONSIBILITY FOR THE SAFETY OR STABILITY OF THE MATERIALS AND ANY OUTPUT THEREFROM.
+b.   IN NO EVENT SHALL WE BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE MATERIALS OR ANY OUTPUT OF IT, NO MATTER HOW IT’S CAUSED.
+c. You will defend, indemnify and hold harmless Us from and against any claim by any third party arising out of or related to Your use or distribution of the Materials.
+6.   	SURVIVAL AND TERMINATION.
+a.  The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
+b.  We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Model Works. Sections 4(a), 4(c), 5 and 7 shall survive the termination of this Agreement.
+7.   	GOVERNING LAW AND JURISDICTION.
+a.   This Agreement and any dispute arising out of or relating to it will be governed by the laws of China (for the purpose of this agreement only, excluding Hong Kong, Macau, and Taiwan), without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
+b.   Any disputes arising from or related to this Agreement shall be under the jurisdiction of the People's Court where the Licensor is located.
+Attachment A
+Use Restrictions
+You agree not to use the Model or Derivatives of the Model:
+-    In any way that violates any applicable national or international law or regulation or infringes upon the lawful rights and interests of any third party;
+-    For military use in any way;
+-    For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
+-    To generate or disseminate verifiably false information and/or content with the purpose of harming others;
+-    To generate or disseminate inappropriate content subject to applicable regulatory requirements;
+-    To generate or disseminate personal identifiable information without due authorization or for unreasonable use;
+-    To defame, disparage or otherwise harass others;
+-    For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation;
+-    For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics;
+-    To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
+-    For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories.

README.md ADDED Viewed

	@@ -0,0 +1,239 @@

+---
+library_name: transformers
+license_link: LICENSE
+pipeline_tag: text-generation
+tags:
+- AWQ
+- 量化修复
+- vLLM
+base_model:
+  - Kwaipilot/KAT-V1-40B
+base_model_relation: quantized
+---
+# KAT-V1-40B-AWQ
+Base model: [Kwaipilot/KAT-V1-40B](https://huggingface.co/Kwaipilot/KAT-V1-40B)
+### 【vLLM Single Node with 4 GPUs Startup Command】
+```
+CONTEXT_LENGTH=32768
+vllm serve \
+    QuantTrio/KAT-V1-40B-AWQ \
+    --served-model-name KAT-V1-40B-AWQ \
+    --swap-space 16 \
+    --max-num-seqs 512 \
+    --max-model-len $CONTEXT_LENGTH \
+    --max-seq-len-to-capture $CONTEXT_LENGTH \
+    --gpu-memory-utilization 0.9 \
+    --tensor-parallel-size 4 \
+    --trust-remote-code \
+    --disable-log-requests \
+    --host 0.0.0.0 \
+    --port 8000
+```
+### 【Dependencies】
+```
+vllm==0.10.0
+```
+### 【Model Update Date】
+```
+2025-07-31
+1. first commit
+```
+### 【Model Files】
+| File Size   | Last Updated       |
+|--------|--------------|
+| `22GB` | `2025-07-31` |
+### 【Model Download】
+```python
+from huggingface_hub import snapshot_download
+snapshot_download('QuantTrio/KAT-V1-40B-AWQ', cache_dir="your_local_path")
+```
+### 【Overview】
+<div align="center">
+  <img src="https://raw.githubusercontent.com/Anditty/OASIS/refs/heads/main/Group.svg" width="60%" alt="Kwaipilot" />
+</div>
+<hr>
+<div align="center" style="line-height: 1;">
+  <a href="https://huggingface.co/Kwaipilot/KAT-V1-40B" target="_blank">
+    <img alt="Hugging Face" src="https://img.shields.io/badge/HuggingFace-fcd022?style=for-the-badge&logo=huggingface&logoColor=000&labelColor"/>
+  </a>
+  <a href="https://arxiv.org/pdf/2507.08297" target="_blank">
+    <img alt="arXiv" src="https://img.shields.io/badge/arXiv-2507.08297-b31b1b.svg?style=for-the-badge"/>
+  </a>
+</div>
+# News
+- Kwaipilot-AutoThink ranks first among all open-source models on [LiveCodeBench Pro](https://livecodebenchpro.com/), a challenging benchmark explicitly designed to prevent data leakage, and even surpasses strong proprietary systems such as Seed and o3-mini.
+***
+# Introduction
+**KAT (Kwaipilot-AutoThink)** is an open-source large-language model that mitigates *over-thinking* by learning **when** to produce explicit chain-of-thought and **when** to answer directly.
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/61ee40a269351366e29972ad/zdnsvBmv6hWIC2Qxxy1fD.png)
+Its development follows a concise two-stage training pipeline:
+<table>
+  <thead>
+    <tr>
+      <th style="text-align:left; width:18%;">Stage</th>
+      <th style="text-align:left;">Core Idea</th>
+      <th style="text-align:left;">Key Techniques</th>
+      <th style="text-align:left;">Outcome</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><strong>1. Pre-training</strong></td>
+      <td>Inject knowledge while separating “reasoning” from “direct answering”.</td>
+      <td>
+        <em>Dual-regime data</em><br>
+        • <strong>Think-off</strong> queries labeled via a custom tagging system.<br>
+        • <strong>Think-on</strong> queries generated by a multi-agent solver.<br><br>
+        <em>Knowledge Distillation&nbsp;+&nbsp;Multi-Token Prediction</em> for fine-grained utility.
+      </td>
+      <td>Base model attains strong factual and reasoning skills without full-scale pre-training costs.</td>
+    </tr>
+    <tr>
+      <td><strong>2. Post-training</strong></td>
+      <td>Make reasoning optional and efficient.</td>
+      <td>
+        <em>Cold-start AutoThink</em> — majority vote sets the initial thinking mode.<br>
+        <em>Step-SRPO</em> — intermediate supervision rewards correct <strong>mode selection</strong> and <strong>answer accuracy</strong> under that mode.
+      </td>
+      <td>Model triggers CoT only when beneficial, reducing token use and speeding inference.</td>
+    </tr>
+  </tbody>
+</table>
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/61ee40a269351366e29972ad/cwFAEh7Rl3f4FU46z8gBZ.png)
+***
+# Data Format
+KAT produces responses in a **structured template** that makes the reasoning path explicit and machine-parsable.
+Two modes are supported:
+![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/61ee40a269351366e29972ad/H8iAvQMMT02nyvlYnI5q1.jpeg)
+## Special Tokens
+| Token | Description |
+|-------|-------------|
+| `<judge>` | Analyzes the input to decide whether explicit reasoning is needed. |
+| `<think_on>` / `<think_off>` | Indicates whether reasoning is **activated** (“on”) or **skipped** (“off”). |
+| `<think>` | Marks the start of the chain-of-thought segment when `think_on` is chosen. |
+| `<answer>` | Marks the start of the final user-facing answer. |
+***
+# 🔧 Quick Start
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+model_name = "Kwaipilot/KAT-V1-40B"
+# load the tokenizer and the model
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+# prepare the model input
+prompt = "Give me a short introduction to large language model."
+messages = [
+    {"role": "user", "content": prompt}
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+# conduct text completion
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=65536,
+    temperature=0.6,
+    top_p=0.95,
+)
+output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
+print("prompt:\n", prompt)
+print("content:\n", content)
+"""
+prompt:
+Give me a short introduction to large language model.
+content:
+<judge>
+The user's request is to provide a concise factual introduction to large language models, which involves retrieving and summarizing basic information. This task is straightforward as it only requires recalling and presenting well-known details without deeper analysis. No complex reasoning is needed here—just a simple explanation will suffice.
+</judge>
+<think_off>
+<answer>
+A **Large Language Model (LLM)** is an advanced AI system trained on vast amounts of text data to understand, generate, and process human-like language. Here’s a concise introduction:
+### Key Points:
+1. **Training**: Trained on diverse text sources (books, websites, etc.) using deep learning.
+2. **Capabilities**:
+   - Answer questions, generate text, summarize content, translate languages.
+   - Understand context, sentiment, and nuances in language.
+3. **Architecture**: Often based on **transformer models** (e.g., BERT, GPT, LLaMA).
+4. **Scale**: Billions of parameters, requiring massive computational resources.
+5. **Applications**: Chatbots, content creation, coding assistance, research, and more.
+### Examples:
+- **OpenAI’s GPT-4**: Powers ChatGPT.
+- **Google’s Gemini**: Used in Bard.
+- **Meta’s LLaMA**: Open-source alternative.
+### Challenges:
+- **Bias**: Can reflect biases in training data.
+- **Accuracy**: May hallucinate "facts" not grounded in reality.
+- **Ethics**: Raises concerns about misinformation and job displacement.
+LLMs represent a leap forward in natural language processing, enabling machines to interact with humans in increasingly sophisticated ways. 🌐🤖
+</answer>
+"""
+```
+***
+# Future Releases
+Looking ahead, we will publish a companion paper that fully documents the **AutoThink training framework**, covering:
+* Cold-start initialization procedures
+* Reinforcement-learning (Step-SRPO) strategies
+* Data curation and reward design details
+At the same time, we will open-source:
+* **Training resources** – the curated dual-regime datasets and RL codebase
+* **Model suite** – checkpoints at 1.5B, 7B, and 13B parameters, all trained with AutoThink gating

added_tokens.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "</answer>": 151670,
+  "</judge>": 151668,
+  "</think>": 151666,
+  "</tool_call>": 151658,
+  "<answer>": 151669,
+  "<judge>": 151667,
+  "<think>": 151665,
+  "<think_off>": 151672,
+  "<think_on>": 151671,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,88 @@

+{#- Chat template: renders `messages` as <|im_start|>role ... <|im_end|> turns, optionally preceded by a tool-description system block. For every assistant turn except the last one, only the final answer text is kept (text inside the last <answer> block, or whatever follows <think_off> / </think>). #}
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- messages[0]['content'] }}
+    {%- else %}
+        {{- '' }}
+    {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" and not message.tool_calls %}
+        {#- Plain assistant turn: earlier turns are reduced to their answer text; the last turn is emitted unchanged. #}
+        {%- set content = message.content %}
+        {%- if not loop.last %}
+            {%- set answer_blocks = message.content.split('<answer>\n') %}
+            {%- if answer_blocks|length > 1 %}
+                {%- set last_answer_block = answer_blocks[-1] %}
+                {%- if '\n</answer>' in last_answer_block %}
+                    {%- set content = last_answer_block.split('\n</answer>')[0] %}
+                {%- else %}
+                    {%- set content = message.content.split('<think_off>')[-1].lstrip('\n') %}
+                    {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+                {%- endif %}
+            {%- else %}
+                {%- set content = message.content.split('<think_off>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set content = message.content %}
+        {#- Tool-call turns may carry empty or null content; only strip reasoning when there is text to split, otherwise .split() would fail. #}
+        {%- if message.content and not loop.last %}
+            {%- set answer_blocks = message.content.split('<answer>\n') %}
+            {%- if answer_blocks|length > 1 %}
+                {%- set last_answer_block = answer_blocks[-1] %}
+                {%- if '\n</answer>' in last_answer_block %}
+                    {%- set content = last_answer_block.split('\n</answer>')[0] %}
+                {%- else %}
+                    {%- set content = message.content.split('<think_off>')[-1].lstrip('\n') %}
+                    {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+                {%- endif %}
+            {%- else %}
+                {%- set content = message.content.split('<think_off>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content %}
+            {{- '\n' + content }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n{\"name\": \"' }}
+            {{- tool_call.name }}
+            {{- '\", \"arguments\": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- '}\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {#- Consecutive tool results are grouped into a single user turn. #}
+        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "name_or_path": "tclf90/KAT-V1-40B-AWQ",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 27648,
+  "max_position_embeddings": 131072,
+  "max_window_layers": 64,
+  "model_type": "qwen2",
+  "num_attention_heads": 40,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.46.1",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 152064,
+  "quantization_config": {
+    "quant_method": "awq",
+    "bits": 4,
+    "group_size": 128,
+    "version": "gemm",
+    "zero_point": true
+  }
+}

configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}

generation_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.6,
+  "top_p": 0.95,
+  "transformers_version": "4.52.4"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6500e55b56e36acc90315085adc30730239cdb5e27a2de72609277a95fe45db
+size 4990397496

model-00002-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7a845aa11eac7af3e7a4fb16c8328391cbfe85077204226e3d8bd826ade4ebf
+size 4961829824

model-00003-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fba14531e970182a81c8ebf112c4398a7ca01113efa67a29ea4cb744fa5555bb
+size 4972882976

model-00004-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99c6782a7362d334ed2bbd85bb84c9bc1ad8a1d06444c903b63bde48817fd5b8
+size 4998781176

model-00005-of-00005.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ebc129a0537e6f3e0786717708b91d5aed8de214395ef8de8b73fc3f86e40f2
+size 3458780720

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:528d9f2690ed3679eee70ed9b085fb78899b7f6dfc2a220220cfe89fdd3ffef5
+size 11423388

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,272 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<judge>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</judge>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151669": {
+      "content": "<answer>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151670": {
+      "content": "</answer>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151671": {
+      "content": "<think_on>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151672": {
+      "content": "<think_off>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff