Delete BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit

Browse files

Files changed (17) hide show

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/Qwen3 Coder a3b 480b DISTILL LM STUDIO TOOL USE.preset.json +0 -61
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/README.md +0 -82
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/added_tokens.json +0 -28
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/chat_template.jinja +0 -33
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/config.json +0 -433
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/generation_config.json +0 -12
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/merges.txt +0 -0
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00001-of-00004.safetensors +0 -3
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00002-of-00004.safetensors +0 -3
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00003-of-00004.safetensors +0 -3
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00004-of-00004.safetensors +0 -3
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model.safetensors.index.json +0 -0
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/qwen3coder_tool_parser.py +0 -689
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/special_tokens_map.json +0 -31
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/tokenizer.json +0 -3
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/tokenizer_config.json +0 -239
BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/vocab.json +0 -0

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/Qwen3 Coder a3b 480b DISTILL LM STUDIO TOOL USE.preset.json DELETED Viewed

@@ -1,61 +0,0 @@
-{
-  "identifier": "@local:qwen3-coder-a3b-480b-distill-lm-studio-tool-use",
-  "name": "Qwen3 Coder a3b - 480b DISTILL - LM STUDIO (TOOL USE)",
-  "changed": false,
-  "operation": {
-    "fields": [
-      {
-        "key": "llm.prediction.systemPrompt",
-        "value": "TOOL USE RULES\n- If you decide to call a tool, output the tool call ONLY. Do not output any other text in the same message.\n- Do NOT print control tokens like <start_of_turn>user or <start_of_turn>model in your output.\n- After a successful tool call, WAIT for the tool result. Do not immediately call the tool again unless the previous call failed or returned nextThoughtNeeded=true and you have NEW parameters.\n- Never call the same tool twice in a row with identical parameters.\n- After summarizing a tool result once, STOP."
-      },
-      {
-        "key": "llm.prediction.promptTemplate",
-        "value": {
-          "type": "jinja",
-          "jinjaPromptTemplate": {
-            "template": "{{ bos_token }}\n{%- if messages and messages[0]['role'] == 'system' -%}\n    {%- set first_user_prefix = messages[0]['content'] ~ '\\n\\n' -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = '' -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}\n    {{ '<start_of_turn>' ~ role ~ '\\n' ~ (first_user_prefix if loop.first else '') }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- elif item['type'] == 'tool_call' -%}\n                ```tool_code\n                {{ item['code'] | trim }}\n                ```\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception('Invalid content type') }}\n    {%- endif -%}\n    {{ '<end_of_turn>\\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt and (loop_messages | length == 0 or loop_messages[-1]['role'] == 'user') -%}\n    {{ '<start_of_turn>model\\n' }}\n{%- endif -%}"
-          },
-          "stopStrings": [
-            "<end_of_turn>",
-            "<start_of_turn>user",
-            "<start_of_turn>model",
-            "<start_of_turn>tool"
-          ],
-          "manualPromptTemplate": {
-            "beforeSystem": "<|im_start|>system\n",
-            "afterSystem": "<|im_end|>\n",
-            "beforeUser": "<|im_start|>user\n",
-            "afterUser": "<|im_end|>\n",
-            "beforeAssistant": "<|im_start|>assistant\n",
-            "afterAssistant": "<|im_end|>\n"
-          }
-        }
-      },
-      {
-        "key": "llm.prediction.topPSampling",
-        "value": {
-          "checked": true,
-          "value": 0.8
-        }
-      },
-      {
-        "key": "llm.prediction.topKSampling",
-        "value": 20
-      },
-      {
-        "key": "llm.prediction.temperature",
-        "value": 0.7
-      },
-      {
-        "key": "llm.prediction.repeatPenalty",
-        "value": {
-          "checked": true,
-          "value": 1.05
-        }
-      }
-    ]
-  },
-  "load": {
-    "fields": []
-  }
-}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/README.md DELETED Viewed

@@ -1,82 +0,0 @@
-# BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2 - MLX 4-bit Quantization
-A massive and gentlemanly thank you to the original author **[BasedBase](https://huggingface.co/BasedBase)** for creating this incredible model. This is a 4-bit quantized version of the original [Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-Fp32](https://huggingface.co/BasedBase/Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-Fp32) model, optimized for Apple Silicon with MLX.
-All of my additions and modifications are detailed below. The original, highly-detailed model card from `BasedBase` can be found further down this page.
----
-## My Contributions & Modifications
-### MLX Quantization
-This version of the model has been quantized to **4-bit precision** using the MLX framework, making it incredibly efficient to run on Apple Silicon devices.
-- **Framework:** MLX
-- **Quantization:** 4-bit
-- **Performance:** Blazing fast! From my limited testing, you can expect speeds of **70-90 tokens per second** on an M4 Pro Mac.
-### LM Studio Configuration & A Little Hackery...
-To get this model purring perfectly with tool-calling in LM Studio, a little creative problem-solving was required.
-> I'm not a big Qwen guy, so I re-used a prompt template I knew worked with my last Gemma 3 MLX quant and I adapted it. Hey, if it works, it works! 😉
-This workaround involved modifying the `.jinja` prompt template to ensure native tool-calling compatibility. Because of this, a few extra steps are needed for optimal performance:
-- **Additional Stop Strings:** Custom stop strings are necessary to prevent the model from generating unwanted text.
-- **Reinforcing System Prompt:** A specific system prompt helps guide the model's behavior.
-To make your life easier, I've included an **LM Studio preset** (`.preset.json` file) in this repository. This preset includes the correct stop strings and a well-tuned sampling/generation configuration. Just load it up, and you're good to go!
----
----
-## Original Model Card from BasedBase
-*(The following is the original information provided by the model's creator.)*
-### Model Description
-This model is a distilled version of **`Qwen/Qwen3-Coder-30B-A3B-Instruct`** designed to achieve coding and reasoning capabilities approaching those of a much larger teacher model.
-It is the result of applying a LoRA made via an SVD distillation pipeline, and then merging those weights into the base model. The core of this process was to transfer the nuanced knowledge from a **62-layer, 160-expert teacher model** into the more efficient **48-layer, 128-expert architecture** of the `Qwen3-Coder-30b-a3b` student model.
-The primary goal was to significantly enhance performance on **complex coding tasks**, where the specialized knowledge of Mixture-of-Experts (MoE) layers is critical.
-### The Distillation Methodology
-This model was not trained in a conventional sense. Instead, it was created using a layer-by-layer distillation process implemented in the `SVD-based` script. This pipeline was designed to ensure maximum precision and knowledge transfer.
-#### Core Components
-*   **Teacher Model:** `Qwen/Qwen3-Coder-480B-A35B-Instruct`.
-*   **Student Model:** `Qwen/Qwen3-Coder-30B-A3B-Instruct`.
-*   **LoRA Rank:** A high rank of **`r=2048`** was used for all modules to capture a very high degree of information from the teacher.
-#### The Distillation Pipeline
-For each corresponding layer in the student and teacher, the following pipeline was executed:
-1.  **Spherical Linear Interpolation (SLERP):** For layers that fall between two teacher layers, SLERP was used to create a smooth, geometrically sound interpolation of the teacher's weights. This avoids the pitfalls of simple linear averaging.
-2.  **Singular Value Decomposition (SVD) Projection:** The core of the distillation. The (potentially blended) teacher layer's weight matrix was decomposed into its fundamental components (`U`, `S`, `V`). The **top 2048** most important components were selected and then reconstructed to fit the student layer's smaller dimensions. This high-rank projection ensures maximum fidelity.
-3.  **Procrustes Analysis:** After projection, the newly created "synthetic" tensor was optimally rotated in high-dimensional space to perfectly align with the student's original pre-trained tensor. This minimizes the "distance" between them before calculating the difference.
-4.  **DARE (Drop and Rescale):** The difference tensor (`Distilled - Aligned Student`) was then purified using DARE. This process drops a significant percentage of the lowest-magnitude values (noise) and rescales the remaining important differences, creating a clean signal for the final LoRA.
-#### Mixture-of-Experts (MoE) Distillation
-The standout feature of this process is the full distillation of the MoE layers, which are critical for complex reasoning.
-*   **Expert Fingerprinting & Clustering:** To map the 160 teacher experts to the 128 student experts, each teacher expert was "fingerprinted." **K-Means clustering** was then used to group these 160 fingerprints into 128 distinct clusters.
-*   **Expert-to-Expert Distillation:** Each of the student's 128 experts was then distilled from a weighted blend of the teacher experts assigned to its cluster. This ensures the specialized knowledge (e.g., recursion, API usage, security patterns) is transferred.
-*   **Router Gate Distillation:** The main MoE router gate, which decides which expert to use for a given token, was also distilled to preserve the teacher's intelligent routing logic.
-### Intended Use
-This model is intended for **code generation**. It should be better at tasks that require understanding complex logic, algorithms, and software architecture.
-*   **Primary Use:** Code generation, refactoring, explanation (although, since it's an instruct model, it may not be perfect for explaining things), and debugging.
-*   **Out of Scope:** This is not a general-purpose conversational chatbot. While it can follow instructions, its knowledge is specialized for programming tasks.

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/added_tokens.json DELETED Viewed

@@ -1,28 +0,0 @@
-{
-  "</think>": 151668,
-  "</tool_call>": 151658,
-  "</tool_response>": 151666,
-  "<think>": 151667,
-  "<tool_call>": 151657,
-  "<tool_response>": 151665,
-  "<|box_end|>": 151649,
-  "<|box_start|>": 151648,
-  "<|endoftext|>": 151643,
-  "<|file_sep|>": 151664,
-  "<|fim_middle|>": 151660,
-  "<|fim_pad|>": 151662,
-  "<|fim_prefix|>": 151659,
-  "<|fim_suffix|>": 151661,
-  "<|im_end|>": 151645,
-  "<|im_start|>": 151644,
-  "<|image_pad|>": 151655,
-  "<|object_ref_end|>": 151647,
-  "<|object_ref_start|>": 151646,
-  "<|quad_end|>": 151651,
-  "<|quad_start|>": 151650,
-  "<|repo_name|>": 151663,
-  "<|video_pad|>": 151656,
-  "<|vision_end|>": 151653,
-  "<|vision_pad|>": 151654,
-  "<|vision_start|>": 151652
-}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/chat_template.jinja DELETED Viewed

@@ -1,33 +0,0 @@
-{{ bos_token }}
-{%- if messages and messages[0]['role'] == 'system' -%}
-    {%- set first_user_prefix = messages[0]['content'] ~ '\n\n' -%}
-    {%- set loop_messages = messages[1:] -%}
-{%- else -%}
-    {%- set first_user_prefix = '' -%}
-    {%- set loop_messages = messages -%}
-{%- endif -%}
-{%- for message in loop_messages -%}
-    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
-    {{ '<start_of_turn>' ~ role ~ '\n' ~ (first_user_prefix if loop.first else '') }}
-    {%- if message['content'] is string -%}
-        {{ message['content'] | trim }}
-    {%- elif message['content'] is iterable -%}
-        {%- for item in message['content'] -%}
-            {%- if item['type'] == 'image' -%}
-                {{ '<start_of_image>' }}
-            {%- elif item['type'] == 'text' -%}
-                {{ item['text'] | trim }}
-            {%- elif item['type'] == 'tool_call' -%}
-                ```tool_code
-                {{ item['code'] | trim }}
-                ```
-            {%- endif -%}
-        {%- endfor -%}
-    {%- else -%}
-        {{ raise_exception('Invalid content type') }}
-    {%- endif -%}
-    {{ '<end_of_turn>\n' }}
-{%- endfor -%}
-{%- if add_generation_prompt and (loop_messages | length == 0 or loop_messages[-1]['role'] == 'user') -%}
-    {{ '<start_of_turn>model\n' }}
-{%- endif -%}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/config.json DELETED Viewed

@@ -1,433 +0,0 @@
-{
-    "architectures": [
-        "Qwen3MoeForCausalLM"
-    ],
-    "attention_dropout": 0.0,
-    "decoder_sparse_step": 1,
-    "eos_token_id": 151645,
-    "head_dim": 128,
-    "hidden_act": "silu",
-    "hidden_size": 2048,
-    "initializer_range": 0.02,
-    "intermediate_size": 5472,
-    "max_position_embeddings": 262144,
-    "max_window_layers": 28,
-    "mlp_only_layers": [],
-    "model_type": "qwen3_moe",
-    "moe_intermediate_size": 768,
-    "norm_topk_prob": true,
-    "num_attention_heads": 32,
-    "num_experts": 128,
-    "num_experts_per_tok": 8,
-    "num_hidden_layers": 48,
-    "num_key_value_heads": 4,
-    "output_router_logits": false,
-    "qkv_bias": false,
-    "quantization": {
-        "group_size": 32,
-        "bits": 4,
-        "mode": "affine",
-        "model.layers.0.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.1.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.2.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.3.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.4.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.5.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.6.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.7.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.8.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.9.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.10.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.11.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.12.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.13.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.14.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.15.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.16.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.17.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.18.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.19.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.20.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.21.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.22.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.23.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.24.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.25.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.26.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.27.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.28.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.29.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.30.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.31.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.32.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.33.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.34.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.35.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.36.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.37.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.38.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.39.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.40.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.41.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.42.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.43.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.44.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.45.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.46.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.47.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        }
-    },
-    "quantization_config": {
-        "group_size": 32,
-        "bits": 4,
-        "mode": "affine",
-        "model.layers.0.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.1.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.2.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.3.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.4.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.5.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.6.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.7.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.8.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.9.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.10.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.11.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.12.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.13.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.14.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.15.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.16.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.17.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.18.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.19.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.20.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.21.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.22.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.23.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.24.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.25.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.26.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.27.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.28.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.29.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.30.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.31.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.32.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.33.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.34.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.35.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.36.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.37.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.38.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.39.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.40.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.41.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.42.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.43.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.44.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.45.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.46.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        },
-        "model.layers.47.mlp.gate": {
-            "group_size": 64,
-            "bits": 8
-        }
-    },
-    "rms_norm_eps": 1e-06,
-    "rope_scaling": null,
-    "rope_theta": 10000000,
-    "router_aux_loss_coef": 0.0,
-    "shared_expert_intermediate_size": 0,
-    "sliding_window": null,
-    "tie_word_embeddings": false,
-    "torch_dtype": "bfloat16",
-    "transformers_version": "4.52.3",
-    "use_cache": true,
-    "use_qk_norm": true,
-    "use_sliding_window": false,
-    "vocab_size": 151936
-}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/generation_config.json DELETED Viewed

@@ -1,12 +0,0 @@
-{
-  "pad_token_id": 151643,
-  "do_sample": true,
-  "eos_token_id": [
-    151645,
-    151643
-  ],
-  "repetition_penalty": 1.05,
-  "temperature": 0.7,
-  "top_p": 0.8,
-  "top_k": 20
-}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/merges.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00001-of-00004.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4fb65aa867a695472c5e167d484332cdfcd6053553f36dc476c92665108c31a9
-size 5271030693

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00002-of-00004.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b838beb2bf7d79a16f8dccec9e5a4f7f5097a39369611fe043d55d60469e2545
-size 5316125765

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00003-of-00004.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b4a4383af06ad95c82f3c00400042bfe26a33551016851e2b3278562d2389d52
-size 5328211659

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model-00004-of-00004.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:08b8bedf6e8d4c508afb7f00950cf8bab348c1d432cb807c01b8a01f1f86c8a8
-size 3173162067

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/model.safetensors.index.json DELETED Viewed

The diff for this file is too large to render. See raw diff

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/qwen3coder_tool_parser.py DELETED Viewed

@@ -1,689 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import ast
-import json
-import uuid
-from collections.abc import Sequence
-from typing import Any, List, Optional, Union
-import regex as re
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              ChatCompletionToolsParam,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
-from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
-    ToolParser, ToolParserManager)
-from vllm.logger import init_logger
-from vllm.transformers_utils.tokenizer import AnyTokenizer
-logger = init_logger(__name__)
-@ToolParserManager.register_module("qwen3_coder")
-class Qwen3CoderToolParser(ToolParser):
-    def __init__(self, tokenizer: AnyTokenizer):
-        """Set up parser state, sentinel strings, regexes and token ids.
-
-        Args:
-            tokenizer: Tokenizer forwarded to the ``ToolParser`` base class.
-
-        Raises:
-            ValueError: If ``self.model_tokenizer`` is falsy after the base
-                constructor has run.
-            RuntimeError: If ``<tool_call>`` or ``</tool_call>`` is not found
-                in ``self.vocab``.
-        """
-        # NOTE(review): `model_tokenizer` and `vocab`, read further down, are
-        # presumably populated by the base-class constructor -- confirm.
-        super().__init__(tokenizer)
-        self.current_tool_name_sent: bool = False
-        self.prev_tool_call_arr: list[dict] = []
-        # Overwritten with None by _reset_streaming_state() a few lines below.
-        self.current_tool_id: int = -1
-        self.streamed_args_for_tool: list[str] = []
-        # Sentinel tokens for streaming mode
-        self.tool_call_start_token: str = "<tool_call>"
-        self.tool_call_end_token: str = "</tool_call>"
-        self.tool_call_prefix: str = "<function="
-        self.function_end_token: str = "</function>"
-        self.parameter_prefix: str = "<parameter="
-        self.parameter_end_token: str = "</parameter>"
-        self.is_tool_call_started: bool = False
-        self.failed_count: int = 0
-        # Enhanced streaming state - reset for each new message
-        self._reset_streaming_state()
-        # Regex patterns
-        # Matches only tool calls that have both opening and closing tags.
-        self.tool_call_complete_regex = re.compile(
-            r"<tool_call>(.*?)</tool_call>", re.DOTALL)
-        # Group 1: closed tool call; group 2: unclosed call running to the end.
-        self.tool_call_regex = re.compile(
-            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
-        # Group 1: closed function body; group 2: unclosed body running to the end.
-        self.tool_call_function_regex = re.compile(
-            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
-        # A parameter ends at </parameter>, or just before the next <parameter=,
-        # or just before </function>, or at the end of the text.
-        self.tool_call_parameter_regex = re.compile(
-            r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
-            re.DOTALL)
-        if not self.model_tokenizer:
-            raise ValueError(
-                "The model tokenizer must be passed to the ToolParser "
-                "constructor during construction.")
-        # Token ids of the sentinels; the streaming path compares them against
-        # delta_token_ids.
-        self.tool_call_start_token_id = self.vocab.get(
-            self.tool_call_start_token)
-        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
-        if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
-            raise RuntimeError(
-                "Qwen3 XML Tool parser could not locate tool call start/end "
-                "tokens in the tokenizer!")
-        logger.info(
-            f"vLLM Successfully import tool parser {self.__class__.__name__} !"
-        )
-    def _generate_tool_call_id(self) -> str:
-        """Generate a unique tool call ID."""
-        return f"call_{uuid.uuid4().hex[:24]}"
-    def _reset_streaming_state(self):
-        """Reset all streaming state."""
-        self.current_tool_index = 0
-        self.is_tool_call_started = False
-        self.header_sent = False
-        self.current_tool_id = None
-        self.current_function_name = None
-        self.current_param_name = None
-        self.current_param_value = ""
-        self.param_count = 0
-        self.in_param = False
-        self.in_function = False
-        self.accumulated_text = ""
-        self.json_started = False
-        self.json_closed = False
-        # Store accumulated parameters for type conversion
-        self.accumulated_params = {}
-        self.streaming_request = None
-    def _get_arguments_config(
-            self, func_name: str,
-            tools: Optional[list[ChatCompletionToolsParam]]) -> dict:
-        """Extract argument configuration for a function."""
-        if tools is None:
-            return {}
-        for config in tools:
-            if not hasattr(config, "type") or not (hasattr(
-                    config, "function") and hasattr(config.function, "name")):
-                continue
-            if config.type == "function" and config.function.name == func_name:
-                if not hasattr(config.function, "parameters"):
-                    return {}
-                params = config.function.parameters
-                if isinstance(params, dict) and "properties" in params:
-                    return params["properties"]
-                elif isinstance(params, dict):
-                    return params
-                else:
-                    return {}
-        logger.warning(f"Tool '{func_name}' is not defined in the tools list.")
-        return {}
-    def _convert_param_value(self, param_value: str, param_name: str,
-                             param_config: dict, func_name: str) -> Any:
-        """Convert parameter value based on its type in the schema."""
-        # Handle null value for any type
-        if param_value.lower() == "null":
-            return None
-        if param_name not in param_config:
-            if param_config != {}:
-                logger.warning(
-                    f"Parsed parameter '{param_name}' is not defined in the tool "
-                    f"parameters for tool '{func_name}', directly returning the string value."
-                )
-            return param_value
-        if isinstance(param_config[param_name],
-                      dict) and "type" in param_config[param_name]:
-            param_type = str(param_config[param_name]["type"]).strip().lower()
-        else:
-            param_type = "string"
-        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
-            return param_value
-        elif param_type.startswith("int") or param_type.startswith(
-                "uint") or param_type.startswith(
-                    "long") or param_type.startswith(
-                        "short") or param_type.startswith("unsigned"):
-            try:
-                param_value = int(param_value)
-            except:
-                logger.warning(
-                    f"Parsed value '{param_value}' of parameter '{param_name}' is not an integer in tool "
-                    f"'{func_name}', degenerating to string.")
-            return param_value
-        elif param_type.startswith("num") or param_type.startswith("float"):
-            try:
-                float_param_value = float(param_value)
-                param_value = float_param_value if float_param_value - int(
-                    float_param_value) != 0 else int(float_param_value)
-            except:
-                logger.warning(
-                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a float in tool "
-                    f"'{func_name}', degenerating to string.")
-            return param_value
-        elif param_type in ["boolean", "bool", "binary"]:
-            param_value = param_value.lower()
-            if param_value not in ["true", "false"]:
-                logger.warning(
-                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a boolean (`true` of `false`) in tool '{func_name}', degenerating to false."
-                )
-            return param_value == "true"
-        else:
-            if param_type in ["object", "array", "arr"
-                              ] or param_type.startswith(
-                                  "dict") or param_type.startswith("list"):
-                try:
-                    param_value = json.loads(param_value)
-                    return param_value
-                except:
-                    logger.warning(
-                        f"Parsed value '{param_value}' of parameter '{param_name}' cannot be parsed with json.loads in tool "
-                        f"'{func_name}', will try other methods to parse it.")
-            try:
-                param_value = ast.literal_eval(param_value)  # safer
-            except:
-                logger.warning(
-                    f"Parsed value '{param_value}' of parameter '{param_name}' cannot be converted via Python `ast.literal_eval()` in tool '{func_name}', degenerating to string."
-                )
-            return param_value
-    def _parse_xml_function_call(
-            self, function_call_str: str,
-            tools: Optional[list[ChatCompletionToolsParam]]
-    ) -> Optional[ToolCall]:
-        # Extract function name
-        end_index = function_call_str.index(">")
-        function_name = function_call_str[:end_index]
-        param_config = self._get_arguments_config(function_name, tools)
-        parameters = function_call_str[end_index + 1:]
-        param_dict = {}
-        for match_text in self.tool_call_parameter_regex.findall(parameters):
-            idx = match_text.index(">")
-            param_name = match_text[:idx]
-            param_value = str(match_text[idx + 1:])
-            # Remove prefix and trailing \n
-            if param_value.startswith("\n"):
-                param_value = param_value[1:]
-            if param_value.endswith("\n"):
-                param_value = param_value[:-1]
-            param_dict[param_name] = self._convert_param_value(
-                param_value, param_name, param_config, function_name)
-        return ToolCall(
-            type="function",
-            function=FunctionCall(name=function_name,
-                                  arguments=json.dumps(param_dict,
-                                                       ensure_ascii=False)),
-        )
-    def _get_function_calls(self, model_output: str) -> List[str]:
-        # Find all tool calls
-        matched_ranges = self.tool_call_regex.findall(model_output)
-        raw_tool_calls = [
-            match[0] if match[0] else match[1] for match in matched_ranges
-        ]
-        # Back-off strategy if no tool_call tags found
-        if len(raw_tool_calls) == 0:
-            raw_tool_calls = [model_output]
-        raw_function_calls = []
-        for tool_call in raw_tool_calls:
-            raw_function_calls.extend(
-                self.tool_call_function_regex.findall(tool_call))
-        function_calls = [
-            match[0] if match[0] else match[1] for match in raw_function_calls
-        ]
-        return function_calls
-    def extract_tool_calls(
-        self,
-        model_output: str,
-        request: ChatCompletionRequest,
-    ) -> ExtractedToolCallInformation:
-        # Quick check to avoid unnecessary processing
-        if self.tool_call_prefix not in model_output:
-            return ExtractedToolCallInformation(tools_called=False,
-                                                tool_calls=[],
-                                                content=model_output)
-        try:
-            function_calls = self._get_function_calls(model_output)
-            if len(function_calls) == 0:
-                return ExtractedToolCallInformation(tools_called=False,
-                                                    tool_calls=[],
-                                                    content=model_output)
-            tool_calls = [
-                self._parse_xml_function_call(function_call_str, request.tools)
-                for function_call_str in function_calls
-            ]
-            # Populate prev_tool_call_arr for serving layer to set finish_reason
-            self.prev_tool_call_arr.clear()  # Clear previous calls
-            for tool_call in tool_calls:
-                if tool_call:
-                    self.prev_tool_call_arr.append({
-                        "name":
-                        tool_call.function.name,
-                        "arguments":
-                        tool_call.function.arguments,
-                    })
-            # Extract content before tool calls
-            content_index = model_output.find(self.tool_call_start_token)
-            content_index = content_index if content_index >= 0 else model_output.find(
-                self.tool_call_prefix)
-            content = model_output[:content_index]  # .rstrip()
-            return ExtractedToolCallInformation(
-                tools_called=(len(tool_calls) > 0),
-                tool_calls=tool_calls,
-                content=content if content else None,
-            )
-        except Exception:
-            logger.exception("Error in extracting tool call from response.")
-            return ExtractedToolCallInformation(tools_called=False,
-                                                tool_calls=[],
-                                                content=model_output)
-    def extract_tool_calls_streaming(
-        self,
-        previous_text: str,
-        current_text: str,
-        delta_text: str,
-        previous_token_ids: Sequence[int],
-        current_token_ids: Sequence[int],
-        delta_token_ids: Sequence[int],
-        request: ChatCompletionRequest,
-    ) -> Union[DeltaMessage, None]:
-        """Turn one streamed chunk into at most one ``DeltaMessage``.
-
-        Streaming state is reset whenever ``previous_text`` is empty. Each
-        call returns at most one of:
-
-        * ``DeltaMessage(content=...)`` for text outside tool calls (or an
-          empty string on a text-less final chunk);
-        * ``DeltaMessage(tool_calls=[...])`` carrying, in turn, the function
-          header (id and name), an opening ``{``, one ``"name": value`` JSON
-          fragment per completed parameter, and a closing ``}``;
-        * ``None`` when there is nothing to emit for this chunk.
-
-        ``previous_token_ids`` and ``current_token_ids`` are not read by this
-        method.
-        """
-        # Store request for type conversion
-        if not previous_text:
-            self._reset_streaming_state()
-            self.streaming_request = request
-        # If no delta text, return None unless it's an EOS token after tool calls
-        if not delta_text:
-            # Check if this is an EOS token after all tool calls are complete
-            # We check for tool calls in the text even if is_tool_call_started is False
-            # because it might have been reset after processing all tools
-            if delta_token_ids and self.tool_call_end_token_id not in delta_token_ids:
-                # Count complete tool calls
-                complete_calls = len(
-                    self.tool_call_complete_regex.findall(current_text))
-                # If we have completed tool calls and populated prev_tool_call_arr
-                if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
-                    # Check if all tool calls are closed
-                    open_calls = current_text.count(
-                        self.tool_call_start_token) - current_text.count(
-                            self.tool_call_end_token)
-                    if open_calls == 0:
-                        # Return empty delta message to allow finish_reason processing
-                        return DeltaMessage(content="")
-                elif not self.is_tool_call_started and current_text:
-                    # This is a regular content response that's now complete
-                    return DeltaMessage(content="")
-            return None
-        # Update accumulated text
-        self.accumulated_text = current_text
-        # Check if we need to advance to next tool
-        if self.json_closed and not self.in_function:
-            # Check if this tool call has ended
-            tool_ends = current_text.count(self.tool_call_end_token)
-            if tool_ends > self.current_tool_index:
-                # This tool has ended, advance to next
-                self.current_tool_index += 1
-                self.header_sent = False
-                self.param_count = 0
-                self.json_started = False
-                self.json_closed = False
-                self.accumulated_params = {}
-                # Check if there are more tool calls
-                tool_starts = current_text.count(self.tool_call_start_token)
-                if self.current_tool_index >= tool_starts:
-                    # No more tool calls
-                    self.is_tool_call_started = False
-                # Continue processing next tool
-                return None
-        # Handle normal content before tool calls
-        if not self.is_tool_call_started:
-            # Check if tool call is starting
-            if self.tool_call_start_token_id in delta_token_ids or self.tool_call_start_token in delta_text:
-                self.is_tool_call_started = True
-                # Return any content before the tool call
-                if self.tool_call_start_token in delta_text:
-                    content_before = delta_text[:delta_text.index(
-                        self.tool_call_start_token)]
-                    if content_before:
-                        return DeltaMessage(content=content_before)
-                return None
-            else:
-                # Check if we're between tool calls - skip whitespace
-                if current_text.rstrip().endswith(self.tool_call_end_token):
-                    # We just ended a tool call, skip whitespace
-                    if delta_text.strip() == "":
-                        return None
-                # Normal content, no tool call
-                return DeltaMessage(content=delta_text)
-        # Check if we're between tool calls (waiting for next one)
-        # Count tool calls we've seen vs processed
-        tool_starts_count = current_text.count(self.tool_call_start_token)
-        if self.current_tool_index >= tool_starts_count:
-            # We're past all tool calls, shouldn't be here
-            return None
-        # We're in a tool call, find the current tool call portion
-        # Need to find the correct tool call based on current_tool_index
-        # Collect the offset of every <tool_call> tag seen so far.
-        tool_starts = []
-        idx = 0
-        while True:
-            idx = current_text.find(self.tool_call_start_token, idx)
-            if idx == -1:
-                break
-            tool_starts.append(idx)
-            idx += len(self.tool_call_start_token)
-        if self.current_tool_index >= len(tool_starts):
-            # No more tool calls to process yet
-            return None
-        tool_start_idx = tool_starts[self.current_tool_index]
-        # Find where this tool call ends (or current position if not ended yet)
-        tool_end_idx = current_text.find(self.tool_call_end_token,
-                                         tool_start_idx)
-        if tool_end_idx == -1:
-            tool_text = current_text[tool_start_idx:]
-        else:
-            tool_text = current_text[tool_start_idx:tool_end_idx +
-                                     len(self.tool_call_end_token)]
-        # Looking for function header
-        if not self.header_sent:
-            if self.tool_call_prefix in tool_text:
-                func_start = tool_text.find(self.tool_call_prefix) + len(
-                    self.tool_call_prefix)
-                func_end = tool_text.find(">", func_start)
-                if func_end != -1:
-                    # Found complete function name
-                    self.current_function_name = tool_text[func_start:func_end]
-                    self.current_tool_id = self._generate_tool_call_id()
-                    self.header_sent = True
-                    self.in_function = True
-                    # IMPORTANT: Add to prev_tool_call_arr immediately when we detect a tool call
-                    # This ensures finish_reason="tool_calls" even if parsing isn't complete
-                    # NOTE(review): entries are matched by function name only, so a
-                    # second call to the same function in one response does not get
-                    # its own entry here -- confirm this is intended.
-                    already_added = any(
-                        tool.get("name") == self.current_function_name
-                        for tool in self.prev_tool_call_arr)
-                    if not already_added:
-                        self.prev_tool_call_arr.append({
-                            "name": self.current_function_name,
-                            "arguments":
-                            "{}",  # Placeholder, will be updated later
-                        })
-                    # Send header with function info
-                    return DeltaMessage(tool_calls=[
-                        DeltaToolCall(
-                            index=self.current_tool_index,
-                            id=self.current_tool_id,
-                            function=DeltaFunctionCall(
-                                name=self.current_function_name, arguments=""),
-                            type="function",
-                        )
-                    ])
-            # Header not complete yet; wait for more text.
-            return None
-        # We've sent header, now handle function body
-        if self.in_function:
-            # Send opening brace if not sent yet
-            if not self.json_started and self.parameter_prefix not in delta_text:
-                self.json_started = True
-                return DeltaMessage(tool_calls=[
-                    DeltaToolCall(
-                        index=self.current_tool_index,
-                        function=DeltaFunctionCall(arguments="{"),
-                    )
-                ])
-            # Make sure json_started is set if we're processing parameters
-            # (on this path no "{" delta is emitted for the call).
-            if not self.json_started:
-                self.json_started = True
-            # Check for function end in accumulated text
-            if not self.json_closed and self.function_end_token in tool_text:
-                # Close JSON
-                self.json_closed = True
-                # Extract the complete tool call to update prev_tool_call_arr with final arguments
-                # Find the function content
-                func_start = tool_text.find(self.tool_call_prefix) + len(
-                    self.tool_call_prefix)
-                func_content_end = tool_text.find(self.function_end_token,
-                                                  func_start)
-                if func_content_end != -1:
-                    func_content = tool_text[func_start:func_content_end]
-                    # Parse to get the complete arguments
-                    try:
-                        parsed_tool = self._parse_xml_function_call(
-                            func_content, self.streaming_request.tools
-                            if self.streaming_request else None)
-                        if parsed_tool:
-                            # Update existing entry in prev_tool_call_arr with complete arguments
-                            # (the first entry whose name matches is the one updated).
-                            for i, tool in enumerate(self.prev_tool_call_arr):
-                                if tool.get(
-                                        "name") == parsed_tool.function.name:
-                                    self.prev_tool_call_arr[i][
-                                        "arguments"] = parsed_tool.function.arguments
-                                    break
-                    except Exception:
-                        pass  # Ignore parsing errors during streaming
-                result = DeltaMessage(tool_calls=[
-                    DeltaToolCall(
-                        index=self.current_tool_index,
-                        function=DeltaFunctionCall(arguments="}"),
-                    )
-                ])
-                # Reset state for next tool
-                self.in_function = False
-                self.json_closed = True
-                self.accumulated_params = {}
-                return result
-            # Look for parameters
-            # Find all parameter starts
-            param_starts = []
-            idx = 0
-            while True:
-                idx = tool_text.find(self.parameter_prefix, idx)
-                if idx == -1:
-                    break
-                param_starts.append(idx)
-                idx += len(self.parameter_prefix)
-            # Check if we should start a new parameter
-            if not self.in_param and self.param_count < len(param_starts):
-                if len(param_starts) > self.param_count:
-                    # Process the next parameter
-                    param_idx = param_starts[self.param_count]
-                    param_start = param_idx + len(self.parameter_prefix)
-                    remaining = tool_text[param_start:]
-                    if ">" in remaining:
-                        # We have the complete parameter name
-                        name_end = remaining.find(">")
-                        self.current_param_name = remaining[:name_end]
-                        # Find the parameter value
-                        value_start = param_start + name_end + 1
-                        value_text = tool_text[value_start:]
-                        if value_text.startswith("\n"):
-                            value_text = value_text[1:]
-                        # Find where this parameter ends
-                        param_end_idx = value_text.find(
-                            self.parameter_end_token)
-                        if param_end_idx == -1:
-                            # No closing tag, look for next parameter or function end
-                            next_param_idx = value_text.find(
-                                self.parameter_prefix)
-                            func_end_idx = value_text.find(
-                                self.function_end_token)
-                            if next_param_idx != -1 and (func_end_idx == -1
-                                                         or next_param_idx
-                                                         < func_end_idx):
-                                param_end_idx = next_param_idx
-                            elif func_end_idx != -1:
-                                param_end_idx = func_end_idx
-                            else:
-                                # Neither found, check if tool call is complete
-                                if self.tool_call_end_token in tool_text:
-                                    # Tool call is complete, so parameter must be complete too
-                                    # Use all remaining text before function end as value
-                                    param_end_idx = len(value_text)
-                                else:
-                                    # Still streaming, wait for more content
-                                    return None
-                        if param_end_idx != -1:
-                            # Complete parameter found
-                            param_value = value_text[:param_end_idx]
-                            if param_value.endswith("\n"):
-                                param_value = param_value[:-1]
-                            # Store raw value for later processing
-                            self.accumulated_params[
-                                self.current_param_name] = param_value
-                            # Get parameter configuration for type conversion
-                            param_config = self._get_arguments_config(
-                                self.current_function_name,
-                                self.streaming_request.tools
-                                if self.streaming_request else None)
-                            # Convert the parameter value to the appropriate type
-                            converted_value = self._convert_param_value(
-                                param_value, self.current_param_name,
-                                param_config, self.current_function_name)
-                            # Build JSON fragment based on the converted type
-                            # Use json.dumps to properly serialize the value
-                            serialized_value = json.dumps(converted_value,
-                                                          ensure_ascii=False)
-                            # NOTE(review): the parameter name is inserted between
-                            # quotes without JSON escaping -- confirm names can
-                            # never contain '"' or '\'.
-                            if self.param_count == 0:
-                                json_fragment = f'"{self.current_param_name}": {serialized_value}'
-                            else:
-                                json_fragment = f', "{self.current_param_name}": {serialized_value}'
-                            self.param_count += 1
-                            return DeltaMessage(tool_calls=[
-                                DeltaToolCall(
-                                    index=self.current_tool_index,
-                                    function=DeltaFunctionCall(
-                                        arguments=json_fragment),
-                                )
-                            ])
-            # Continue parameter value - Not used in the current implementation
-            # since we process complete parameters above
-            # NOTE(review): nothing in this class sets in_param to True, so the
-            # branch below looks unreachable -- confirm before relying on it.
-            if self.in_param:
-                if self.parameter_end_token in delta_text:
-                    # End of parameter
-                    end_idx = delta_text.find(self.parameter_end_token)
-                    value_chunk = delta_text[:end_idx]
-                    # Skip past > if at start
-                    if not self.current_param_value and ">" in value_chunk:
-                        gt_idx = value_chunk.find(">")
-                        value_chunk = value_chunk[gt_idx + 1:]
-                    if not self.current_param_value and value_chunk.startswith(
-                            "\n"):
-                        value_chunk = value_chunk[1:]
-                    # Store complete value
-                    full_value = self.current_param_value + value_chunk
-                    self.accumulated_params[
-                        self.current_param_name] = full_value
-                    # Get parameter configuration for type conversion
-                    param_config = self._get_arguments_config(
-                        self.current_function_name,
-                        self.streaming_request.tools
-                        if self.streaming_request else None)
-                    # Convert the parameter value to the appropriate type
-                    converted_value = self._convert_param_value(
-                        full_value, self.current_param_name, param_config,
-                        self.current_function_name)
-                    # Serialize the converted value
-                    # (serialized_value is computed but not used below.)
-                    serialized_value = json.dumps(converted_value,
-                                                  ensure_ascii=False)
-                    # Since we've been streaming the quoted version, we need to close it properly
-                    # This is complex - for now just complete the value
-                    self.in_param = False
-                    self.current_param_value = ""
-                    # Just close the current parameter string
-                    return DeltaMessage(tool_calls=[
-                        DeltaToolCall(
-                            index=self.current_tool_index,
-                            function=DeltaFunctionCall(
-                                arguments='"'),  # Close the string quote
-                        )
-                    ])
-                else:
-                    # Continue accumulating value
-                    value_chunk = delta_text
-                    # Handle first chunk after param name
-                    if not self.current_param_value and ">" in value_chunk:
-                        gt_idx = value_chunk.find(">")
-                        value_chunk = value_chunk[gt_idx + 1:]
-                    if not self.current_param_value and value_chunk.startswith(
-                            "\n"):
-                        value_chunk = value_chunk[1:]
-                    if value_chunk:
-                        # Stream the escaped delta
-                        # ([1:-1] strips the surrounding quotes json.dumps adds).
-                        prev_escaped = json.dumps(
-                            self.current_param_value, ensure_ascii=False
-                        )[1:-1] if self.current_param_value else ""
-                        self.current_param_value += value_chunk
-                        full_escaped = json.dumps(self.current_param_value,
-                                                  ensure_ascii=False)[1:-1]
-                        delta_escaped = full_escaped[len(prev_escaped):]
-                        if delta_escaped:
-                            return DeltaMessage(tool_calls=[
-                                DeltaToolCall(
-                                    index=self.current_tool_index,
-                                    function=DeltaFunctionCall(
-                                        arguments=delta_escaped),
-                                )
-                            ])
-        return None

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/special_tokens_map.json DELETED Viewed

@@ -1,31 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>",
-    "<|object_ref_start|>",
-    "<|object_ref_end|>",
-    "<|box_start|>",
-    "<|box_end|>",
-    "<|quad_start|>",
-    "<|quad_end|>",
-    "<|vision_start|>",
-    "<|vision_end|>",
-    "<|vision_pad|>",
-    "<|image_pad|>",
-    "<|video_pad|>"
-  ],
-  "eos_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/tokenizer.json DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
-size 11422654

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/tokenizer_config.json DELETED Viewed

@@ -1,239 +0,0 @@
-{
-  "add_bos_token": false,
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151646": {
-      "content": "<|object_ref_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151647": {
-      "content": "<|object_ref_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151648": {
-      "content": "<|box_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151649": {
-      "content": "<|box_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151650": {
-      "content": "<|quad_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151651": {
-      "content": "<|quad_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151652": {
-      "content": "<|vision_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151653": {
-      "content": "<|vision_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151654": {
-      "content": "<|vision_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151655": {
-      "content": "<|image_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151656": {
-      "content": "<|video_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151657": {
-      "content": "<tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151658": {
-      "content": "</tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151659": {
-      "content": "<|fim_prefix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151660": {
-      "content": "<|fim_middle|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151661": {
-      "content": "<|fim_suffix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151662": {
-      "content": "<|fim_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151663": {
-      "content": "<|repo_name|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151664": {
-      "content": "<|file_sep|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151665": {
-      "content": "<tool_response>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151666": {
-      "content": "</tool_response>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151667": {
-      "content": "<think>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151668": {
-      "content": "</think>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    }
-  },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>",
-    "<|object_ref_start|>",
-    "<|object_ref_end|>",
-    "<|box_start|>",
-    "<|box_end|>",
-    "<|quad_start|>",
-    "<|quad_end|>",
-    "<|vision_start|>",
-    "<|vision_end|>",
-    "<|vision_pad|>",
-    "<|image_pad|>",
-    "<|video_pad|>"
-  ],
-  "bos_token": null,
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "extra_special_tokens": {},
-  "model_max_length": 1048576,
-  "pad_token": "<|endoftext|>",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
-}

BasedBase-Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2-MLX-4bit/vocab.json DELETED Viewed

The diff for this file is too large to render. See raw diff