Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- added_tokens.json +47 -0
- chat_template.json +2 -2
- config.json +50 -47
- generation_config.json +3 -2
- merges.txt +1 -0
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- model.safetensors.index.json +89 -89
- preprocessor_config.json +17 -7
- special_tokens_map.json +7 -1
- tokenizer.json +0 -0
- tokenizer_config.json +2 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|assistant|>": 151672,
|
| 5 |
+
"<|box_end|>": 151649,
|
| 6 |
+
"<|box_start|>": 151648,
|
| 7 |
+
"<|endofassistant|>": 151673,
|
| 8 |
+
"<|endofimg|>": 151667,
|
| 9 |
+
"<|endofslice|>": 151682,
|
| 10 |
+
"<|endofsystemprompt|>": 151669,
|
| 11 |
+
"<|endoftext|>": 151643,
|
| 12 |
+
"<|endofuser|>": 151671,
|
| 13 |
+
"<|file_sep|>": 151664,
|
| 14 |
+
"<|fim_middle|>": 151660,
|
| 15 |
+
"<|fim_pad|>": 151662,
|
| 16 |
+
"<|fim_prefix|>": 151659,
|
| 17 |
+
"<|fim_suffix|>": 151661,
|
| 18 |
+
"<|im_end|>": 151645,
|
| 19 |
+
"<|im_start|>": 151644,
|
| 20 |
+
"<|image_gen_end|>": 151687,
|
| 21 |
+
"<|image_gen_start|>": 151686,
|
| 22 |
+
"<|image_pad|>": 151655,
|
| 23 |
+
"<|imgpad|>": 151665,
|
| 24 |
+
"<|imgrowend|>": 151683,
|
| 25 |
+
"<|img|>": 151666,
|
| 26 |
+
"<|object_ref_end|>": 151647,
|
| 27 |
+
"<|object_ref_start|>": 151646,
|
| 28 |
+
"<|pictotext|>": 151679,
|
| 29 |
+
"<|pic|>": 151677,
|
| 30 |
+
"<|polygon_end|>": 151685,
|
| 31 |
+
"<|polygon_start|>": 151684,
|
| 32 |
+
"<|quad_end|>": 151651,
|
| 33 |
+
"<|quad_start|>": 151650,
|
| 34 |
+
"<|ref_end|>": 151675,
|
| 35 |
+
"<|ref_start|>": 151674,
|
| 36 |
+
"<|repo_name|>": 151663,
|
| 37 |
+
"<|slice|>": 151681,
|
| 38 |
+
"<|systemprompt|>": 151668,
|
| 39 |
+
"<|text|>": 151678,
|
| 40 |
+
"<|user|>": 151670,
|
| 41 |
+
"<|video_pad|>": 151656,
|
| 42 |
+
"<|vision_end|>": 151653,
|
| 43 |
+
"<|vision_pad|>": 151654,
|
| 44 |
+
"<|vision_start|>": 151652,
|
| 45 |
+
"[PAD]": 151680,
|
| 46 |
+
"[SEP]": 151676
|
| 47 |
+
}
|
chat_template.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{%- for m in messages %}{%- if m.role == 'system' %}{{- '<|system|>' + m.content + '<|endofsystem|>\n' }}{%- elif m.role == 'user' %}{% if m.content is string %}{{- '<|user|>' + m.content + '<|endofuser|>' }}{% else %} {% for content in m.content %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|img|><|imgpad|><|endofimg|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|img|><|video_pad|><|endofimg|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}{%- endif %}{%- elif m.role == 'assistant' %}{{- '<|assistant|>' + m.content }}{%- if not loop.last %}{{- '<|endofassistant|>' }}{%- endif %}{%- endif %}{%- endfor %}{%- if messages[-1].role != 'assistant' %}{{- '<|assistant|>' }}{%- endif %}"
|
| 3 |
+
}
|
config.json
CHANGED
|
@@ -1,51 +1,54 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"hidden_size": 1536,
|
|
|
|
| 14 |
"initializer_range": 0.02,
|
| 15 |
-
"intermediate_size":
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
"num_attention_heads": 12,
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
"vocab_size": 151936,
|
| 31 |
-
"image_token_id": 151665,
|
| 32 |
-
"video_token_id": 151656,
|
| 33 |
-
"vision_config": {
|
| 34 |
-
"embed_dim": 1536,
|
| 35 |
-
"hidden_size": 1536,
|
| 36 |
-
"intermediate_size": 4224,
|
| 37 |
-
"num_hidden_layers": 42,
|
| 38 |
-
"num_attention_heads": 12,
|
| 39 |
-
"num_channels": 3,
|
| 40 |
-
"patch_size": 14,
|
| 41 |
-
"post_norm": true,
|
| 42 |
-
"rms_norm_eps": 1e-05,
|
| 43 |
-
"spatial_merge_size": 2,
|
| 44 |
-
"temporal_patch_size": 1,
|
| 45 |
-
"use_bias": false,
|
| 46 |
-
"attn_implementation": "flash_attention_2",
|
| 47 |
-
"init_merger_std": 0.02,
|
| 48 |
-
"initializer_range": 0.02,
|
| 49 |
-
"is_causal": false
|
| 50 |
-
}
|
| 51 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DotsOCRForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": true,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"auto_map": {
|
| 8 |
+
"AutoConfig": "configuration_dots.DotsOCRConfig",
|
| 9 |
+
"AutoModelForCausalLM": "modeling_dots_ocr.DotsOCRForCausalLM"
|
| 10 |
+
},
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 1536,
|
| 13 |
+
"image_token_id": 151665,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8960,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"max_window_layers": 28,
|
| 18 |
+
"model_type": "dots_ocr",
|
| 19 |
+
"num_attention_heads": 12,
|
| 20 |
+
"num_hidden_layers": 28,
|
| 21 |
+
"num_key_value_heads": 2,
|
| 22 |
+
"rms_norm_eps": 1e-06,
|
| 23 |
+
"rope_scaling": null,
|
| 24 |
+
"rope_theta": 1000000,
|
| 25 |
+
"sliding_window": 131072,
|
| 26 |
+
"tie_word_embeddings": false,
|
| 27 |
+
"torch_dtype": "bfloat16",
|
| 28 |
+
"transformers_version": "4.51.3",
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"use_sliding_window": false,
|
| 31 |
+
"video_token_id": 151656,
|
| 32 |
+
"vision_config": {
|
| 33 |
+
"_attn_implementation_autoset": true,
|
| 34 |
+
"attn_implementation": "sdpa",
|
| 35 |
+
"embed_dim": 1536,
|
| 36 |
+
"gradient_checkpointing": false,
|
| 37 |
"hidden_size": 1536,
|
| 38 |
+
"init_merger_std": 0.02,
|
| 39 |
"initializer_range": 0.02,
|
| 40 |
+
"intermediate_size": 4224,
|
| 41 |
+
"is_causal": false,
|
| 42 |
+
"model_type": "dots_vit",
|
| 43 |
"num_attention_heads": 12,
|
| 44 |
+
"num_channels": 3,
|
| 45 |
+
"num_hidden_layers": 42,
|
| 46 |
+
"patch_size": 14,
|
| 47 |
+
"post_norm": true,
|
| 48 |
+
"rms_norm_eps": 1e-05,
|
| 49 |
+
"spatial_merge_size": 2,
|
| 50 |
+
"temporal_patch_size": 1,
|
| 51 |
+
"use_bias": false
|
| 52 |
+
},
|
| 53 |
+
"vocab_size": 151936
|
| 54 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generation_config.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"max_length": 32768,
|
| 3 |
"eos_token_id": [
|
| 4 |
151643,
|
| 5 |
151673
|
| 6 |
-
]
|
|
|
|
|
|
|
| 7 |
}
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"eos_token_id": [
|
| 3 |
151643,
|
| 4 |
151673
|
| 5 |
+
],
|
| 6 |
+
"max_length": 32768,
|
| 7 |
+
"transformers_version": "4.51.3"
|
| 8 |
}
|
merges.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
Ġ Ġ
|
| 2 |
ĠĠ ĠĠ
|
| 3 |
i n
|
|
|
|
| 1 |
+
#version: 0.2
|
| 2 |
Ġ Ġ
|
| 3 |
ĠĠ ĠĠ
|
| 4 |
i n
|
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4988290528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f8cc38dae12107ee8633e42f7b8b96a8a7a8773714bc7aa04514ba02ecedbf7
|
| 3 |
size 4988290528
|
model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1090141200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10ad7ef4b15cde199c2b0ead551f9de126e2d39db93fc27e9bc23e8e05a710b4
|
| 3 |
size 1090141200
|
model.safetensors.index.json
CHANGED
|
@@ -430,43 +430,43 @@
|
|
| 430 |
"vision_tower.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 431 |
"vision_tower.blocks.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 432 |
"vision_tower.blocks.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 433 |
-
"vision_tower.blocks.2.mlp.fc3.weight": "model-
|
| 434 |
-
"vision_tower.blocks.2.norm1.weight": "model-
|
| 435 |
-
"vision_tower.blocks.2.norm2.weight": "model-
|
| 436 |
-
"vision_tower.blocks.20.attn.proj.weight": "model-
|
| 437 |
-
"vision_tower.blocks.20.attn.qkv.weight": "model-
|
| 438 |
-
"vision_tower.blocks.20.mlp.fc1.weight": "model-
|
| 439 |
-
"vision_tower.blocks.20.mlp.fc2.weight": "model-
|
| 440 |
-
"vision_tower.blocks.20.mlp.fc3.weight": "model-
|
| 441 |
-
"vision_tower.blocks.20.norm1.weight": "model-
|
| 442 |
-
"vision_tower.blocks.20.norm2.weight": "model-
|
| 443 |
-
"vision_tower.blocks.21.attn.proj.weight": "model-
|
| 444 |
-
"vision_tower.blocks.21.attn.qkv.weight": "model-
|
| 445 |
-
"vision_tower.blocks.21.mlp.fc1.weight": "model-
|
| 446 |
-
"vision_tower.blocks.21.mlp.fc2.weight": "model-
|
| 447 |
-
"vision_tower.blocks.21.mlp.fc3.weight": "model-
|
| 448 |
-
"vision_tower.blocks.21.norm1.weight": "model-
|
| 449 |
-
"vision_tower.blocks.21.norm2.weight": "model-
|
| 450 |
-
"vision_tower.blocks.22.attn.proj.weight": "model-
|
| 451 |
-
"vision_tower.blocks.22.attn.qkv.weight": "model-
|
| 452 |
-
"vision_tower.blocks.22.mlp.fc1.weight": "model-
|
| 453 |
-
"vision_tower.blocks.22.mlp.fc2.weight": "model-
|
| 454 |
-
"vision_tower.blocks.22.mlp.fc3.weight": "model-
|
| 455 |
-
"vision_tower.blocks.22.norm1.weight": "model-
|
| 456 |
-
"vision_tower.blocks.22.norm2.weight": "model-
|
| 457 |
-
"vision_tower.blocks.23.attn.proj.weight": "model-
|
| 458 |
-
"vision_tower.blocks.23.attn.qkv.weight": "model-
|
| 459 |
-
"vision_tower.blocks.23.mlp.fc1.weight": "model-
|
| 460 |
-
"vision_tower.blocks.23.mlp.fc2.weight": "model-
|
| 461 |
-
"vision_tower.blocks.23.mlp.fc3.weight": "model-
|
| 462 |
-
"vision_tower.blocks.23.norm1.weight": "model-
|
| 463 |
-
"vision_tower.blocks.23.norm2.weight": "model-
|
| 464 |
-
"vision_tower.blocks.24.attn.proj.weight": "model-
|
| 465 |
-
"vision_tower.blocks.24.attn.qkv.weight": "model-
|
| 466 |
-
"vision_tower.blocks.24.mlp.fc1.weight": "model-
|
| 467 |
-
"vision_tower.blocks.24.mlp.fc2.weight": "model-
|
| 468 |
"vision_tower.blocks.24.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 469 |
-
"vision_tower.blocks.24.norm1.weight": "model-
|
| 470 |
"vision_tower.blocks.24.norm2.weight": "model-00002-of-00002.safetensors",
|
| 471 |
"vision_tower.blocks.25.attn.proj.weight": "model-00002-of-00002.safetensors",
|
| 472 |
"vision_tower.blocks.25.attn.qkv.weight": "model-00002-of-00002.safetensors",
|
|
@@ -503,13 +503,13 @@
|
|
| 503 |
"vision_tower.blocks.29.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 504 |
"vision_tower.blocks.29.norm1.weight": "model-00002-of-00002.safetensors",
|
| 505 |
"vision_tower.blocks.29.norm2.weight": "model-00002-of-00002.safetensors",
|
| 506 |
-
"vision_tower.blocks.3.attn.proj.weight": "model-
|
| 507 |
-
"vision_tower.blocks.3.attn.qkv.weight": "model-
|
| 508 |
-
"vision_tower.blocks.3.mlp.fc1.weight": "model-
|
| 509 |
-
"vision_tower.blocks.3.mlp.fc2.weight": "model-
|
| 510 |
-
"vision_tower.blocks.3.mlp.fc3.weight": "model-
|
| 511 |
-
"vision_tower.blocks.3.norm1.weight": "model-
|
| 512 |
-
"vision_tower.blocks.3.norm2.weight": "model-
|
| 513 |
"vision_tower.blocks.30.attn.proj.weight": "model-00002-of-00002.safetensors",
|
| 514 |
"vision_tower.blocks.30.attn.qkv.weight": "model-00002-of-00002.safetensors",
|
| 515 |
"vision_tower.blocks.30.mlp.fc1.weight": "model-00002-of-00002.safetensors",
|
|
@@ -580,13 +580,13 @@
|
|
| 580 |
"vision_tower.blocks.39.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 581 |
"vision_tower.blocks.39.norm1.weight": "model-00002-of-00002.safetensors",
|
| 582 |
"vision_tower.blocks.39.norm2.weight": "model-00002-of-00002.safetensors",
|
| 583 |
-
"vision_tower.blocks.4.attn.proj.weight": "model-
|
| 584 |
-
"vision_tower.blocks.4.attn.qkv.weight": "model-
|
| 585 |
-
"vision_tower.blocks.4.mlp.fc1.weight": "model-
|
| 586 |
-
"vision_tower.blocks.4.mlp.fc2.weight": "model-
|
| 587 |
-
"vision_tower.blocks.4.mlp.fc3.weight": "model-
|
| 588 |
-
"vision_tower.blocks.4.norm1.weight": "model-
|
| 589 |
-
"vision_tower.blocks.4.norm2.weight": "model-
|
| 590 |
"vision_tower.blocks.40.attn.proj.weight": "model-00002-of-00002.safetensors",
|
| 591 |
"vision_tower.blocks.40.attn.qkv.weight": "model-00002-of-00002.safetensors",
|
| 592 |
"vision_tower.blocks.40.mlp.fc1.weight": "model-00002-of-00002.safetensors",
|
|
@@ -601,50 +601,50 @@
|
|
| 601 |
"vision_tower.blocks.41.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 602 |
"vision_tower.blocks.41.norm1.weight": "model-00002-of-00002.safetensors",
|
| 603 |
"vision_tower.blocks.41.norm2.weight": "model-00002-of-00002.safetensors",
|
| 604 |
-
"vision_tower.blocks.5.attn.proj.weight": "model-
|
| 605 |
-
"vision_tower.blocks.5.attn.qkv.weight": "model-
|
| 606 |
-
"vision_tower.blocks.5.mlp.fc1.weight": "model-
|
| 607 |
-
"vision_tower.blocks.5.mlp.fc2.weight": "model-
|
| 608 |
-
"vision_tower.blocks.5.mlp.fc3.weight": "model-
|
| 609 |
-
"vision_tower.blocks.5.norm1.weight": "model-
|
| 610 |
-
"vision_tower.blocks.5.norm2.weight": "model-
|
| 611 |
-
"vision_tower.blocks.6.attn.proj.weight": "model-
|
| 612 |
-
"vision_tower.blocks.6.attn.qkv.weight": "model-
|
| 613 |
-
"vision_tower.blocks.6.mlp.fc1.weight": "model-
|
| 614 |
-
"vision_tower.blocks.6.mlp.fc2.weight": "model-
|
| 615 |
-
"vision_tower.blocks.6.mlp.fc3.weight": "model-
|
| 616 |
-
"vision_tower.blocks.6.norm1.weight": "model-
|
| 617 |
-
"vision_tower.blocks.6.norm2.weight": "model-
|
| 618 |
-
"vision_tower.blocks.7.attn.proj.weight": "model-
|
| 619 |
-
"vision_tower.blocks.7.attn.qkv.weight": "model-
|
| 620 |
-
"vision_tower.blocks.7.mlp.fc1.weight": "model-
|
| 621 |
-
"vision_tower.blocks.7.mlp.fc2.weight": "model-
|
| 622 |
-
"vision_tower.blocks.7.mlp.fc3.weight": "model-
|
| 623 |
-
"vision_tower.blocks.7.norm1.weight": "model-
|
| 624 |
-
"vision_tower.blocks.7.norm2.weight": "model-
|
| 625 |
-
"vision_tower.blocks.8.attn.proj.weight": "model-
|
| 626 |
-
"vision_tower.blocks.8.attn.qkv.weight": "model-
|
| 627 |
-
"vision_tower.blocks.8.mlp.fc1.weight": "model-
|
| 628 |
-
"vision_tower.blocks.8.mlp.fc2.weight": "model-
|
| 629 |
-
"vision_tower.blocks.8.mlp.fc3.weight": "model-
|
| 630 |
-
"vision_tower.blocks.8.norm1.weight": "model-
|
| 631 |
-
"vision_tower.blocks.8.norm2.weight": "model-
|
| 632 |
-
"vision_tower.blocks.9.attn.proj.weight": "model-
|
| 633 |
-
"vision_tower.blocks.9.attn.qkv.weight": "model-
|
| 634 |
-
"vision_tower.blocks.9.mlp.fc1.weight": "model-
|
| 635 |
-
"vision_tower.blocks.9.mlp.fc2.weight": "model-
|
| 636 |
-
"vision_tower.blocks.9.mlp.fc3.weight": "model-
|
| 637 |
-
"vision_tower.blocks.9.norm1.weight": "model-
|
| 638 |
-
"vision_tower.blocks.9.norm2.weight": "model-
|
| 639 |
"vision_tower.merger.ln_q.bias": "model-00002-of-00002.safetensors",
|
| 640 |
"vision_tower.merger.ln_q.weight": "model-00002-of-00002.safetensors",
|
| 641 |
"vision_tower.merger.mlp.0.bias": "model-00002-of-00002.safetensors",
|
| 642 |
"vision_tower.merger.mlp.0.weight": "model-00002-of-00002.safetensors",
|
| 643 |
"vision_tower.merger.mlp.2.bias": "model-00002-of-00002.safetensors",
|
| 644 |
"vision_tower.merger.mlp.2.weight": "model-00002-of-00002.safetensors",
|
| 645 |
-
"vision_tower.patch_embed.patchifier.norm.weight": "model-
|
| 646 |
-
"vision_tower.patch_embed.patchifier.proj.bias": "model-
|
| 647 |
-
"vision_tower.patch_embed.patchifier.proj.weight": "model-
|
| 648 |
"vision_tower.post_trunk_norm.weight": "model-00002-of-00002.safetensors"
|
| 649 |
}
|
| 650 |
-
}
|
|
|
|
| 430 |
"vision_tower.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 431 |
"vision_tower.blocks.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 432 |
"vision_tower.blocks.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 433 |
+
"vision_tower.blocks.2.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 434 |
+
"vision_tower.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
|
| 435 |
+
"vision_tower.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
|
| 436 |
+
"vision_tower.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 437 |
+
"vision_tower.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 438 |
+
"vision_tower.blocks.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 439 |
+
"vision_tower.blocks.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 440 |
+
"vision_tower.blocks.20.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 441 |
+
"vision_tower.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
|
| 442 |
+
"vision_tower.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
|
| 443 |
+
"vision_tower.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 444 |
+
"vision_tower.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 445 |
+
"vision_tower.blocks.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 446 |
+
"vision_tower.blocks.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 447 |
+
"vision_tower.blocks.21.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 448 |
+
"vision_tower.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
|
| 449 |
+
"vision_tower.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
|
| 450 |
+
"vision_tower.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 451 |
+
"vision_tower.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 452 |
+
"vision_tower.blocks.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 453 |
+
"vision_tower.blocks.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 454 |
+
"vision_tower.blocks.22.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 455 |
+
"vision_tower.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
|
| 456 |
+
"vision_tower.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
|
| 457 |
+
"vision_tower.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 458 |
+
"vision_tower.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 459 |
+
"vision_tower.blocks.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 460 |
+
"vision_tower.blocks.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 461 |
+
"vision_tower.blocks.23.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 462 |
+
"vision_tower.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
|
| 463 |
+
"vision_tower.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
|
| 464 |
+
"vision_tower.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 465 |
+
"vision_tower.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 466 |
+
"vision_tower.blocks.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 467 |
+
"vision_tower.blocks.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 468 |
"vision_tower.blocks.24.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 469 |
+
"vision_tower.blocks.24.norm1.weight": "model-00001-of-00002.safetensors",
|
| 470 |
"vision_tower.blocks.24.norm2.weight": "model-00002-of-00002.safetensors",
|
| 471 |
"vision_tower.blocks.25.attn.proj.weight": "model-00002-of-00002.safetensors",
|
| 472 |
"vision_tower.blocks.25.attn.qkv.weight": "model-00002-of-00002.safetensors",
|
|
|
|
| 503 |
"vision_tower.blocks.29.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 504 |
"vision_tower.blocks.29.norm1.weight": "model-00002-of-00002.safetensors",
|
| 505 |
"vision_tower.blocks.29.norm2.weight": "model-00002-of-00002.safetensors",
|
| 506 |
+
"vision_tower.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 507 |
+
"vision_tower.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 508 |
+
"vision_tower.blocks.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 509 |
+
"vision_tower.blocks.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 510 |
+
"vision_tower.blocks.3.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 511 |
+
"vision_tower.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
|
| 512 |
+
"vision_tower.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
|
| 513 |
"vision_tower.blocks.30.attn.proj.weight": "model-00002-of-00002.safetensors",
|
| 514 |
"vision_tower.blocks.30.attn.qkv.weight": "model-00002-of-00002.safetensors",
|
| 515 |
"vision_tower.blocks.30.mlp.fc1.weight": "model-00002-of-00002.safetensors",
|
|
|
|
| 580 |
"vision_tower.blocks.39.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 581 |
"vision_tower.blocks.39.norm1.weight": "model-00002-of-00002.safetensors",
|
| 582 |
"vision_tower.blocks.39.norm2.weight": "model-00002-of-00002.safetensors",
|
| 583 |
+
"vision_tower.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 584 |
+
"vision_tower.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 585 |
+
"vision_tower.blocks.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 586 |
+
"vision_tower.blocks.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 587 |
+
"vision_tower.blocks.4.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 588 |
+
"vision_tower.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
|
| 589 |
+
"vision_tower.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
|
| 590 |
"vision_tower.blocks.40.attn.proj.weight": "model-00002-of-00002.safetensors",
|
| 591 |
"vision_tower.blocks.40.attn.qkv.weight": "model-00002-of-00002.safetensors",
|
| 592 |
"vision_tower.blocks.40.mlp.fc1.weight": "model-00002-of-00002.safetensors",
|
|
|
|
| 601 |
"vision_tower.blocks.41.mlp.fc3.weight": "model-00002-of-00002.safetensors",
|
| 602 |
"vision_tower.blocks.41.norm1.weight": "model-00002-of-00002.safetensors",
|
| 603 |
"vision_tower.blocks.41.norm2.weight": "model-00002-of-00002.safetensors",
|
| 604 |
+
"vision_tower.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 605 |
+
"vision_tower.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 606 |
+
"vision_tower.blocks.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 607 |
+
"vision_tower.blocks.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 608 |
+
"vision_tower.blocks.5.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 609 |
+
"vision_tower.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
|
| 610 |
+
"vision_tower.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
|
| 611 |
+
"vision_tower.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 612 |
+
"vision_tower.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 613 |
+
"vision_tower.blocks.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 614 |
+
"vision_tower.blocks.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 615 |
+
"vision_tower.blocks.6.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 616 |
+
"vision_tower.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
|
| 617 |
+
"vision_tower.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
|
| 618 |
+
"vision_tower.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 619 |
+
"vision_tower.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 620 |
+
"vision_tower.blocks.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 621 |
+
"vision_tower.blocks.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 622 |
+
"vision_tower.blocks.7.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 623 |
+
"vision_tower.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
|
| 624 |
+
"vision_tower.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
|
| 625 |
+
"vision_tower.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 626 |
+
"vision_tower.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 627 |
+
"vision_tower.blocks.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 628 |
+
"vision_tower.blocks.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 629 |
+
"vision_tower.blocks.8.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 630 |
+
"vision_tower.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
|
| 631 |
+
"vision_tower.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
|
| 632 |
+
"vision_tower.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
|
| 633 |
+
"vision_tower.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
|
| 634 |
+
"vision_tower.blocks.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 635 |
+
"vision_tower.blocks.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 636 |
+
"vision_tower.blocks.9.mlp.fc3.weight": "model-00001-of-00002.safetensors",
|
| 637 |
+
"vision_tower.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
|
| 638 |
+
"vision_tower.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
|
| 639 |
"vision_tower.merger.ln_q.bias": "model-00002-of-00002.safetensors",
|
| 640 |
"vision_tower.merger.ln_q.weight": "model-00002-of-00002.safetensors",
|
| 641 |
"vision_tower.merger.mlp.0.bias": "model-00002-of-00002.safetensors",
|
| 642 |
"vision_tower.merger.mlp.0.weight": "model-00002-of-00002.safetensors",
|
| 643 |
"vision_tower.merger.mlp.2.bias": "model-00002-of-00002.safetensors",
|
| 644 |
"vision_tower.merger.mlp.2.weight": "model-00002-of-00002.safetensors",
|
| 645 |
+
"vision_tower.patch_embed.patchifier.norm.weight": "model-00001-of-00002.safetensors",
|
| 646 |
+
"vision_tower.patch_embed.patchifier.proj.bias": "model-00001-of-00002.safetensors",
|
| 647 |
+
"vision_tower.patch_embed.patchifier.proj.weight": "model-00001-of-00002.safetensors",
|
| 648 |
"vision_tower.post_trunk_norm.weight": "model-00002-of-00002.safetensors"
|
| 649 |
}
|
| 650 |
+
}
|
preprocessor_config.json
CHANGED
|
@@ -1,19 +1,29 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"merge_size": 2,
|
| 7 |
"image_mean": [
|
| 8 |
0.48145466,
|
| 9 |
0.4578275,
|
| 10 |
0.40821073
|
| 11 |
],
|
|
|
|
| 12 |
"image_std": [
|
| 13 |
0.26862954,
|
| 14 |
0.26130258,
|
| 15 |
0.27577711
|
| 16 |
],
|
| 17 |
-
"
|
| 18 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
|
|
|
| 6 |
"image_mean": [
|
| 7 |
0.48145466,
|
| 8 |
0.4578275,
|
| 9 |
0.40821073
|
| 10 |
],
|
| 11 |
+
"image_processor_type": "Qwen2VLImageProcessor",
|
| 12 |
"image_std": [
|
| 13 |
0.26862954,
|
| 14 |
0.26130258,
|
| 15 |
0.27577711
|
| 16 |
],
|
| 17 |
+
"max_pixels": 11289600,
|
| 18 |
+
"merge_size": 2,
|
| 19 |
+
"min_pixels": 3136,
|
| 20 |
+
"patch_size": 14,
|
| 21 |
+
"processor_class": "DotsVLProcessor",
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"longest_edge": 11289600,
|
| 26 |
+
"shortest_edge": 3136
|
| 27 |
+
},
|
| 28 |
+
"temporal_patch_size": 1
|
| 29 |
}
|
special_tokens_map.json
CHANGED
|
@@ -21,5 +21,11 @@
|
|
| 21 |
"rstrip": false,
|
| 22 |
"single_word": false
|
| 23 |
},
|
| 24 |
-
"pad_token":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
|
|
|
| 21 |
"rstrip": false,
|
| 22 |
"single_word": false
|
| 23 |
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "[PAD]",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
}
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
CHANGED
|
@@ -383,8 +383,10 @@
|
|
| 383 |
"clean_up_tokenization_spaces": false,
|
| 384 |
"eos_token": "<|endoftext|>",
|
| 385 |
"errors": "replace",
|
|
|
|
| 386 |
"model_max_length": 131072,
|
| 387 |
"pad_token": "[PAD]",
|
|
|
|
| 388 |
"split_special_tokens": false,
|
| 389 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 390 |
"unk_token": null
|
|
|
|
| 383 |
"clean_up_tokenization_spaces": false,
|
| 384 |
"eos_token": "<|endoftext|>",
|
| 385 |
"errors": "replace",
|
| 386 |
+
"extra_special_tokens": {},
|
| 387 |
"model_max_length": 131072,
|
| 388 |
"pad_token": "[PAD]",
|
| 389 |
+
"processor_class": "DotsVLProcessor",
|
| 390 |
"split_special_tokens": false,
|
| 391 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 392 |
"unk_token": null
|