FrankC0st1e committed · Commit 8fa0de6 · Parent: 91a5e0a

change name to minicpm3

Files changed:
- config.json (+7 -7)
- configuration_minicpm.py (+1 -1)
- modeling_minicpm.py (+4 -4)
config.json CHANGED

@@ -4,14 +4,14 @@
     "MiniCPM3ForCausalLM"
   ],
   "auto_map": {
-    "AutoConfig": "configuration_minicpm.MiniCPMConfig",
-    "AutoModel": "modeling_minicpm.MiniCPMModel",
-    "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
-    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
-    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
+    "AutoConfig": "configuration_minicpm.MiniCPM3Config",
+    "AutoModel": "modeling_minicpm.MiniCPM3Model",
+    "AutoModelForCausalLM": "modeling_minicpm.MiniCPM3ForCausalLM",
+    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPM3ForCausalLM",
+    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPM3ForSequenceClassification"
   },
   "bos_token_id": 1,
-  "eos_token_id": 2,
+  "eos_token_id": [2, 73440],
   "hidden_act": "silu",
   "initializer_range": 0.1,
   "hidden_size": 2560,
@@ -32,7 +32,7 @@
     "original_max_position_embeddings": 32768
   },
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.41.0",
   "use_cache": true,
   "vocab_size": 73448,
   "scale_emb": 12,
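For context, the "auto_map" block is what lets transformers resolve these custom classes when the repository is loaded with trust_remote_code=True. A minimal loading sketch; the Hub repo id below is an assumption (it is not shown in this commit), so treat it as a placeholder:

from transformers import AutoConfig, AutoModelForCausalLM

# Repo id is an assumption; substitute the actual Hub repository this commit lives in.
repo = "openbmb/MiniCPM3-4B"

# trust_remote_code=True tells transformers to follow the "auto_map" entries above:
#   AutoConfig           -> configuration_minicpm.MiniCPM3Config
#   AutoModelForCausalLM -> modeling_minicpm.MiniCPM3ForCausalLM
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
print(type(config).__name__)  # expected: MiniCPM3Config

model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True, torch_dtype="auto")
print(type(model).__name__)   # expected: MiniCPM3ForCausalLM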
configuration_minicpm.py CHANGED

@@ -28,7 +28,7 @@ logger = logging.get_logger(__name__)
 MINICPM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}


-class MiniCPMConfig(PretrainedConfig):
+class MiniCPM3Config(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a [`MiniCPMModel`]. It is used to instantiate an MiniCPM
     model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
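The rename is load-bearing: the class name after the dot in each "auto_map" value must exactly match a class defined in the referenced module, or loading with trust_remote_code fails. A hypothetical local sanity check for that invariant (run from a checkout of this repo; importing the modeling file pulls in its own dependencies such as torch):

import importlib.util
import json

# Hypothetical check: every "auto_map" target must name a real class in its module.
with open("config.json") as f:
    auto_map = json.load(f)["auto_map"]

for auto_cls, target in auto_map.items():
    module_name, class_name = target.split(".")
    spec = importlib.util.spec_from_file_location(module_name, f"{module_name}.py")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # imports configuration_minicpm / modeling_minicpm
    assert hasattr(module, class_name), f"{auto_cls} points at missing {target}"
    print(f"{auto_cls} -> {target}: OK")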
modeling_minicpm.py CHANGED

@@ -979,7 +979,7 @@ MINICPM_START_DOCSTRING = r"""
     "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
     MINICPM_START_DOCSTRING,
 )
-class MiniCPMPreTrainedModel(PreTrainedModel):
+class MiniCPM3PreTrainedModel(PreTrainedModel):
     config_class = MiniCPMConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
@@ -1075,7 +1075,7 @@ MINICPM_INPUTS_DOCSTRING = r"""
     "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
     MINICPM_START_DOCSTRING,
 )
-class MiniCPMModel(MiniCPMPreTrainedModel):
+class MiniCPM3Model(MiniCPM3PreTrainedModel):
     """
     Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]

@@ -1239,7 +1239,7 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
 )


-class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
+class MiniCPM3ForCausalLM(MiniCPM3PreTrainedModel):
     _tied_weights_keys = ["lm_head.weight"]

     def __init__(self, config):
@@ -1465,7 +1465,7 @@ class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
     """,
     MINICPM_START_DOCSTRING,
 )
-class MiniCPMForSequenceClassification(MiniCPMPreTrainedModel):
+class MiniCPM3ForSequenceClassification(MiniCPM3PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
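One behavioral consequence of the config change above: "eos_token_id" is now a list, and generate() accepts a list of stop ids, halting on whichever token appears first. A short sketch, again using the placeholder repo id:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "openbmb/MiniCPM3-4B"  # assumption, as above
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True, torch_dtype="auto")

inputs = tokenizer("Hello", return_tensors="pt")
# Passing the list explicitly mirrors the new config default: generation stops
# as soon as either token id 2 or token id 73440 is produced.
output = model.generate(**inputs, max_new_tokens=32, eos_token_id=[2, 73440])
print(tokenizer.decode(output[0], skip_special_tokens=True))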