kokolamba committed
Commit a2efb43 · 1 Parent(s): f640da0

Update model files

checkpoint-2700/config.json CHANGED
@@ -2,6 +2,11 @@
   "architectures": [
     "SharedSpaceDecoderForCausalLM"
   ],
+  "model_type": "shared_subspace_decoder",
+  "auto_map": {
+    "AutoConfig": "shared_space_config.SharedSpaceDecoderConfig",
+    "AutoModelForCausalLM": "task_heads.SharedSpaceDecoderForCausalLM"
+  },
   "attention_backend": "flash_attention_2",
   "attention_bias": false,
   "attention_dropout_prob": 0.1,
@@ -18,7 +23,6 @@
   "kv_shared_dim": null,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 1024,
-  "model_type": "shared_subspace_decoder",
   "nope_dims": 32,
   "norm_type": "rmsnorm",
   "num_attention_heads": 12,
@@ -40,4 +44,4 @@
   "vocab_rank": null,
   "vocab_size": 50257,
   "vocab_subspace": false
-}
+}
checkpoint-3000/config.json CHANGED
@@ -2,6 +2,11 @@
   "architectures": [
     "SharedSpaceDecoderForCausalLM"
   ],
+  "model_type": "shared_subspace_decoder",
+  "auto_map": {
+    "AutoConfig": "shared_space_config.SharedSpaceDecoderConfig",
+    "AutoModelForCausalLM": "task_heads.SharedSpaceDecoderForCausalLM"
+  },
   "attention_backend": "flash_attention_2",
   "attention_bias": false,
   "attention_dropout_prob": 0.1,
@@ -18,7 +23,6 @@
   "kv_shared_dim": null,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 1024,
-  "model_type": "shared_subspace_decoder",
   "nope_dims": 32,
   "norm_type": "rmsnorm",
   "num_attention_heads": 12,
@@ -40,4 +44,4 @@
   "vocab_rank": null,
   "vocab_size": 50257,
   "vocab_subspace": false
-}
+}
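
Both config.json files receive the same change: "model_type" moves to the top of the file, and a new "auto_map" entry points the Transformers Auto classes at the custom code shipped alongside the checkpoint (shared_space_config.py and task_heads.py). A minimal loading sketch, assuming the checkpoint directory (or a Hub repo containing it) holds those two modules next to the weights; the path below is a placeholder:

from transformers import AutoConfig, AutoModelForCausalLM

# Placeholder path: a local checkpoint dir or Hub repo id that contains
# this config.json plus shared_space_config.py and task_heads.py.
checkpoint = "checkpoint-2700"

# trust_remote_code=True is required because auto_map resolves to custom
# classes (SharedSpaceDecoderConfig, SharedSpaceDecoderForCausalLM) that
# live in the repo rather than in the transformers library itself.
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True)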