Update config.json #2
by AlexGall - opened
Files changed (1)
  1. config.json +19 -3
config.json CHANGED
@@ -32,9 +32,25 @@
   "pad_token_id": 128001,
   "mlp_bias": false,
   "head_dim": 128,
- "quantization_config": {
- "load_in_8bit": false,
- "load_in_4bit": false
+ "attention_implementation": "flash_attention_2",
+ "use_sliding_window": false,
+ "sliding_window": null,
+ "quantization_config": null,
+ "safetensors": {
+ "enabled": true,
+ "total_shards": 96,
+ "shard_pattern": "model-{:05d}-of-00096.safetensors",
+ "index_file": "model.safetensors.index.json",
+ "fast_loading": true,
+ "zero_copy": true
+ },
+ "optimization": {
+ "flash_attention": true,
+ "gradient_checkpointing": false,
+ "tensor_parallel_size": 2,
+ "pipeline_parallel_size": 1,
+ "gpu_memory_fraction": 0.95,
+ "max_batch_size": 32
+ },
   },
   "auto_map": {
   "AutoConfig": "configuration_llama.LlamaConfig",