{"vocab_size": 256000, "emb_dim": 640, "n_heads": 4, "head_dim": 256, "n_kv_groups": 1, "n_layers": 18, "hidden_dim": 2048, "context_length": 32768, "layer_types": ["sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention"], "rope_local_base": 10000.0, "rope_base": 1000000.0, "sliding_window": 512, "qk_norm": true, "query_pre_attn_scalar": 256, "dtype": "torch.bfloat16"}