Sentence Similarity
sentence-transformers
Safetensors
English
modernbert
feature-extraction
Generated from Trainer
dataset_size:6661966
loss:MultipleNegativesRankingLoss
loss:CachedMultipleNegativesRankingLoss
loss:SoftmaxLoss
loss:AnglELoss
loss:CoSENTLoss
loss:CosineSimilarityLoss
text-embeddings-inference
{
  "_name_or_path": "tasksource/ModernBERT-base-nli",
  "architectures": [
    "ModernBertModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 50281,
  "classifier_activation": "gelu",
  "classifier_bias": false,
  "classifier_dropout": 0.0,
  "classifier_pooling": "mean",
  "classifiers_size": [
    3,
    2,
    2,
    2,
    2,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    2,
    2,
    3,
    2,
    2,
    2,
    2,
    2,
    6,
    2,
    2,
    2,
    2,
    2,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    2,
    2,
    2,
    2,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    2,
    2,
    2,
    2,
    3,
    2,
    4,
    3,
    3,
    2,
    2,
    2,
    2,
    2,
    3,
    2,
    3,
    2,
    4,
    3,
    3,
    3,
    2,
    3,
    1,
    2,
    2,
    3,
    13,
    2,
    3,
    2,
    2,
    3,
    3,
    2,
    3,
    3,
    2,
    3,
    2,
    2,
    2,
    2,
    2,
    3,
    4,
    3,
    3,
    2,
    2,
    3,
    3,
    2,
    2,
    2,
    2,
    2,
    4,
    3,
    2,
    2,
    3
  ],
  "cls_token_id": 50281,
  "decoder_bias": true,
  "deterministic_flash_attn": false,
  "embedding_dropout": 0.0,
  "eos_token_id": 50282,
  "global_attn_every_n_layers": 3,
  "global_rope_theta": 160000.0,
  "gradient_checkpointing": false,
  "hidden_activation": "gelu",
  "hidden_size": 768,
  "id2label": {
    "0": "entailment",
    "1": "neutral",
    "2": "contradiction"
  },
  "initializer_cutoff_factor": 2.0,
  "initializer_range": 0.02,
  "intermediate_size": 1152,
  "label2id": {
    "contradiction": 2,
    "entailment": 0,
    "neutral": 1
  },
  "layer_norm_eps": 1e-05,
  "local_attention": 128,
  "local_rope_theta": 10000.0,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "mlp_dropout": 0.0,
  "model_type": "modernbert",
  "norm_bias": false,
  "norm_eps": 1e-05,
  "num_attention_heads": 12,
  "num_hidden_layers": 22,
  "pad_token_id": 50283,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "reference_compile": true,
  "sep_token_id": 50282,
  "sparse_pred_ignore_index": -100,
  "sparse_prediction": false,
  "tasks": [
    "glue/mnli",
    "glue/qnli",
    "glue/rte",
    "glue/wnli",
    "super_glue/boolq",
    "super_glue/cb",
    "anli/a1",
    "anli/a2",
    "anli/a3",
    "sick/label",
    "sick/entailment_AB",
    "snli",
    "scitail/snli_format",
    "hans",
    "WANLI",
    "recast/recast_sentiment",
    "recast/recast_verbcorner",
    "recast/recast_ner",
    "recast/recast_factuality",
    "recast/recast_puns",
    "recast/recast_kg_relations",
    "recast/recast_verbnet",
    "recast/recast_megaveridicality",
    "probability_words_nli/usnli",
    "probability_words_nli/reasoning_1hop",
    "probability_words_nli/reasoning_2hop",
    "nan-nli",
    "nli_fever",
    "breaking_nli",
    "conj_nli",
    "fracas",
    "dialogue_nli",
    "mpe",
    "dnc",
    "recast_white/fnplus",
    "recast_white/sprl",
    "recast_white/dpr",
    "robust_nli/IS_CS",
    "robust_nli/LI_LI",
    "robust_nli/ST_WO",
    "robust_nli/PI_SP",
    "robust_nli/PI_CD",
    "robust_nli/ST_SE",
    "robust_nli/ST_NE",
    "robust_nli/ST_LM",
    "robust_nli_is_sd",
    "robust_nli_li_ts",
    "add_one_rte",
    "cycic_classification",
    "lingnli",
    "monotonicity-entailment",
    "scinli",
    "naturallogic",
    "syntactic-augmentation-nli",
    "autotnli",
    "defeasible-nli/atomic",
    "defeasible-nli/snli",
    "help-nli",
    "nli-veridicality-transitivity",
    "lonli",
    "dadc-limit-nli",
    "folio",
    "tomi-nli",
    "puzzte",
    "temporal-nli",
    "counterfactually-augmented-snli",
    "cnli",
    "boolq-natural-perturbations",
    "equate",
    "chaos-mnli-ambiguity",
    "logiqa-2.0-nli",
    "mindgames",
    "ConTRoL-nli",
    "logical-fallacy",
    "conceptrules_v2",
    "zero-shot-label-nli",
    "scone",
    "monli",
    "SpaceNLI",
    "propsegment/nli",
    "SDOH-NLI",
    "scifact_entailment",
    "AdjectiveScaleProbe-nli",
    "resnli",
    "semantic_fragments_nli",
    "dataset_train_nli",
    "nlgraph",
    "ruletaker",
    "PARARULE-Plus",
    "logical-entailment",
    "nope",
    "LogicNLI",
    "contract-nli/contractnli_a/seg",
    "contract-nli/contractnli_b/full",
    "nli4ct_semeval2024",
    "biosift-nli",
    "SIGA-nli",
    "FOL-nli",
    "doc-nli",
    "mctest-nli",
    "natural-language-satisfiability",
    "idioms-nli",
    "lifecycle-entailment",
    "MSciNLI",
    "hover-3way/nli",
    "seahorse_summarization_evaluation",
    "babi_nli",
    "gen_debiased_nli"
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.48.0.dev0",
  "vocab_size": 50368
}