Spaces:
Sleeping
Sleeping
Commit
·
aedfaa8
1
Parent(s):
bb73124
changes
Browse files
- .gitignore +3 -0
- README.md +2 -3
- flare/params_binnedSpec.yaml +0 -122
- flare/params_filipGlobal.yaml +0 -95
- flare/params_formSpec.yaml +0 -121
- flare/params_jestr.yaml +0 -122
- flare/params_tmp.yaml +0 -125
- pretrained_models/.gitkeep +0 -0
.gitignore
CHANGED
|
@@ -207,3 +207,6 @@ marimo/_lsp/
|
|
| 207 |
__marimo__/
|
| 208 |
experiments/20250913_optimized_filip-model/*.ckpt
|
| 209 |
experiments/20250913_optimized_filip-model/*.ckpt
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
__marimo__/
|
| 208 |
experiments/20250913_optimized_filip-model/*.ckpt
|
| 209 |
experiments/20250913_optimized_filip-model/*.ckpt
|
| 210 |
+
|
| 211 |
+
data/
|
| 212 |
+
pretrained_models/*.ckpt
|
README.md
CHANGED
|
@@ -7,7 +7,6 @@ sdk: streamlit
|
|
| 7 |
pinned: false
|
| 8 |
python_version: 3.11.7
|
| 9 |
---
|
| 10 |
-
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 12 |
|
| 13 |
# 🔥 FLARE
|
|
@@ -15,12 +14,12 @@ Fine-grained Learning for Alignment of spectra-molecule REpresentation
|
|
| 15 |
|
| 16 |
### Yan Zhou Chen, Soha Hassoun
|
| 17 |
#### Department of Computer Science, Tufts University
|
| 18 |
-
This repository provides the implementation of
|
| 19 |
|
| 20 |
## 🛠 Set up
|
| 21 |
### Clone repository
|
| 22 |
```
|
| 23 |
-
git clone https://
|
| 24 |
cd flare
|
| 25 |
```
|
| 26 |
### Set up environment and install dependencies
|
|
|
|
| 7 |
pinned: false
|
| 8 |
python_version: 3.11.7
|
| 9 |
---
|
|
|
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 11 |
|
| 12 |
# 🔥 FLARE
|
|
|
|
| 14 |
|
| 15 |
### Yan Zhou Chen, Soha Hassoun
|
| 16 |
#### Department of Computer Science, Tufts University
|
| 17 |
+
This repository provides the implementation of FLARE. Given a mass spectrum, FLARE can be used to rank a set of molecular candidates. Further, FLARE enables visualization of peak-to-node attribution.
|
| 18 |
|
| 19 |
## 🛠 Set up
|
| 20 |
### Clone repository
|
| 21 |
```
|
| 22 |
+
git clone https://huggingface.co/spaces/HassounLab/FLARE
|
| 23 |
cd flare
|
| 24 |
```
|
| 25 |
### Set up environment and install dependencies
|
flare/params_binnedSpec.yaml
DELETED
|
@@ -1,122 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
# Experiment setup
|
| 3 |
-
job_key: ''
|
| 4 |
-
run_name: 'binnedSpec_experiment'
|
| 5 |
-
run_details: ""
|
| 6 |
-
project_name: ''
|
| 7 |
-
wandb_entity_name: 'mass-spec-ml'
|
| 8 |
-
no_wandb: True
|
| 9 |
-
seed: 0
|
| 10 |
-
debug: False
|
| 11 |
-
checkpoint_pth: ""
|
| 12 |
-
|
| 13 |
-
# Training setup
|
| 14 |
-
max_epochs: 1000
|
| 15 |
-
accelerator: 'gpu'
|
| 16 |
-
devices: [1]
|
| 17 |
-
log_every_n_steps: 250
|
| 18 |
-
val_check_interval: 1.0
|
| 19 |
-
|
| 20 |
-
# Data paths
|
| 21 |
-
candidates_pth: ../data/sample/candidates_mass.json
|
| 22 |
-
dataset_pth: "../data/sample/data.tsv"
|
| 23 |
-
subformula_dir_pth: ""
|
| 24 |
-
split_pth:
|
| 25 |
-
fp_dir_pth: '../data/sample/morganfp_r5_1024.pickle'
|
| 26 |
-
cons_spec_dir_pth: "../data/sample/consensus_binnedSpec.pkl"
|
| 27 |
-
NL_spec_dir_pth: ""
|
| 28 |
-
partial_checkpoint: ""
|
| 29 |
-
|
| 30 |
-
# General hyperparameters
|
| 31 |
-
batch_size: 64
|
| 32 |
-
lr: 5.0e-4
|
| 33 |
-
weight_decay: 0
|
| 34 |
-
contr_temp: 0.05
|
| 35 |
-
early_stopping_patience: 300
|
| 36 |
-
loss_strategy: 'static' # static, linear, manual
|
| 37 |
-
num_workers: 50
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
############################## Data transforms ##############################
|
| 41 |
-
# - Spectra
|
| 42 |
-
spectra_view: SpecBinnerLog
|
| 43 |
-
max_mz: 1000
|
| 44 |
-
bin_width: 1
|
| 45 |
-
mask_peak_ratio: 0.00
|
| 46 |
-
|
| 47 |
-
# 2. SpecFormula
|
| 48 |
-
element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
|
| 49 |
-
add_intensities: True
|
| 50 |
-
mask_precursor: False
|
| 51 |
-
|
| 52 |
-
# - Molecule
|
| 53 |
-
molecule_view: "MolGraph"
|
| 54 |
-
atom_feature: 'full'
|
| 55 |
-
bond_feature: 'full'
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
############################## Views ##############################
|
| 59 |
-
# contrastive
|
| 60 |
-
use_contr: True
|
| 61 |
-
contr_wt: 1
|
| 62 |
-
contr_wt_update: {}
|
| 63 |
-
|
| 64 |
-
# consensus spectra
|
| 65 |
-
use_cons_spec: False
|
| 66 |
-
cons_spec_wt: 3
|
| 67 |
-
cons_spec_wt_update: {}
|
| 68 |
-
cons_loss_type: 'l2' # cosine, l2
|
| 69 |
-
|
| 70 |
-
# fp prediction/usage
|
| 71 |
-
pred_fp: False
|
| 72 |
-
use_fp: False
|
| 73 |
-
fp_loss_type: 'cosine' #cosine, bce
|
| 74 |
-
fp_wt: 3
|
| 75 |
-
fp_wt_update: {}
|
| 76 |
-
fp_size: 1024
|
| 77 |
-
fp_radius: 5
|
| 78 |
-
fp_dropout: 0.4
|
| 79 |
-
|
| 80 |
-
# candidates
|
| 81 |
-
aug_cands: False
|
| 82 |
-
aug_cands_wt: 0.1
|
| 83 |
-
aug_cands_update: {}
|
| 84 |
-
aug_cands_size: 3
|
| 85 |
-
|
| 86 |
-
# neutral loss
|
| 87 |
-
use_NL: False
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
############################## Task and model ##############################
|
| 92 |
-
task: 'retrieval'
|
| 93 |
-
spec_enc: MLP_BIN
|
| 94 |
-
mol_enc: "GNN"
|
| 95 |
-
model: "MultiviewContrastive"
|
| 96 |
-
contr_views: [['spec_enc', 'mol_enc']]
|
| 97 |
-
log_only_loss_at_stages: []
|
| 98 |
-
df_test_path: ""
|
| 99 |
-
|
| 100 |
-
# - Spectra encoder
|
| 101 |
-
final_embedding_dim: 512
|
| 102 |
-
fc_dropout: 0.4
|
| 103 |
-
|
| 104 |
-
# - Spectra Token encoder
|
| 105 |
-
hidden_dims: [64, 128]
|
| 106 |
-
peak_dropout: 0.2
|
| 107 |
-
|
| 108 |
-
# - Formula-based spec encoders
|
| 109 |
-
formula_dropout: 0.2
|
| 110 |
-
formula_dims: [64, 128, 256]
|
| 111 |
-
cross_attn_heads: 2
|
| 112 |
-
use_cls: True
|
| 113 |
-
|
| 114 |
-
# -- GAT params
|
| 115 |
-
attn_heads: [12,12,12]
|
| 116 |
-
|
| 117 |
-
# - Molecule encoder (GNN)
|
| 118 |
-
gnn_channels: [64,128,256]
|
| 119 |
-
gnn_type: "gcn"
|
| 120 |
-
num_gnn_layers: 3
|
| 121 |
-
gnn_hidden_dim: 512
|
| 122 |
-
gnn_dropout: 0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
flare/params_filipGlobal.yaml
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
# Experiment setup
|
| 2 |
-
job_key: ''
|
| 3 |
-
run_name: 'filip-global'
|
| 4 |
-
run_details: ""
|
| 5 |
-
project_name: ''
|
| 6 |
-
wandb_entity_name: 'mass-spec-ml'
|
| 7 |
-
no_wandb: True
|
| 8 |
-
seed: 42
|
| 9 |
-
debug: False
|
| 10 |
-
checkpoint_pth:
|
| 11 |
-
|
| 12 |
-
# Training setup
|
| 13 |
-
max_epochs: 2000
|
| 14 |
-
accelerator: 'gpu'
|
| 15 |
-
devices: [1]
|
| 16 |
-
log_every_n_steps: 250
|
| 17 |
-
val_check_interval: 1.0
|
| 18 |
-
|
| 19 |
-
# Data paths
|
| 20 |
-
candidates_pth: /r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json # "../data/MassSpecGym/data/molecules/MassSpecGym_retrieval_candidates_formula.json"
|
| 21 |
-
dataset_pth: /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # /data/yzhouc01/MVP/data/sample/data.tsv #/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv #/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv # /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # "../data/MassSpecGym/data/sample_data.tsv"
|
| 22 |
-
subformula_dir_pth: /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/FILIP-MS/data/magma # /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default #/data/yzhouc01/spectra_data/subformulae #"../data/MassSpecGym/data/subformulae_default"
|
| 23 |
-
split_pth:
|
| 24 |
-
fp_dir_pth:
|
| 25 |
-
partial_checkpoint: ""
|
| 26 |
-
|
| 27 |
-
# General hyperparameters
|
| 28 |
-
batch_size: 64 #64
|
| 29 |
-
lr: 2.881339661302105e-05 # 5.0e-05
|
| 30 |
-
weight_decay: 1.8376229667330708e-05
|
| 31 |
-
contr_temp: 0.022772534845886608 # 0.022772534845886608 # 0.05
|
| 32 |
-
num_workers: 50
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
# FILIP_GLOBAL model parameters
|
| 36 |
-
loss_mode: "pcgrad"
|
| 37 |
-
agg_fn: "mean"
|
| 38 |
-
loss_weight: 1.1
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
############################## Data transforms ##############################
|
| 42 |
-
# - Spectra
|
| 43 |
-
spectra_view: SpecFormula #SpecMzIntTokens #SpecFormula
|
| 44 |
-
formula_source: 'default' # magma_1, magma_all, sirius, default
|
| 45 |
-
# 1. Binner
|
| 46 |
-
max_mz: 1000
|
| 47 |
-
bin_width: 1
|
| 48 |
-
mask_peak_ratio: 0.00
|
| 49 |
-
|
| 50 |
-
# 2. SpecFormula
|
| 51 |
-
element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
|
| 52 |
-
add_intensities: True
|
| 53 |
-
|
| 54 |
-
# - Molecule
|
| 55 |
-
molecule_view: "MolGraph"
|
| 56 |
-
atom_feature: 'full'
|
| 57 |
-
bond_feature: 'full'
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
############################## Task and model ##############################
|
| 61 |
-
task: 'retrieval'
|
| 62 |
-
spec_enc: Transformer_Formula # Transformer_MzInt #Transformer_Formula
|
| 63 |
-
mol_enc: "GNN"
|
| 64 |
-
model: filipGlobalContrastive #filipContrastive # "MultiviewContrastive"
|
| 65 |
-
contr_views: [['spec_enc', 'mol_enc']]
|
| 66 |
-
log_only_loss_at_stages: []
|
| 67 |
-
df_test_path: ""
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
# - Formula-based spec encoders
|
| 71 |
-
formula_dropout: 0.2
|
| 72 |
-
formula_dims: [512,256,512] #[512, 256, 512] #[64, 128, 256]
|
| 73 |
-
cross_attn_heads: 2
|
| 74 |
-
use_cls: False
|
| 75 |
-
peak_dropout: 0.2
|
| 76 |
-
formula_attn_heads: 4 # 2
|
| 77 |
-
formula_transformer_layers: 2 #2
|
| 78 |
-
|
| 79 |
-
# -- GAT params
|
| 80 |
-
attn_heads: [12,12,12]
|
| 81 |
-
|
| 82 |
-
# - Molecule encoder (GNN)
|
| 83 |
-
gnn_channels: [128, 256, 512] #[64,128,512]
|
| 84 |
-
gnn_type: "gcn"
|
| 85 |
-
# num_gnn_layers: 3
|
| 86 |
-
# gnn_hidden_dim: 512
|
| 87 |
-
gnn_dropout: 0.23234950970370824 #0.3
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
# - Spectra encoder (cross attention model)
|
| 91 |
-
# final_embedding_dim: 512
|
| 92 |
-
# fc_dropout: 0.4
|
| 93 |
-
|
| 94 |
-
# - Spectra Token encoder (mz-int token model)
|
| 95 |
-
# hidden_dims: [64, 256]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
flare/params_formSpec.yaml
DELETED
|
@@ -1,121 +0,0 @@
|
|
| 1 |
-
# Experiment setup
|
| 2 |
-
job_key: ''
|
| 3 |
-
run_name: 'flare_sirius_labels_42'
|
| 4 |
-
run_details: ""
|
| 5 |
-
project_name: ''
|
| 6 |
-
wandb_entity_name: 'mass-spec-ml'
|
| 7 |
-
no_wandb: True
|
| 8 |
-
seed: 42
|
| 9 |
-
debug: False
|
| 10 |
-
checkpoint_pth:
|
| 11 |
-
|
| 12 |
-
# Training setup
|
| 13 |
-
max_epochs: 2000
|
| 14 |
-
accelerator: 'gpu'
|
| 15 |
-
devices: [1]
|
| 16 |
-
log_every_n_steps: 250
|
| 17 |
-
val_check_interval: 1.0
|
| 18 |
-
|
| 19 |
-
# Data paths
|
| 20 |
-
candidates_pth: /r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json # "../data/MassSpecGym/data/molecules/MassSpecGym_retrieval_candidates_formula.json"
|
| 21 |
-
dataset_pth: /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # /data/yzhouc01/MVP/data/sample/data.tsv #/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv #/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv # /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # "../data/MassSpecGym/data/sample_data.tsv"
|
| 22 |
-
subformula_dir_pth: /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/FILIP-MS/data/magma # /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default #/data/yzhouc01/spectra_data/subformulae #"../data/MassSpecGym/data/subformulae_default"
|
| 23 |
-
split_pth:
|
| 24 |
-
fp_dir_pth:
|
| 25 |
-
partial_checkpoint: ""
|
| 26 |
-
|
| 27 |
-
# General hyperparameters
|
| 28 |
-
batch_size: 64 #64
|
| 29 |
-
lr: 2.881339661302105e-05 # 5.0e-05
|
| 30 |
-
weight_decay: 1.8376229667330708e-05
|
| 31 |
-
contr_temp: 0.022772534845886608 # 0.022772534845886608 # 0.05
|
| 32 |
-
num_workers: 50
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
############################## Data transforms ##############################
|
| 36 |
-
# - Spectra
|
| 37 |
-
spectra_view: SpecFormula #SpecMzIntTokens #SpecFormula
|
| 38 |
-
formula_source: 'sirius' # magma_1, magma_all, sirius, default
|
| 39 |
-
# 1. Binner
|
| 40 |
-
max_mz: 1000
|
| 41 |
-
bin_width: 1
|
| 42 |
-
mask_peak_ratio: 0.00
|
| 43 |
-
|
| 44 |
-
# 2. SpecFormula
|
| 45 |
-
element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
|
| 46 |
-
add_intensities: True
|
| 47 |
-
|
| 48 |
-
# - Molecule
|
| 49 |
-
molecule_view: "MolGraph"
|
| 50 |
-
atom_feature: 'full'
|
| 51 |
-
bond_feature: 'full'
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
############################## Views ##############################
|
| 55 |
-
# contrastive
|
| 56 |
-
# use_contr: False
|
| 57 |
-
# contr_wt: 1
|
| 58 |
-
# contr_wt_update: {}
|
| 59 |
-
|
| 60 |
-
# consensus spectra
|
| 61 |
-
# use_cons_spec: False
|
| 62 |
-
# cons_spec_wt: 3
|
| 63 |
-
# cons_spec_wt_update: {}
|
| 64 |
-
# cons_loss_type: 'l2' # cosine, l2
|
| 65 |
-
|
| 66 |
-
# fp prediction/usage
|
| 67 |
-
# pred_fp: False
|
| 68 |
-
# use_fp: False
|
| 69 |
-
# fp_loss_type: 'cosine' #cosine, bce
|
| 70 |
-
# fp_wt: 3
|
| 71 |
-
# fp_wt_update: {}
|
| 72 |
-
# fp_size: 1024
|
| 73 |
-
# fp_radius: 5
|
| 74 |
-
# fp_dropout: 0.4
|
| 75 |
-
|
| 76 |
-
# candidates
|
| 77 |
-
# aug_cands: False
|
| 78 |
-
# aug_cands_wt: 0.1
|
| 79 |
-
# aug_cands_update: {}
|
| 80 |
-
# aug_cands_size: 3
|
| 81 |
-
|
| 82 |
-
# neutral loss
|
| 83 |
-
# use_NL: False
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
############################## Task and model ##############################
|
| 87 |
-
task: 'retrieval'
|
| 88 |
-
spec_enc: Transformer_Formula # Transformer_MzInt #Transformer_Formula
|
| 89 |
-
mol_enc: "GNN"
|
| 90 |
-
model: filipContrastive # "MultiviewContrastive"
|
| 91 |
-
contr_views: [['spec_enc', 'mol_enc']]
|
| 92 |
-
log_only_loss_at_stages: []
|
| 93 |
-
df_test_path: ""
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
# - Formula-based spec encoders
|
| 97 |
-
formula_dropout: 0.2
|
| 98 |
-
formula_dims: [512,256,512] #[512, 256, 512] #[64, 128, 256]
|
| 99 |
-
cross_attn_heads: 2
|
| 100 |
-
use_cls: False
|
| 101 |
-
peak_dropout: 0.2
|
| 102 |
-
formula_attn_heads: 4 # 2
|
| 103 |
-
formula_transformer_layers: 2 #2
|
| 104 |
-
|
| 105 |
-
# -- GAT params
|
| 106 |
-
attn_heads: [12,12,12]
|
| 107 |
-
|
| 108 |
-
# - Molecule encoder (GNN)
|
| 109 |
-
gnn_channels: [128, 256, 512] #[64,128,512]
|
| 110 |
-
gnn_type: "gcn"
|
| 111 |
-
# num_gnn_layers: 3
|
| 112 |
-
# gnn_hidden_dim: 512
|
| 113 |
-
gnn_dropout: 0.23234950970370824 #0.3
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
# - Spectra encoder (cross attention model)
|
| 117 |
-
# final_embedding_dim: 512
|
| 118 |
-
# fc_dropout: 0.4
|
| 119 |
-
|
| 120 |
-
# - Spectra Token encoder (mz-int token model)
|
| 121 |
-
# hidden_dims: [64, 256]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
flare/params_jestr.yaml
DELETED
|
@@ -1,122 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
# Experiment setup
|
| 3 |
-
job_key: ''
|
| 4 |
-
run_name: 'combined_d_1024dim_100bs'
|
| 5 |
-
run_details: ""
|
| 6 |
-
project_name: ''
|
| 7 |
-
wandb_entity_name: 'mass-spec-ml'
|
| 8 |
-
no_wandb: True
|
| 9 |
-
seed: 3
|
| 10 |
-
debug: False
|
| 11 |
-
checkpoint_pth:
|
| 12 |
-
|
| 13 |
-
# Training setup
|
| 14 |
-
max_epochs: 2000
|
| 15 |
-
accelerator: 'gpu'
|
| 16 |
-
devices: [1]
|
| 17 |
-
log_every_n_steps: 250
|
| 18 |
-
val_check_interval: 1.0
|
| 19 |
-
|
| 20 |
-
# Data paths
|
| 21 |
-
candidates_pth: "/r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json"
|
| 22 |
-
dataset_pth: '/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv' # '/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv' #"/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv"
|
| 23 |
-
subformula_dir_pth: ""
|
| 24 |
-
split_pth:
|
| 25 |
-
fp_dir_pth: ''
|
| 26 |
-
cons_spec_dir_pth:
|
| 27 |
-
NL_spec_dir_pth: ""
|
| 28 |
-
partial_checkpoint: ""
|
| 29 |
-
|
| 30 |
-
# General hyperparameters
|
| 31 |
-
batch_size: 100
|
| 32 |
-
lr: 5.0e-4
|
| 33 |
-
weight_decay: 0
|
| 34 |
-
contr_temp: 0.05
|
| 35 |
-
early_stopping_patience: 300
|
| 36 |
-
loss_strategy: 'static' # static, linear, manual
|
| 37 |
-
num_workers: 50
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
############################## Data transforms ##############################
|
| 41 |
-
# - Spectra
|
| 42 |
-
spectra_view: SpecBinnerLog
|
| 43 |
-
max_mz: 1000
|
| 44 |
-
bin_width: 1
|
| 45 |
-
mask_peak_ratio: 0.00
|
| 46 |
-
|
| 47 |
-
# 2. SpecFormula
|
| 48 |
-
element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
|
| 49 |
-
add_intensities: True
|
| 50 |
-
mask_precursor: False
|
| 51 |
-
|
| 52 |
-
# - Molecule
|
| 53 |
-
molecule_view: "MolGraph"
|
| 54 |
-
atom_feature: 'full'
|
| 55 |
-
bond_feature: 'full'
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
############################## Views ##############################
|
| 59 |
-
# contrastive
|
| 60 |
-
use_contr: True
|
| 61 |
-
contr_wt: 1
|
| 62 |
-
contr_wt_update: {}
|
| 63 |
-
|
| 64 |
-
# consensus spectra
|
| 65 |
-
use_cons_spec: False
|
| 66 |
-
cons_spec_wt: 3
|
| 67 |
-
cons_spec_wt_update: {}
|
| 68 |
-
cons_loss_type: 'l2' # cosine, l2
|
| 69 |
-
|
| 70 |
-
# fp prediction/usage
|
| 71 |
-
pred_fp: False
|
| 72 |
-
use_fp: False
|
| 73 |
-
fp_loss_type: 'cosine' #cosine, bce
|
| 74 |
-
fp_wt: 3
|
| 75 |
-
fp_wt_update: {}
|
| 76 |
-
fp_size: 1024
|
| 77 |
-
fp_radius: 5
|
| 78 |
-
fp_dropout: 0.4
|
| 79 |
-
|
| 80 |
-
# candidates
|
| 81 |
-
aug_cands: False
|
| 82 |
-
aug_cands_wt: 0.1
|
| 83 |
-
aug_cands_update: {}
|
| 84 |
-
aug_cands_size: 3
|
| 85 |
-
|
| 86 |
-
# neutral loss
|
| 87 |
-
use_NL: False
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
############################## Task and model ##############################
|
| 92 |
-
task: 'retrieval'
|
| 93 |
-
spec_enc: MLP_BIN
|
| 94 |
-
mol_enc: "GNN"
|
| 95 |
-
model: "MultiviewContrastive"
|
| 96 |
-
contr_views: [['spec_enc', 'mol_enc']]
|
| 97 |
-
log_only_loss_at_stages: []
|
| 98 |
-
df_test_path: ""
|
| 99 |
-
|
| 100 |
-
# - Spectra encoder
|
| 101 |
-
final_embedding_dim: 1024
|
| 102 |
-
fc_dropout: 0.4
|
| 103 |
-
|
| 104 |
-
# - Spectra Token encoder
|
| 105 |
-
hidden_dims: [64, 128]
|
| 106 |
-
peak_dropout: 0.2
|
| 107 |
-
|
| 108 |
-
# - Formula-based spec encoders
|
| 109 |
-
formula_dropout: 0.2
|
| 110 |
-
formula_dims: [64, 128, 256]
|
| 111 |
-
cross_attn_heads: 2
|
| 112 |
-
use_cls: True
|
| 113 |
-
|
| 114 |
-
# -- GAT params
|
| 115 |
-
attn_heads: [12,12,12]
|
| 116 |
-
|
| 117 |
-
# - Molecule encoder (GNN)
|
| 118 |
-
gnn_channels: [64,128,256]
|
| 119 |
-
gnn_type: "gcn"
|
| 120 |
-
num_gnn_layers: 3
|
| 121 |
-
gnn_hidden_dim: 1024
|
| 122 |
-
gnn_dropout: 0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
flare/params_tmp.yaml
DELETED
|
@@ -1,125 +0,0 @@
|
|
| 1 |
-
# Experiment setup
|
| 2 |
-
job_key: ''
|
| 3 |
-
run_name: 'crossAttnModel'
|
| 4 |
-
run_details: ""
|
| 5 |
-
project_name: ''
|
| 6 |
-
wandb_entity_name: 'mass-spec-ml'
|
| 7 |
-
no_wandb: True
|
| 8 |
-
seed: 0
|
| 9 |
-
debug: False
|
| 10 |
-
checkpoint_pth:
|
| 11 |
-
|
| 12 |
-
# Training setup
|
| 13 |
-
max_epochs: 1000
|
| 14 |
-
accelerator: 'gpu'
|
| 15 |
-
devices: [1]
|
| 16 |
-
log_every_n_steps: 250
|
| 17 |
-
val_check_interval: 1.0
|
| 18 |
-
|
| 19 |
-
# Data paths
|
| 20 |
-
candidates_pth: /r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json # "../data/MassSpecGym/data/molecules/MassSpecGym_retrieval_candidates_formula.json"
|
| 21 |
-
dataset_pth: /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # /data/yzhouc01/MVP/data/sample/data.tsv #/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv #/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv # /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # "../data/MassSpecGym/data/sample_data.tsv"
|
| 22 |
-
subformula_dir_pth: /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/FILIP-MS/data/magma # /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default #/data/yzhouc01/spectra_data/subformulae #"../data/MassSpecGym/data/subformulae_default"
|
| 23 |
-
split_pth:
|
| 24 |
-
fp_dir_pth:
|
| 25 |
-
cons_spec_dir_pth:
|
| 26 |
-
NL_spec_dir_pth: ""
|
| 27 |
-
partial_checkpoint: ""
|
| 28 |
-
|
| 29 |
-
# General hyperparameters
|
| 30 |
-
batch_size: 64
|
| 31 |
-
lr: 2.881339661302105e-05 # 5.0e-05
|
| 32 |
-
weight_decay: 1.1586679936312845e-05
|
| 33 |
-
contr_temp: 0.022772534845886608 # 0.05
|
| 34 |
-
early_stopping_patience: 300
|
| 35 |
-
loss_strategy: 'static'
|
| 36 |
-
num_workers: 50
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
############################## Data transforms ##############################
|
| 40 |
-
# - Spectra
|
| 41 |
-
spectra_view: SpecFormula #SpecMzIntTokens #SpecFormula
|
| 42 |
-
formula_source: 'default' # magma_1, magma_all, sirius, default
|
| 43 |
-
# 1. Binner
|
| 44 |
-
max_mz: 1000
|
| 45 |
-
bin_width: 1
|
| 46 |
-
mask_peak_ratio: 0.00
|
| 47 |
-
|
| 48 |
-
# 2. SpecFormula
|
| 49 |
-
element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
|
| 50 |
-
add_intensities: True
|
| 51 |
-
mask_precursor: False
|
| 52 |
-
|
| 53 |
-
# - Molecule
|
| 54 |
-
molecule_view: "MolGraph"
|
| 55 |
-
atom_feature: 'full'
|
| 56 |
-
bond_feature: 'full'
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
############################## Views ##############################
|
| 60 |
-
# contrastive
|
| 61 |
-
use_contr: False
|
| 62 |
-
contr_wt: 1
|
| 63 |
-
contr_wt_update: {}
|
| 64 |
-
|
| 65 |
-
# consensus spectra
|
| 66 |
-
use_cons_spec: False
|
| 67 |
-
cons_spec_wt: 3
|
| 68 |
-
cons_spec_wt_update: {}
|
| 69 |
-
cons_loss_type: 'l2' # cosine, l2
|
| 70 |
-
|
| 71 |
-
# fp prediction/usage
|
| 72 |
-
pred_fp: False
|
| 73 |
-
use_fp: False
|
| 74 |
-
fp_loss_type: 'cosine' #cosine, bce
|
| 75 |
-
fp_wt: 3
|
| 76 |
-
fp_wt_update: {}
|
| 77 |
-
fp_size: 1024
|
| 78 |
-
fp_radius: 5
|
| 79 |
-
fp_dropout: 0.4
|
| 80 |
-
|
| 81 |
-
# candidates
|
| 82 |
-
aug_cands: False
|
| 83 |
-
aug_cands_wt: 0.1
|
| 84 |
-
aug_cands_update: {}
|
| 85 |
-
aug_cands_size: 3
|
| 86 |
-
|
| 87 |
-
# neutral loss
|
| 88 |
-
use_NL: False
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
############################## Task and model ##############################
|
| 92 |
-
task: 'retrieval'
|
| 93 |
-
spec_enc: Transformer_Formula # Transformer_MzInt #Transformer_Formula
|
| 94 |
-
mol_enc: "GNN"
|
| 95 |
-
model: crossAttenContrastive # "MultiviewContrastive"
|
| 96 |
-
contr_views: [['spec_enc', 'mol_enc']] #[['spec_enc', 'mol_enc'], ['spec_enc', 'NL_spec_enc'], ['mol_enc', 'NL_spec_enc']] #[['spec_enc', 'mol_enc'], ['mol_enc', 'cons_spec_enc'], ['cons_spec_enc', 'spec_enc'], ['fp_enc', 'mol_enc'], ['fp_enc', 'spec_enc'], ['fp_enc', 'cons_spec_enc']]
|
| 97 |
-
log_only_loss_at_stages: []
|
| 98 |
-
df_test_path: ""
|
| 99 |
-
|
| 100 |
-
# - Spectra encoder
|
| 101 |
-
final_embedding_dim: 512
|
| 102 |
-
fc_dropout: 0.4
|
| 103 |
-
|
| 104 |
-
# - Spectra Token encoder
|
| 105 |
-
hidden_dims: [64, 128]
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
# - Formula-based spec encoders
|
| 109 |
-
formula_dropout: 0.2
|
| 110 |
-
formula_dims: [128, 256, 512] #[64, 128, 256]
|
| 111 |
-
cross_attn_heads: 4 # 2
|
| 112 |
-
use_cls: False
|
| 113 |
-
peak_dropout: 0.414425691950033 # 0.2
|
| 114 |
-
formula_attn_heads: 4 # 2
|
| 115 |
-
formula_transformer_layers: 2
|
| 116 |
-
|
| 117 |
-
# -- GAT params
|
| 118 |
-
attn_heads: [12,12,12]
|
| 119 |
-
|
| 120 |
-
# - Molecule encoder (GNN)
|
| 121 |
-
gnn_channels: [64,128,512]
|
| 122 |
-
gnn_type: "gcn"
|
| 123 |
-
num_gnn_layers: 3
|
| 124 |
-
gnn_hidden_dim: 512
|
| 125 |
-
gnn_dropout: 0.23234950970370824 #0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pretrained_models/.gitkeep
ADDED
|
File without changes
|