yzhouchen001 committed on
Commit
aedfaa8
·
1 Parent(s): bb73124
.gitignore CHANGED
@@ -207,3 +207,6 @@ marimo/_lsp/
207
  __marimo__/
208
  experiments/20250913_optimized_filip-model/*.ckpt
209
  experiments/20250913_optimized_filip-model/*.ckpt
 
 
 
 
207
  __marimo__/
208
  experiments/20250913_optimized_filip-model/*.ckpt
209
  experiments/20250913_optimized_filip-model/*.ckpt
210
+
211
+ data/
212
+ pretrained_models/*.ckpt
README.md CHANGED
@@ -7,7 +7,6 @@ sdk: streamlit
7
  pinned: false
8
  python_version: 3.11.7
9
  ---
10
-
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
 
13
  # 🔥 FLARE
@@ -15,12 +14,12 @@ Fine-grained Learning for Alignment of spectra-molecule REpresentation
15
 
16
  ### Yan Zhou Chen, Soha Hassoun
17
  #### Department of Computer Science, Tufts University
18
- This repository provides the implementation of FALRE. Given a mass spectrum, FLARE can be used to rank a set of molecular candidates. Further, FLARE enables visualization of peak-to-node attribution.
19
 
20
  ## 🛠 Set up
21
  ### Clone repository
22
  ```
23
- git clone https://github.com/hassounlab/flare
24
  cd flare
25
  ```
26
  ### Set up environment and install dependencies
 
7
  pinned: false
8
  python_version: 3.11.7
9
  ---
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
11
 
12
  # 🔥 FLARE
 
14
 
15
  ### Yan Zhou Chen, Soha Hassoun
16
  #### Department of Computer Science, Tufts University
17
+ This repository provides the implementation of FLARE. Given a mass spectrum, FLARE can be used to rank a set of molecular candidates. Further, FLARE enables visualization of peak-to-node attribution.
18
 
19
  ## 🛠 Set up
20
  ### Clone repository
21
  ```
22
+ git clone https://huggingface.co/spaces/HassounLab/FLARE
23
  cd flare
24
  ```
25
  ### Set up environment and install dependencies
flare/params_binnedSpec.yaml DELETED
@@ -1,122 +0,0 @@
1
-
2
- # Experiment setup
3
- job_key: ''
4
- run_name: 'binnedSpec_experiment'
5
- run_details: ""
6
- project_name: ''
7
- wandb_entity_name: 'mass-spec-ml'
8
- no_wandb: True
9
- seed: 0
10
- debug: False
11
- checkpoint_pth: ""
12
-
13
- # Training setup
14
- max_epochs: 1000
15
- accelerator: 'gpu'
16
- devices: [1]
17
- log_every_n_steps: 250
18
- val_check_interval: 1.0
19
-
20
- # Data paths
21
- candidates_pth: ../data/sample/candidates_mass.json
22
- dataset_pth: "../data/sample/data.tsv"
23
- subformula_dir_pth: ""
24
- split_pth:
25
- fp_dir_pth: '../data/sample/morganfp_r5_1024.pickle'
26
- cons_spec_dir_pth: "../data/sample/consensus_binnedSpec.pkl"
27
- NL_spec_dir_pth: ""
28
- partial_checkpoint: ""
29
-
30
- # General hyperparameters
31
- batch_size: 64
32
- lr: 5.0e-4
33
- weight_decay: 0
34
- contr_temp: 0.05
35
- early_stopping_patience: 300
36
- loss_strategy: 'static' # static, linear, manual
37
- num_workers: 50
38
-
39
-
40
- ############################## Data transforms ##############################
41
- # - Spectra
42
- spectra_view: SpecBinnerLog
43
- max_mz: 1000
44
- bin_width: 1
45
- mask_peak_ratio: 0.00
46
-
47
- # 2. SpecFormula
48
- element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
49
- add_intensities: True
50
- mask_precursor: False
51
-
52
- # - Molecule
53
- molecule_view: "MolGraph"
54
- atom_feature: 'full'
55
- bond_feature: 'full'
56
-
57
-
58
- ############################## Views ##############################
59
- # contrastive
60
- use_contr: True
61
- contr_wt: 1
62
- contr_wt_update: {}
63
-
64
- # consensus spectra
65
- use_cons_spec: False
66
- cons_spec_wt: 3
67
- cons_spec_wt_update: {}
68
- cons_loss_type: 'l2' # cosine, l2
69
-
70
- # fp prediction/usage
71
- pred_fp: False
72
- use_fp: False
73
- fp_loss_type: 'cosine' #cosine, bce
74
- fp_wt: 3
75
- fp_wt_update: {}
76
- fp_size: 1024
77
- fp_radius: 5
78
- fp_dropout: 0.4
79
-
80
- # candidates
81
- aug_cands: False
82
- aug_cands_wt: 0.1
83
- aug_cands_update: {}
84
- aug_cands_size: 3
85
-
86
- # neutral loss
87
- use_NL: False
88
-
89
-
90
-
91
- ############################## Task and model ##############################
92
- task: 'retrieval'
93
- spec_enc: MLP_BIN
94
- mol_enc: "GNN"
95
- model: "MultiviewContrastive"
96
- contr_views: [['spec_enc', 'mol_enc']]
97
- log_only_loss_at_stages: []
98
- df_test_path: ""
99
-
100
- # - Spectra encoder
101
- final_embedding_dim: 512
102
- fc_dropout: 0.4
103
-
104
- # - Spectra Token encoder
105
- hidden_dims: [64, 128]
106
- peak_dropout: 0.2
107
-
108
- # - Formula-based spec encoders
109
- formula_dropout: 0.2
110
- formula_dims: [64, 128, 256]
111
- cross_attn_heads: 2
112
- use_cls: True
113
-
114
- # -- GAT params
115
- attn_heads: [12,12,12]
116
-
117
- # - Molecule encoder (GNN)
118
- gnn_channels: [64,128,256]
119
- gnn_type: "gcn"
120
- num_gnn_layers: 3
121
- gnn_hidden_dim: 512
122
- gnn_dropout: 0.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
flare/params_filipGlobal.yaml DELETED
@@ -1,95 +0,0 @@
1
- # Experiment setup
2
- job_key: ''
3
- run_name: 'filip-global'
4
- run_details: ""
5
- project_name: ''
6
- wandb_entity_name: 'mass-spec-ml'
7
- no_wandb: True
8
- seed: 42
9
- debug: False
10
- checkpoint_pth:
11
-
12
- # Training setup
13
- max_epochs: 2000
14
- accelerator: 'gpu'
15
- devices: [1]
16
- log_every_n_steps: 250
17
- val_check_interval: 1.0
18
-
19
- # Data paths
20
- candidates_pth: /r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json # "../data/MassSpecGym/data/molecules/MassSpecGym_retrieval_candidates_formula.json"
21
- dataset_pth: /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # /data/yzhouc01/MVP/data/sample/data.tsv #/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv #/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv # /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # "../data/MassSpecGym/data/sample_data.tsv"
22
- subformula_dir_pth: /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/FILIP-MS/data/magma # /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default #/data/yzhouc01/spectra_data/subformulae #"../data/MassSpecGym/data/subformulae_default"
23
- split_pth:
24
- fp_dir_pth:
25
- partial_checkpoint: ""
26
-
27
- # General hyperparameters
28
- batch_size: 64 #64
29
- lr: 2.881339661302105e-05 # 5.0e-05
30
- weight_decay: 1.8376229667330708e-05
31
- contr_temp: 0.022772534845886608 # 0.022772534845886608 # 0.05
32
- num_workers: 50
33
-
34
-
35
- # FILIP_GLOBAL model parameters
36
- loss_mode: "pcgrad"
37
- agg_fn: "mean"
38
- loss_weight: 1.1
39
-
40
-
41
- ############################## Data transforms ##############################
42
- # - Spectra
43
- spectra_view: SpecFormula #SpecMzIntTokens #SpecFormula
44
- formula_source: 'default' # magma_1, magma_all, sirius, default
45
- # 1. Binner
46
- max_mz: 1000
47
- bin_width: 1
48
- mask_peak_ratio: 0.00
49
-
50
- # 2. SpecFormula
51
- element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
52
- add_intensities: True
53
-
54
- # - Molecule
55
- molecule_view: "MolGraph"
56
- atom_feature: 'full'
57
- bond_feature: 'full'
58
-
59
-
60
- ############################## Task and model ##############################
61
- task: 'retrieval'
62
- spec_enc: Transformer_Formula # Transformer_MzInt #Transformer_Formula
63
- mol_enc: "GNN"
64
- model: filipGlobalContrastive #filipContrastive # "MultiviewContrastive"
65
- contr_views: [['spec_enc', 'mol_enc']]
66
- log_only_loss_at_stages: []
67
- df_test_path: ""
68
-
69
-
70
- # - Formula-based spec encoders
71
- formula_dropout: 0.2
72
- formula_dims: [512,256,512] #[512, 256, 512] #[64, 128, 256]
73
- cross_attn_heads: 2
74
- use_cls: False
75
- peak_dropout: 0.2
76
- formula_attn_heads: 4 # 2
77
- formula_transformer_layers: 2 #2
78
-
79
- # -- GAT params
80
- attn_heads: [12,12,12]
81
-
82
- # - Molecule encoder (GNN)
83
- gnn_channels: [128, 256, 512] #[64,128,512]
84
- gnn_type: "gcn"
85
- # num_gnn_layers: 3
86
- # gnn_hidden_dim: 512
87
- gnn_dropout: 0.23234950970370824 #0.3
88
-
89
-
90
- # - Spectra encoder (cross attention model)
91
- # final_embedding_dim: 512
92
- # fc_dropout: 0.4
93
-
94
- # - Spectra Token encoder (mz-int token model)
95
- # hidden_dims: [64, 256]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
flare/params_formSpec.yaml DELETED
@@ -1,121 +0,0 @@
1
- # Experiment setup
2
- job_key: ''
3
- run_name: 'flare_sirius_labels_42'
4
- run_details: ""
5
- project_name: ''
6
- wandb_entity_name: 'mass-spec-ml'
7
- no_wandb: True
8
- seed: 42
9
- debug: False
10
- checkpoint_pth:
11
-
12
- # Training setup
13
- max_epochs: 2000
14
- accelerator: 'gpu'
15
- devices: [1]
16
- log_every_n_steps: 250
17
- val_check_interval: 1.0
18
-
19
- # Data paths
20
- candidates_pth: /r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json # "../data/MassSpecGym/data/molecules/MassSpecGym_retrieval_candidates_formula.json"
21
- dataset_pth: /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # /data/yzhouc01/MVP/data/sample/data.tsv #/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv #/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv # /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # "../data/MassSpecGym/data/sample_data.tsv"
22
- subformula_dir_pth: /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/FILIP-MS/data/magma # /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default #/data/yzhouc01/spectra_data/subformulae #"../data/MassSpecGym/data/subformulae_default"
23
- split_pth:
24
- fp_dir_pth:
25
- partial_checkpoint: ""
26
-
27
- # General hyperparameters
28
- batch_size: 64 #64
29
- lr: 2.881339661302105e-05 # 5.0e-05
30
- weight_decay: 1.8376229667330708e-05
31
- contr_temp: 0.022772534845886608 # 0.022772534845886608 # 0.05
32
- num_workers: 50
33
-
34
-
35
- ############################## Data transforms ##############################
36
- # - Spectra
37
- spectra_view: SpecFormula #SpecMzIntTokens #SpecFormula
38
- formula_source: 'sirius' # magma_1, magma_all, sirius, default
39
- # 1. Binner
40
- max_mz: 1000
41
- bin_width: 1
42
- mask_peak_ratio: 0.00
43
-
44
- # 2. SpecFormula
45
- element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
46
- add_intensities: True
47
-
48
- # - Molecule
49
- molecule_view: "MolGraph"
50
- atom_feature: 'full'
51
- bond_feature: 'full'
52
-
53
-
54
- ############################## Views ##############################
55
- # contrastive
56
- # use_contr: False
57
- # contr_wt: 1
58
- # contr_wt_update: {}
59
-
60
- # consensus spectra
61
- # use_cons_spec: False
62
- # cons_spec_wt: 3
63
- # cons_spec_wt_update: {}
64
- # cons_loss_type: 'l2' # cosine, l2
65
-
66
- # fp prediction/usage
67
- # pred_fp: False
68
- # use_fp: False
69
- # fp_loss_type: 'cosine' #cosine, bce
70
- # fp_wt: 3
71
- # fp_wt_update: {}
72
- # fp_size: 1024
73
- # fp_radius: 5
74
- # fp_dropout: 0.4
75
-
76
- # candidates
77
- # aug_cands: False
78
- # aug_cands_wt: 0.1
79
- # aug_cands_update: {}
80
- # aug_cands_size: 3
81
-
82
- # neutral loss
83
- # use_NL: False
84
-
85
-
86
- ############################## Task and model ##############################
87
- task: 'retrieval'
88
- spec_enc: Transformer_Formula # Transformer_MzInt #Transformer_Formula
89
- mol_enc: "GNN"
90
- model: filipContrastive # "MultiviewContrastive"
91
- contr_views: [['spec_enc', 'mol_enc']]
92
- log_only_loss_at_stages: []
93
- df_test_path: ""
94
-
95
-
96
- # - Formula-based spec encoders
97
- formula_dropout: 0.2
98
- formula_dims: [512,256,512] #[512, 256, 512] #[64, 128, 256]
99
- cross_attn_heads: 2
100
- use_cls: False
101
- peak_dropout: 0.2
102
- formula_attn_heads: 4 # 2
103
- formula_transformer_layers: 2 #2
104
-
105
- # -- GAT params
106
- attn_heads: [12,12,12]
107
-
108
- # - Molecule encoder (GNN)
109
- gnn_channels: [128, 256, 512] #[64,128,512]
110
- gnn_type: "gcn"
111
- # num_gnn_layers: 3
112
- # gnn_hidden_dim: 512
113
- gnn_dropout: 0.23234950970370824 #0.3
114
-
115
-
116
- # - Spectra encoder (cross attention model)
117
- # final_embedding_dim: 512
118
- # fc_dropout: 0.4
119
-
120
- # - Spectra Token encoder (mz-int token model)
121
- # hidden_dims: [64, 256]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
flare/params_jestr.yaml DELETED
@@ -1,122 +0,0 @@
1
-
2
- # Experiment setup
3
- job_key: ''
4
- run_name: 'combined_d_1024dim_100bs'
5
- run_details: ""
6
- project_name: ''
7
- wandb_entity_name: 'mass-spec-ml'
8
- no_wandb: True
9
- seed: 3
10
- debug: False
11
- checkpoint_pth:
12
-
13
- # Training setup
14
- max_epochs: 2000
15
- accelerator: 'gpu'
16
- devices: [1]
17
- log_every_n_steps: 250
18
- val_check_interval: 1.0
19
-
20
- # Data paths
21
- candidates_pth: "/r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json"
22
- dataset_pth: '/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv' # '/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv' #"/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv"
23
- subformula_dir_pth: ""
24
- split_pth:
25
- fp_dir_pth: ''
26
- cons_spec_dir_pth:
27
- NL_spec_dir_pth: ""
28
- partial_checkpoint: ""
29
-
30
- # General hyperparameters
31
- batch_size: 100
32
- lr: 5.0e-4
33
- weight_decay: 0
34
- contr_temp: 0.05
35
- early_stopping_patience: 300
36
- loss_strategy: 'static' # static, linear, manual
37
- num_workers: 50
38
-
39
-
40
- ############################## Data transforms ##############################
41
- # - Spectra
42
- spectra_view: SpecBinnerLog
43
- max_mz: 1000
44
- bin_width: 1
45
- mask_peak_ratio: 0.00
46
-
47
- # 2. SpecFormula
48
- element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
49
- add_intensities: True
50
- mask_precursor: False
51
-
52
- # - Molecule
53
- molecule_view: "MolGraph"
54
- atom_feature: 'full'
55
- bond_feature: 'full'
56
-
57
-
58
- ############################## Views ##############################
59
- # contrastive
60
- use_contr: True
61
- contr_wt: 1
62
- contr_wt_update: {}
63
-
64
- # consensus spectra
65
- use_cons_spec: False
66
- cons_spec_wt: 3
67
- cons_spec_wt_update: {}
68
- cons_loss_type: 'l2' # cosine, l2
69
-
70
- # fp prediction/usage
71
- pred_fp: False
72
- use_fp: False
73
- fp_loss_type: 'cosine' #cosine, bce
74
- fp_wt: 3
75
- fp_wt_update: {}
76
- fp_size: 1024
77
- fp_radius: 5
78
- fp_dropout: 0.4
79
-
80
- # candidates
81
- aug_cands: False
82
- aug_cands_wt: 0.1
83
- aug_cands_update: {}
84
- aug_cands_size: 3
85
-
86
- # neutral loss
87
- use_NL: False
88
-
89
-
90
-
91
- ############################## Task and model ##############################
92
- task: 'retrieval'
93
- spec_enc: MLP_BIN
94
- mol_enc: "GNN"
95
- model: "MultiviewContrastive"
96
- contr_views: [['spec_enc', 'mol_enc']]
97
- log_only_loss_at_stages: []
98
- df_test_path: ""
99
-
100
- # - Spectra encoder
101
- final_embedding_dim: 1024
102
- fc_dropout: 0.4
103
-
104
- # - Spectra Token encoder
105
- hidden_dims: [64, 128]
106
- peak_dropout: 0.2
107
-
108
- # - Formula-based spec encoders
109
- formula_dropout: 0.2
110
- formula_dims: [64, 128, 256]
111
- cross_attn_heads: 2
112
- use_cls: True
113
-
114
- # -- GAT params
115
- attn_heads: [12,12,12]
116
-
117
- # - Molecule encoder (GNN)
118
- gnn_channels: [64,128,256]
119
- gnn_type: "gcn"
120
- num_gnn_layers: 3
121
- gnn_hidden_dim: 1024
122
- gnn_dropout: 0.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
flare/params_tmp.yaml DELETED
@@ -1,125 +0,0 @@
1
- # Experiment setup
2
- job_key: ''
3
- run_name: 'crossAttnModel'
4
- run_details: ""
5
- project_name: ''
6
- wandb_entity_name: 'mass-spec-ml'
7
- no_wandb: True
8
- seed: 0
9
- debug: False
10
- checkpoint_pth:
11
-
12
- # Training setup
13
- max_epochs: 1000
14
- accelerator: 'gpu'
15
- devices: [1]
16
- log_every_n_steps: 250
17
- val_check_interval: 1.0
18
-
19
- # Data paths
20
- candidates_pth: /r/hassounlab/spectra_data/msgym/molecules/MassSpecGym_retrieval_candidates_mass.json # "../data/MassSpecGym/data/molecules/MassSpecGym_retrieval_candidates_formula.json"
21
- dataset_pth: /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # /data/yzhouc01/MVP/data/sample/data.tsv #/r/hassounlab/spectra_data/msgym/MassSpecGym.tsv #/data/yzhouc01/spectra_data/combined_msgym_nist23_multiplex.tsv # /r/hassounlab/spectra_data/msgym/MassSpecGym.tsv # "../data/MassSpecGym/data/sample_data.tsv"
22
- subformula_dir_pth: /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default # /data/yzhouc01/FILIP-MS/data/magma # /r/hassounlab/msgym_sirius # /data/yzhouc01/MVP/data/MassSpecGym/data/subformulae_default #/data/yzhouc01/spectra_data/subformulae #"../data/MassSpecGym/data/subformulae_default"
23
- split_pth:
24
- fp_dir_pth:
25
- cons_spec_dir_pth:
26
- NL_spec_dir_pth: ""
27
- partial_checkpoint: ""
28
-
29
- # General hyperparameters
30
- batch_size: 64
31
- lr: 2.881339661302105e-05 # 5.0e-05
32
- weight_decay: 1.1586679936312845e-05
33
- contr_temp: 0.022772534845886608 # 0.05
34
- early_stopping_patience: 300
35
- loss_strategy: 'static'
36
- num_workers: 50
37
-
38
-
39
- ############################## Data transforms ##############################
40
- # - Spectra
41
- spectra_view: SpecFormula #SpecMzIntTokens #SpecFormula
42
- formula_source: 'default' # magma_1, magma_all, sirius, default
43
- # 1. Binner
44
- max_mz: 1000
45
- bin_width: 1
46
- mask_peak_ratio: 0.00
47
-
48
- # 2. SpecFormula
49
- element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
50
- add_intensities: True
51
- mask_precursor: False
52
-
53
- # - Molecule
54
- molecule_view: "MolGraph"
55
- atom_feature: 'full'
56
- bond_feature: 'full'
57
-
58
-
59
- ############################## Views ##############################
60
- # contrastive
61
- use_contr: False
62
- contr_wt: 1
63
- contr_wt_update: {}
64
-
65
- # consensus spectra
66
- use_cons_spec: False
67
- cons_spec_wt: 3
68
- cons_spec_wt_update: {}
69
- cons_loss_type: 'l2' # cosine, l2
70
-
71
- # fp prediction/usage
72
- pred_fp: False
73
- use_fp: False
74
- fp_loss_type: 'cosine' #cosine, bce
75
- fp_wt: 3
76
- fp_wt_update: {}
77
- fp_size: 1024
78
- fp_radius: 5
79
- fp_dropout: 0.4
80
-
81
- # candidates
82
- aug_cands: False
83
- aug_cands_wt: 0.1
84
- aug_cands_update: {}
85
- aug_cands_size: 3
86
-
87
- # neutral loss
88
- use_NL: False
89
-
90
-
91
- ############################## Task and model ##############################
92
- task: 'retrieval'
93
- spec_enc: Transformer_Formula # Transformer_MzInt #Transformer_Formula
94
- mol_enc: "GNN"
95
- model: crossAttenContrastive # "MultiviewContrastive"
96
- contr_views: [['spec_enc', 'mol_enc']] #[['spec_enc', 'mol_enc'], ['spec_enc', 'NL_spec_enc'], ['mol_enc', 'NL_spec_enc']] #[['spec_enc', 'mol_enc'], ['mol_enc', 'cons_spec_enc'], ['cons_spec_enc', 'spec_enc'], ['fp_enc', 'mol_enc'], ['fp_enc', 'spec_enc'], ['fp_enc', 'cons_spec_enc']]
97
- log_only_loss_at_stages: []
98
- df_test_path: ""
99
-
100
- # - Spectra encoder
101
- final_embedding_dim: 512
102
- fc_dropout: 0.4
103
-
104
- # - Spectra Token encoder
105
- hidden_dims: [64, 128]
106
-
107
-
108
- # - Formula-based spec encoders
109
- formula_dropout: 0.2
110
- formula_dims: [128, 256, 512] #[64, 128, 256]
111
- cross_attn_heads: 4 # 2
112
- use_cls: False
113
- peak_dropout: 0.414425691950033 # 0.2
114
- formula_attn_heads: 4 # 2
115
- formula_transformer_layers: 2
116
-
117
- # -- GAT params
118
- attn_heads: [12,12,12]
119
-
120
- # - Molecule encoder (GNN)
121
- gnn_channels: [64,128,512]
122
- gnn_type: "gcn"
123
- num_gnn_layers: 3
124
- gnn_hidden_dim: 512
125
- gnn_dropout: 0.23234950970370824 #0.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pretrained_models/.gitkeep ADDED
File without changes