Anton Bushuiev committed on
Commit
33fc999
·
1 Parent(s): fc34019

Minor fixes

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -150,7 +150,7 @@ def predict(lib_pth, in_pth, progress=gr.Progress(track_tqdm=True)):
150
  progress(0.1, desc="Loading spectra data...")
151
  msdata = MSData.load(in_pth)
152
 
153
- progress(0.2, desc="Computing spectra embeddings with DreaMS...")
154
  embs = dreams_embeddings(msdata)
155
  print('Shape of the query embeddings:', embs.shape)
156
 
@@ -185,7 +185,7 @@ def predict(lib_pth, in_pth, progress=gr.Progress(track_tqdm=True)):
185
  'library_SMILES': smiles_to_html_img(smiles),
186
  'library_SMILES_raw': smiles,
187
  'Spectrum': spectrum_to_html_img(spec1, spec2),
188
- 'Spectrum_raw': spec1,
189
  'library_ID': msdata_lib.get_values('IDENTIFIER', j),
190
  'DreaMS_similarity': sims[i, j],
191
  'Modified_cosine_similarity': cos_sim(
@@ -196,7 +196,7 @@ def predict(lib_pth, in_pth, progress=gr.Progress(track_tqdm=True)):
196
  ),
197
  'i': i,
198
  'j': j,
199
- 'DreaMS_embedding': ' '.join(embs[i].astype(str)),
200
  })
201
  df = pd.DataFrame(df)
202
 
@@ -207,9 +207,9 @@ def predict(lib_pth, in_pth, progress=gr.Progress(track_tqdm=True)):
207
  progress(0.9, desc="Post-processing results...")
208
  # Remove unnecessary columns and round similarity scores
209
  df = df.drop(columns=['i', 'j', 'library_j'])
210
- df['DreaMS_similarity'] = df['DreaMS_similarity'].round(4)
211
- df['Modified_cosine_similarity'] = df['Modified_cosine_similarity'].round(4)
212
- df['precursor_mz'] = df['precursor_mz'].round(4)
213
  # df['RT'] = df['RT'].round(1)
214
  df = df.rename(columns={
215
  'topk': 'Top k',
@@ -229,7 +229,8 @@ def predict(lib_pth, in_pth, progress=gr.Progress(track_tqdm=True)):
229
  progress(0.95, desc="Saving results to CSV...")
230
  # Save full df to .csv
231
  df_path = dio.append_to_stem(in_pth, f"MassSpecGym_hits_{datetime.now().strftime('%Y%m%d_%H%M%S')}").with_suffix('.csv')
232
- df.to_csv(df_path, index=False)
 
233
 
234
  progress(0.98, desc="Filtering and sorting results...")
235
  # Postprocess to only show most relevant hits
 
150
  progress(0.1, desc="Loading spectra data...")
151
  msdata = MSData.load(in_pth)
152
 
153
+ progress(0.2, desc="Computing DreaMS embeddings...")
154
  embs = dreams_embeddings(msdata)
155
  print('Shape of the query embeddings:', embs.shape)
156
 
 
185
  'library_SMILES': smiles_to_html_img(smiles),
186
  'library_SMILES_raw': smiles,
187
  'Spectrum': spectrum_to_html_img(spec1, spec2),
188
+ 'Spectrum_raw': su.unpad_peak_list(spec1),
189
  'library_ID': msdata_lib.get_values('IDENTIFIER', j),
190
  'DreaMS_similarity': sims[i, j],
191
  'Modified_cosine_similarity': cos_sim(
 
196
  ),
197
  'i': i,
198
  'j': j,
199
+ 'DreaMS_embedding': embs[i],
200
  })
201
  df = pd.DataFrame(df)
202
 
 
207
  progress(0.9, desc="Post-processing results...")
208
  # Remove unnecessary columns and round similarity scores
209
  df = df.drop(columns=['i', 'j', 'library_j'])
210
+ df['DreaMS_similarity'] = df['DreaMS_similarity'].astype(float).round(4)
211
+ df['Modified_cosine_similarity'] = df['Modified_cosine_similarity'].astype(float).round(4)
212
+ df['precursor_mz'] = df['precursor_mz'].astype(float).round(4)
213
  # df['RT'] = df['RT'].round(1)
214
  df = df.rename(columns={
215
  'topk': 'Top k',
 
229
  progress(0.95, desc="Saving results to CSV...")
230
  # Save full df to .csv
231
  df_path = dio.append_to_stem(in_pth, f"MassSpecGym_hits_{datetime.now().strftime('%Y%m%d_%H%M%S')}").with_suffix('.csv')
232
+ df_to_save = df.drop(columns=['Molecule', 'Spectrum', 'Top k'])
233
+ df_to_save.to_csv(df_path, index=False)
234
 
235
  progress(0.98, desc="Filtering and sorting results...")
236
  # Postprocess to only show most relevant hits