Spaces:
Sleeping
Sleeping
| """ magma_utils.py | |
| Additional utility file to assist with fingerprinting. | |
| """ | |
| import os | |
| from ast import literal_eval | |
| import numpy as np | |
| import pandas as pd | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem, DataStructs | |
| import re | |
| def extract_adduct_ion(adduct, default='+H'): | |
| pattern = re.compile(r"\[M([+-][^\]]+)\]") | |
| match = pattern.search(adduct) | |
| if match: | |
| return match.group(1) | |
| return default | |
| def get_magma_fingerprint(smile): | |
| """ get_magma_fingerprint. """ | |
| mol = Chem.MolFromSmiles(smile, sanitize=False) | |
| Chem.SanitizeMol( | |
| mol, | |
| sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL | |
| ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE, | |
| ) | |
| curr_fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048) | |
| fingerprint = np.zeros((0,), dtype=np.uint8) | |
| DataStructs.ConvertToNumpyArray(curr_fp, fingerprint) | |
| return fingerprint | |
| def get_magma_fingerprint_bits(smile): | |
| """ get magma fingerprint bits """ | |
| fingerprint = get_magma_fingerprint(smile) | |
| hot_indices = list(np.where(np.array(list(fingerprint)) == 1)[0]) | |
| return hot_indices | |
| def read_magma_file(magma_frag_file): | |
| """Read in magma file""" | |
| if ( | |
| magma_frag_file is not None | |
| and os.path.exists(magma_frag_file) | |
| and os.path.getsize(magma_frag_file) > 0 | |
| ): | |
| # correct for inconsistency by me in file parsing (sad) | |
| sep = "\t" | |
| spectra_df = pd.read_csv(magma_frag_file, index_col=0, sep=sep) | |
| if ( | |
| "smiles" not in spectra_df.columns | |
| or "chemical_formula" not in spectra_df.columns | |
| ): | |
| pass | |
| else: | |
| spectra_df = _convert_str_to_list(spectra_df, "smiles") | |
| spectra_df = _convert_str_to_list(spectra_df, "chemical_formula") | |
| if "mass_to_charge" not in spectra_df.columns: | |
| spectra_df["mass_to_charge"] = spectra_df["mz"] | |
| return spectra_df | |
| spectra_df = pd.DataFrame( | |
| columns=[ | |
| "mass_to_charge", | |
| "intensity", | |
| "chemical_formula", | |
| "smiles", | |
| "molecule_peak", | |
| ] | |
| ) | |
| return spectra_df | |
| def _convert_str_to_list(df, column): | |
| """_convert_str_to_list""" | |
| df.loc[:, column] = df.loc[:, column].apply( | |
| lambda x: literal_eval(x) if x != "NAN" and not pd.isna(x) else [] | |
| ) | |
| return df | |