Christina Theodoris
committed on
Commit
·
316d817
1
Parent(s):
e3330a6
Handle case of single gene del for isp modeling of gene embs
Browse files
geneformer/in_silico_perturber.py
CHANGED
|
@@ -636,7 +636,7 @@ class InSilicoPerturber:
|
|
| 636 |
if len(self.genes_to_perturb) > 1:
|
| 637 |
tokens_to_perturb = tuple(self.tokens_to_perturb)
|
| 638 |
else:
|
| 639 |
-
tokens_to_perturb = self.tokens_to_perturb
|
| 640 |
|
| 641 |
# fill in the gene cosine similarities
|
| 642 |
try:
|
|
|
|
| 636 |
if len(self.genes_to_perturb) > 1:
|
| 637 |
tokens_to_perturb = tuple(self.tokens_to_perturb)
|
| 638 |
else:
|
| 639 |
+
tokens_to_perturb = self.tokens_to_perturb[0]
|
| 640 |
|
| 641 |
# fill in the gene cosine similarities
|
| 642 |
try:
|
geneformer/in_silico_perturber_stats.py
CHANGED
|
@@ -158,7 +158,7 @@ def token_tuple_to_ensembl_ids(token_tuple, gene_token_id_dict):
|
|
| 158 |
try:
|
| 159 |
return tuple([gene_token_id_dict.get(i, np.nan) for i in token_tuple])
|
| 160 |
except TypeError:
|
| 161 |
-
return
|
| 162 |
|
| 163 |
|
| 164 |
def n_detections(token, dict_list, mode, anchor_token):
|
|
@@ -208,7 +208,7 @@ def find(variable, x):
|
|
| 208 |
try:
|
| 209 |
if x in variable: # Test if variable is iterable and contains x
|
| 210 |
return True
|
| 211 |
-
except TypeError:
|
| 212 |
return x == variable # Test if variable is x if non-iterable
|
| 213 |
|
| 214 |
|
|
@@ -239,8 +239,9 @@ def isp_aggregate_gene_shifts(
|
|
| 239 |
cos_sims_df[cos_sims_df["Gene"] == k[0]]["Ensembl_ID"][0]
|
| 240 |
for k, v in cos_data_mean.items()
|
| 241 |
]
|
|
|
|
| 242 |
cos_sims_full_df["Affected"] = [k[1] for k, v in cos_data_mean.items()]
|
| 243 |
-
cos_sims_full_df["
|
| 244 |
gene_id_name_dict.get(gene_token_id_dict.get(token, np.nan), np.nan)
|
| 245 |
for token in cos_sims_full_df["Affected"]
|
| 246 |
]
|
|
@@ -1026,7 +1027,7 @@ class InSilicoPerturberStats:
|
|
| 1026 |
cos_sims_df.to_csv(output_path)
|
| 1027 |
|
| 1028 |
def token_to_gene_name(self, item):
|
| 1029 |
-
if
|
| 1030 |
return self.gene_id_name_dict.get(
|
| 1031 |
self.gene_token_id_dict.get(item, np.nan), np.nan
|
| 1032 |
)
|
|
|
|
| 158 |
try:
|
| 159 |
return tuple([gene_token_id_dict.get(i, np.nan) for i in token_tuple])
|
| 160 |
except TypeError:
|
| 161 |
+
return gene_token_id_dict.get(token_tuple, np.nan)
|
| 162 |
|
| 163 |
|
| 164 |
def n_detections(token, dict_list, mode, anchor_token):
|
|
|
|
| 208 |
try:
|
| 209 |
if x in variable: # Test if variable is iterable and contains x
|
| 210 |
return True
|
| 211 |
+
except (ValueError, TypeError):
|
| 212 |
return x == variable # Test if variable is x if non-iterable
|
| 213 |
|
| 214 |
|
|
|
|
| 239 |
cos_sims_df[cos_sims_df["Gene"] == k[0]]["Ensembl_ID"][0]
|
| 240 |
for k, v in cos_data_mean.items()
|
| 241 |
]
|
| 242 |
+
|
| 243 |
cos_sims_full_df["Affected"] = [k[1] for k, v in cos_data_mean.items()]
|
| 244 |
+
cos_sims_full_df["Affected_gene_name"] = [
|
| 245 |
gene_id_name_dict.get(gene_token_id_dict.get(token, np.nan), np.nan)
|
| 246 |
for token in cos_sims_full_df["Affected"]
|
| 247 |
]
|
|
|
|
| 1027 |
cos_sims_df.to_csv(output_path)
|
| 1028 |
|
| 1029 |
def token_to_gene_name(self, item):
|
| 1030 |
+
if np.issubdtype(type(item), np.integer):
|
| 1031 |
return self.gene_id_name_dict.get(
|
| 1032 |
self.gene_token_id_dict.get(item, np.nan), np.nan
|
| 1033 |
)
|