Christina Theodoris
committed on
Commit
·
17f036a
1
Parent(s):
e04975c
change doc formatting
Browse files
geneformer/emb_extractor.py
CHANGED
|
@@ -395,8 +395,8 @@ class EmbExtractor:
|
|
| 395 |
"""
|
| 396 |
Initialize embedding extractor.
|
| 397 |
|
| 398 |
-
Parameters
|
| 399 |
-
|
| 400 |
model_type : {"Pretrained","GeneClassifier","CellClassifier"}
|
| 401 |
| Whether model is the pretrained Geneformer or a fine-tuned gene or cell classifier.
|
| 402 |
num_classes : int
|
|
@@ -442,8 +442,7 @@ class EmbExtractor:
|
|
| 442 |
token_dictionary_file : Path
|
| 443 |
| Path to pickle file containing token dictionary (Ensembl ID:token).
|
| 444 |
|
| 445 |
-
Examples
|
| 446 |
-
~~~~~~~~
|
| 447 |
|
| 448 |
.. code-block :: python
|
| 449 |
|
|
@@ -532,8 +531,8 @@ class EmbExtractor:
|
|
| 532 |
"""
|
| 533 |
Extract embeddings from input data and save as results in output_directory.
|
| 534 |
|
| 535 |
-
Parameters
|
| 536 |
-
|
| 537 |
model_directory : Path
|
| 538 |
| Path to directory containing model
|
| 539 |
input_data_file : Path
|
|
@@ -548,8 +547,7 @@ class EmbExtractor:
|
|
| 548 |
cell_state : dict
|
| 549 |
| Cell state key and value for state embedding extraction.
|
| 550 |
|
| 551 |
-
Examples
|
| 552 |
-
~~~~~~~~
|
| 553 |
|
| 554 |
.. code-block :: python
|
| 555 |
|
|
@@ -629,8 +627,8 @@ class EmbExtractor:
|
|
| 629 |
"""
|
| 630 |
Extract exact mean or exact median cell state embedding positions from input data and save as results in output_directory.
|
| 631 |
|
| 632 |
-
Parameters
|
| 633 |
-
|
| 634 |
cell_states_to_model : None, dict
|
| 635 |
| Cell states to model if testing perturbations that achieve goal state change.
|
| 636 |
| Four-item dictionary with keys: state_key, start_state, goal_state, and alt_states
|
|
@@ -655,8 +653,8 @@ class EmbExtractor:
|
|
| 655 |
| Whether or not to also output the embeddings as a tensor.
|
| 656 |
| Note, if true, will output embeddings as both dataframe and tensor.
|
| 657 |
|
| 658 |
-
Outputs
|
| 659 |
-
|
| 660 |
| Outputs state_embs_dict for use with in silico perturber.
|
| 661 |
| Format is dictionary of embedding positions of each cell state to model shifts from/towards.
|
| 662 |
| Keys specify each possible cell state to model.
|
|
@@ -721,8 +719,8 @@ class EmbExtractor:
|
|
| 721 |
"""
|
| 722 |
Plot embeddings, coloring by provided labels.
|
| 723 |
|
| 724 |
-
Parameters
|
| 725 |
-
|
| 726 |
embs : pandas.core.frame.DataFrame
|
| 727 |
| Pandas dataframe containing embeddings output from extract_embs
|
| 728 |
plot_style : str
|
|
@@ -738,8 +736,7 @@ class EmbExtractor:
|
|
| 738 |
kwargs_dict : dict
|
| 739 |
| Dictionary of kwargs to pass to plotting function.
|
| 740 |
|
| 741 |
-
Examples
|
| 742 |
-
~~~~~~~~
|
| 743 |
|
| 744 |
.. code-block :: python
|
| 745 |
|
|
|
|
| 395 |
"""
|
| 396 |
Initialize embedding extractor.
|
| 397 |
|
| 398 |
+
**Parameters:**
|
| 399 |
+
|
| 400 |
model_type : {"Pretrained","GeneClassifier","CellClassifier"}
|
| 401 |
| Whether model is the pretrained Geneformer or a fine-tuned gene or cell classifier.
|
| 402 |
num_classes : int
|
|
|
|
| 442 |
token_dictionary_file : Path
|
| 443 |
| Path to pickle file containing token dictionary (Ensembl ID:token).
|
| 444 |
|
| 445 |
+
**Examples:**
|
|
|
|
| 446 |
|
| 447 |
.. code-block :: python
|
| 448 |
|
|
|
|
| 531 |
"""
|
| 532 |
Extract embeddings from input data and save as results in output_directory.
|
| 533 |
|
| 534 |
+
**Parameters:**
|
| 535 |
+
|
| 536 |
model_directory : Path
|
| 537 |
| Path to directory containing model
|
| 538 |
input_data_file : Path
|
|
|
|
| 547 |
cell_state : dict
|
| 548 |
| Cell state key and value for state embedding extraction.
|
| 549 |
|
| 550 |
+
**Examples:**
|
|
|
|
| 551 |
|
| 552 |
.. code-block :: python
|
| 553 |
|
|
|
|
| 627 |
"""
|
| 628 |
Extract exact mean or exact median cell state embedding positions from input data and save as results in output_directory.
|
| 629 |
|
| 630 |
+
**Parameters:**
|
| 631 |
+
|
| 632 |
cell_states_to_model : None, dict
|
| 633 |
| Cell states to model if testing perturbations that achieve goal state change.
|
| 634 |
| Four-item dictionary with keys: state_key, start_state, goal_state, and alt_states
|
|
|
|
| 653 |
| Whether or not to also output the embeddings as a tensor.
|
| 654 |
| Note, if true, will output embeddings as both dataframe and tensor.
|
| 655 |
|
| 656 |
+
**Outputs**
|
| 657 |
+
|
| 658 |
| Outputs state_embs_dict for use with in silico perturber.
|
| 659 |
| Format is dictionary of embedding positions of each cell state to model shifts from/towards.
|
| 660 |
| Keys specify each possible cell state to model.
|
|
|
|
| 719 |
"""
|
| 720 |
Plot embeddings, coloring by provided labels.
|
| 721 |
|
| 722 |
+
**Parameters:**
|
| 723 |
+
|
| 724 |
embs : pandas.core.frame.DataFrame
|
| 725 |
| Pandas dataframe containing embeddings output from extract_embs
|
| 726 |
plot_style : str
|
|
|
|
| 736 |
kwargs_dict : dict
|
| 737 |
| Dictionary of kwargs to pass to plotting function.
|
| 738 |
|
| 739 |
+
**Examples:**
|
|
|
|
| 740 |
|
| 741 |
.. code-block :: python
|
| 742 |
|
geneformer/in_silico_perturber.py
CHANGED
|
@@ -100,8 +100,8 @@ class InSilicoPerturber:
|
|
| 100 |
"""
|
| 101 |
Initialize in silico perturber.
|
| 102 |
|
| 103 |
-
Parameters
|
| 104 |
-
|
| 105 |
perturb_type : {"delete", "overexpress", "inhibit", "activate"}
|
| 106 |
| Type of perturbation.
|
| 107 |
| "delete": delete gene from rank value encoding
|
|
@@ -398,8 +398,8 @@ class InSilicoPerturber:
|
|
| 398 |
"""
|
| 399 |
Perturb genes in input data and save as results in output_directory.
|
| 400 |
|
| 401 |
-
Parameters
|
| 402 |
-
|
| 403 |
model_directory : Path
|
| 404 |
| Path to directory containing model
|
| 405 |
input_data_file : Path
|
|
|
|
| 100 |
"""
|
| 101 |
Initialize in silico perturber.
|
| 102 |
|
| 103 |
+
**Parameters:**
|
| 104 |
+
|
| 105 |
perturb_type : {"delete", "overexpress", "inhibit", "activate"}
|
| 106 |
| Type of perturbation.
|
| 107 |
| "delete": delete gene from rank value encoding
|
|
|
|
| 398 |
"""
|
| 399 |
Perturb genes in input data and save as results in output_directory.
|
| 400 |
|
| 401 |
+
**Parameters:**
|
| 402 |
+
|
| 403 |
model_directory : Path
|
| 404 |
| Path to directory containing model
|
| 405 |
input_data_file : Path
|
geneformer/in_silico_perturber_stats.py
CHANGED
|
@@ -652,8 +652,8 @@ class InSilicoPerturberStats:
|
|
| 652 |
"""
|
| 653 |
Initialize in silico perturber stats generator.
|
| 654 |
|
| 655 |
-
Parameters
|
| 656 |
-
|
| 657 |
mode : {"goal_state_shift", "vs_null", "mixture_model", "aggregate_data", "aggregate_gene_shifts"}
|
| 658 |
| Type of stats.
|
| 659 |
| "goal_state_shift": perturbation vs. random for desired cell state shift
|
|
@@ -854,8 +854,8 @@ class InSilicoPerturberStats:
|
|
| 854 |
"""
|
| 855 |
Get stats for in silico perturbation data and save as results in output_directory.
|
| 856 |
|
| 857 |
-
Parameters
|
| 858 |
-
|
| 859 |
input_data_directory : Path
|
| 860 |
| Path to directory containing cos_sim dictionary inputs
|
| 861 |
null_dist_data_directory : Path
|
|
@@ -867,8 +867,8 @@ class InSilicoPerturberStats:
|
|
| 867 |
null_dict_list: dict
|
| 868 |
| List of loaded null distribution dictionaries if more than one comparison vs. the null is to be performed
|
| 869 |
|
| 870 |
-
Outputs
|
| 871 |
-
|
| 872 |
Definition of possible columns in .csv output file.
|
| 873 |
|
| 874 |
| Of note, not all columns will be present in all output files.
|
|
|
|
| 652 |
"""
|
| 653 |
Initialize in silico perturber stats generator.
|
| 654 |
|
| 655 |
+
**Parameters:**
|
| 656 |
+
|
| 657 |
mode : {"goal_state_shift", "vs_null", "mixture_model", "aggregate_data", "aggregate_gene_shifts"}
|
| 658 |
| Type of stats.
|
| 659 |
| "goal_state_shift": perturbation vs. random for desired cell state shift
|
|
|
|
| 854 |
"""
|
| 855 |
Get stats for in silico perturbation data and save as results in output_directory.
|
| 856 |
|
| 857 |
+
**Parameters:**
|
| 858 |
+
|
| 859 |
input_data_directory : Path
|
| 860 |
| Path to directory containing cos_sim dictionary inputs
|
| 861 |
null_dist_data_directory : Path
|
|
|
|
| 867 |
null_dict_list: dict
|
| 868 |
| List of loaded null distribution dictionaries if more than one comparison vs. the null is to be performed
|
| 869 |
|
| 870 |
+
**Outputs:**
|
| 871 |
+
|
| 872 |
Definition of possible columns in .csv output file.
|
| 873 |
|
| 874 |
| Of note, not all columns will be present in all output files.
|
geneformer/tokenizer.py
CHANGED
|
@@ -87,8 +87,8 @@ class TranscriptomeTokenizer:
|
|
| 87 |
"""
|
| 88 |
Initialize tokenizer.
|
| 89 |
|
| 90 |
-
Parameters
|
| 91 |
-
|
| 92 |
custom_attr_name_dict : None, dict
|
| 93 |
| Dictionary of custom attributes to be added to the dataset.
|
| 94 |
| Keys are the names of the attributes in the loom file.
|
|
@@ -138,8 +138,8 @@ class TranscriptomeTokenizer:
|
|
| 138 |
"""
|
| 139 |
Tokenize .loom files in data_directory and save as tokenized .dataset in output_directory.
|
| 140 |
|
| 141 |
-
Parameters
|
| 142 |
-
|
| 143 |
data_directory : Path
|
| 144 |
Path to directory containing loom files or anndata files
|
| 145 |
output_directory : Path
|
|
|
|
| 87 |
"""
|
| 88 |
Initialize tokenizer.
|
| 89 |
|
| 90 |
+
**Parameters:**
|
| 91 |
+
|
| 92 |
custom_attr_name_dict : None, dict
|
| 93 |
| Dictionary of custom attributes to be added to the dataset.
|
| 94 |
| Keys are the names of the attributes in the loom file.
|
|
|
|
| 138 |
"""
|
| 139 |
Tokenize .loom files in data_directory and save as tokenized .dataset in output_directory.
|
| 140 |
|
| 141 |
+
**Parameters:**
|
| 142 |
+
|
| 143 |
data_directory : Path
|
| 144 |
Path to directory containing loom files or anndata files
|
| 145 |
output_directory : Path
|