{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Investigating Gene Pathway Enrichment with Spectra \n", "This notebook uses Spectra to translate raw gene expression into interpretable biological scores. It then uses those scores to test the hypothesis that specific ESR1 perturbations alter cell cycle transitions. The results motivate a closer look at how different promoters or guides targeting the same gene can lead to different phenotypic (biological program) outcomes." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#read in the \n", "#import packages\n", "import numpy as np\n", "import json \n", "import scanpy as sc\n", "from collections import OrderedDict\n", "import scipy \n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "#spectra imports \n", "import Spectra as spc\n", "from Spectra import Spectra_util as spc_tl\n", "from Spectra import K_est as kst\n", "from Spectra import default_gene_sets\n", "import anndata as ad\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#base directory that the data file paths below are relative to\n", "loc=\"alt-prom-crispr-fiveprime/\"\n", "#load adata \n", "adata = ad.read_h5ad(loc+\"files/adata_normalised_cellcycle.h5ad\")\n", "adata.X=adata.layers[\"log1p\"]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cell type labels in gene set annotation dictionary and AnnData object are identical\n", "removing gene set all_Beta-Ala_metabolism for cell type global which is of length 7 2 genes are found in the data. minimum length is 3\n", "removing gene set all_ascorbate-uptake for cell type global which is of length 3 2 genes are found in the data. 
minimum length is 3\n", "Your gene set annotation dictionary is now correctly formatted.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10000/10000 [2:05:26<00:00, 1.33it/s] \n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'BTD' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'HLCS' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'TECR' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'TFRC' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'P4HB' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'CDH1' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'HSPA5' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'CTSD' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'PDIA6' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'SCD' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'BMP7' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'IGFBP2' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'SLC5A6' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'ACSL3' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'ITGA2' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'LAMP2' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'CYP51A1' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'NPY1R' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'EFEMP1' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'CKAP4' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'ITGB1' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'EPCAM' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'CD9' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'HEXB' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/Spectra/Spectra.py:1340: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. 
Value 'PRDX4' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n", " df.iloc[i,j] = id2word[idx_matrix[i,j]]\n" ] } ], "source": [ "annotations = spc.default_gene_sets.load()\n", "#fit the model (num_epochs is set to 10000 below; the recorded run took about two hours, so lower it for a quick test)\n", "model_global = spc.est_spectra(adata = adata, gene_set_dictionary = annotations['global'], # because we do not use the cell types\n", " # we will supply a regular dict\n", " # instead of the nested dict above\n", " use_highly_variable = True, cell_type_key =None, #\"cell_type_annotations\" ,\n", " use_weights = True, lam = 0.1, \n", " delta=0.001,\n", " kappa = None , #change to None 0.00001\n", " rho = 0.001, #rho 0.00001\n", " use_cell_types = False, #set to False to not use the cell type annotations\n", " n_top_vals = 25, #number set to 50\n", " clean_gs=True,\n", " label_factors=True,\n", " num_epochs=10000,\n", " overlap_threshold=0.2,\n", " min_gs_num = 3\n", " )" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n", "/Users/helenking/anaconda3/envs/apu/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "# Build the neighbour graph and UMAP embedding here; presumably added to avoid KeyError: \"Could not find 'umap' or 'X_umap' in .obsm\" in later plotting (TODO confirm)\n", "sc.pp.neighbors(adata, use_rep='X')\n", "sc.tl.umap(adata)\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "dataframe_spectra=pd.DataFrame(adata.obsm[\"SPECTRA_cell_scores\"], index=adata.obs_names, columns=adata.uns[\"SPECTRA_overlap\"].index)\n", "dataframe_spectra_columns=adata.uns[\"SPECTRA_overlap\"].index\n", "#subset the dataframe to the cell barcodes of ESR1_P1, ESR1_P2 and non-targeting cells\n", "gene=\"ESR1\"\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | 0-X-global-X-all_biotin_metabolism | \n", "1-X-global-X-all_purine_synthesis | \n", "2-X-global-X-all_ethanol_metabolism | \n", "3-X-global-X-all_amino-sugar-nucleotide-sugar_metabolism | \n", "4-X-global-X-all_steroid_metabolism | \n", "5-X-global-X-all_carnitine-shuttle | \n", "6-X-global-X-all_TLR_signaling | \n", "7-X-global-X-all_phosphoinositide_signaling | \n", "8-X-global-X-all_microautophagy-lysosomal | \n", "9-X-global-X-9 | \n", "... | \n", "139-X-global-X-139 | \n", "140-X-global-X-all_galactose_metabolism | \n", "141-X-global-X-all_MYC_targets | \n", "142-X-global-X-all_GLU_metabolism | \n", "143-X-global-X-all_fatty-acid_synthesis | \n", "144-X-global-X-all_pentose-phosphate-pathway | \n", "145-X-global-X-all_ros_response | \n", "146-X-global-X-all_IL6-JAK-STAT3_signaling | \n", "147-X-global-X-all_autophagy-chaperone-mediated | \n", "148-X-global-X-148 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GTGTGCGGTTAGTGGG-1 | \n", "9.237579e-03 | \n", "4.053838e-04 | \n", "4.750246e-11 | \n", "5.322683e-04 | \n", "5.557497e-09 | \n", "4.248724e-10 | \n", "7.161847e-04 | \n", "3.075701e-07 | \n", "3.413473e-11 | \n", "2.085672e-02 | \n", "... | \n", "6.090823e-03 | \n", "1.323262e-10 | \n", "0.000388 | \n", "2.880409e-04 | \n", "0.003124 | \n", "0.029767 | \n", "3.956140e-07 | \n", "0.000363 | \n", "0.057384 | \n", "0.027820 | \n", "
| TCAGATGAGCATCATC-1 | \n", "7.051101e-09 | \n", "1.662875e-04 | \n", "5.717598e-04 | \n", "1.144720e-04 | \n", "3.238609e-04 | \n", "1.071320e-04 | \n", "5.162545e-04 | \n", "3.040417e-04 | \n", "1.782021e-04 | \n", "2.538253e-03 | \n", "... | \n", "4.685277e-03 | \n", "1.341369e-10 | \n", "0.000818 | \n", "1.632836e-10 | \n", "0.002828 | \n", "0.041279 | \n", "5.058674e-04 | \n", "0.000398 | \n", "0.052450 | \n", "0.028108 | \n", "
| GCAATCACACGACGAA-1 | \n", "2.167318e-03 | \n", "3.691382e-04 | \n", "4.885835e-11 | \n", "3.279115e-04 | \n", "4.265923e-04 | \n", "1.508868e-04 | \n", "3.599413e-04 | \n", "2.189579e-04 | \n", "1.859391e-04 | \n", "2.479209e-03 | \n", "... | \n", "1.467027e-02 | \n", "2.486610e-04 | \n", "0.001294 | \n", "1.383442e-04 | \n", "0.002948 | \n", "0.031350 | \n", "3.599227e-04 | \n", "0.000401 | \n", "0.044456 | \n", "0.027079 | \n", "
| CACACAAAGCTGCAAG-1 | \n", "1.413153e-03 | \n", "3.007432e-04 | \n", "1.872481e-03 | \n", "3.030962e-04 | \n", "2.353136e-04 | \n", "1.626892e-09 | \n", "1.210399e-09 | \n", "2.119303e-04 | \n", "2.140315e-10 | \n", "3.126384e-03 | \n", "... | \n", "4.533910e-03 | \n", "4.900877e-10 | \n", "0.002375 | \n", "2.924478e-10 | \n", "0.008609 | \n", "0.031775 | \n", "2.660242e-04 | \n", "0.000281 | \n", "0.052374 | \n", "0.020489 | \n", "
| AGGCCACTCTTACCTA-1 | \n", "1.927258e-10 | \n", "2.121895e-04 | \n", "1.418944e-03 | \n", "3.032002e-08 | \n", "7.462370e-04 | \n", "3.291458e-10 | \n", "8.620481e-04 | \n", "6.641354e-04 | \n", "1.497885e-09 | \n", "2.075204e-02 | \n", "... | \n", "4.979285e-10 | \n", "2.278506e-04 | \n", "0.001540 | \n", "2.485600e-04 | \n", "0.007542 | \n", "0.030187 | \n", "2.048542e-04 | \n", "0.000002 | \n", "0.063139 | \n", "0.023481 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| TAGCCGGAGCGTAATA-1 | \n", "7.170779e-11 | \n", "7.430381e-07 | \n", "1.470620e-03 | \n", "7.200462e-09 | \n", "2.873440e-04 | \n", "1.814014e-04 | \n", "5.772967e-04 | \n", "4.233786e-09 | \n", "4.460185e-11 | \n", "5.374444e-03 | \n", "... | \n", "3.734831e-07 | \n", "1.968945e-04 | \n", "0.002929 | \n", "2.826797e-04 | \n", "0.004844 | \n", "0.042471 | \n", "2.920751e-04 | \n", "0.000505 | \n", "0.047386 | \n", "0.015173 | \n", "
| CTTGGCTTCAGCTTAG-1 | \n", "2.476367e-03 | \n", "2.263839e-04 | \n", "1.547956e-03 | \n", "4.779655e-04 | \n", "3.421056e-08 | \n", "1.699237e-09 | \n", "3.557669e-04 | \n", "3.625548e-04 | \n", "6.701469e-10 | \n", "3.329656e-03 | \n", "... | \n", "4.498193e-07 | \n", "1.046996e-04 | \n", "0.001219 | \n", "2.700598e-04 | \n", "0.007622 | \n", "0.046131 | \n", "1.096286e-07 | \n", "0.000291 | \n", "0.000048 | \n", "0.036125 | \n", "
| CATATGGAGGTGTTAA-1 | \n", "2.570778e-03 | \n", "2.000376e-04 | \n", "1.210368e-03 | \n", "2.292907e-04 | \n", "3.842223e-04 | \n", "2.233571e-10 | \n", "2.944821e-04 | \n", "3.124280e-04 | \n", "3.961496e-11 | \n", "7.104835e-03 | \n", "... | \n", "9.437590e-04 | \n", "1.307871e-08 | \n", "0.000536 | \n", "5.709161e-04 | \n", "0.013282 | \n", "0.033105 | \n", "7.381341e-04 | \n", "0.000377 | \n", "0.049024 | \n", "0.026089 | \n", "
| TCTCTAAAGGAGCGTT-1 | \n", "1.391553e-03 | \n", "3.627104e-04 | \n", "9.115024e-11 | \n", "1.524916e-04 | \n", "2.003262e-08 | \n", "5.953464e-10 | \n", "5.397420e-04 | \n", "1.982464e-04 | \n", "1.601765e-11 | \n", "6.056628e-03 | \n", "... | \n", "1.611285e-06 | \n", "3.034562e-10 | \n", "0.000512 | \n", "3.202521e-04 | \n", "0.000006 | \n", "0.045093 | \n", "7.756426e-04 | \n", "0.000383 | \n", "0.055106 | \n", "0.027756 | \n", "
| GGGAATGTCTTGACGA-1 | \n", "2.001940e-10 | \n", "2.734928e-04 | \n", "1.260464e-03 | \n", "5.065035e-04 | \n", "2.395792e-04 | \n", "1.043869e-04 | \n", "3.210498e-04 | \n", "5.566190e-04 | \n", "4.005243e-11 | \n", "4.522674e-07 | \n", "... | \n", "2.066875e-07 | \n", "2.202848e-04 | \n", "0.001907 | \n", "2.936413e-04 | \n", "0.008942 | \n", "0.045249 | \n", "9.947184e-04 | \n", "0.000402 | \n", "0.045627 | \n", "0.018386 | \n", "
27929 rows × 149 columns
\n", "