[ ]:
import sctoolbox
from sctoolbox import settings
from sctoolbox.utilities import bgcolor
Proportion Analysis
1 - Description
Differential proportion analysis aims to identify clusters that show differential composition between biological conditions. Scanpro offers a linear regression framework and empirical Bayes moderated statistical tests that take sample-to-sample variation into account. Scanpro also generates pseudo-replicates automatically for unreplicated data.
In this notebook we will use Scanpro. For more information, see the Scanpro documentation.
|3ea053a67c1242ceb8f0e7616e1bd482|
2 - Input/output settings
⬐ Fill in input data here ⬎
⬐ Fill in input data here ⬎
[ ]:
%bgcolor PowderBlue
# In/output paths
settings.adata_input_dir = "../adatas/"
settings.adata_output_dir = "../adatas/"
settings.figure_dir = '../figures/proportion_analysis/'
settings.log_file: "../logs/scanpro_analysis_log.txt"
# Input/Output
last_notebook_adata = "anndata_5.h5ad"
output = "anndata_scanpro.h5ad"
plot_suffix = "scanpro"
3 - Loading packages
[ ]:
import pandas as pd
pd.set_option('display.max_columns', None)  # no limit on the number of columns shown when displaying DataFrames
import sctoolbox.utilities as utils  # used below to load and save the .h5ad file
import sctoolbox.utils.decorator as deco  # provides log_anndata, which is wrapped around scanpro below
# We will use Scanpro for the proportion analysis
from scanpro import scanpro
4 - Load anndata
[ ]:
# Load the AnnData object produced by the previous notebook
adata = utils.load_h5ad(last_notebook_adata)

# Show a compact overview of the object and its obs/var tables.
# The option names are spelled out exactly ("display.max_rows", not
# "display.max.rows"): the dotted spelling only works because pandas treats
# option names as regex patterns, and it breaks if a pattern ever matches
# more than one option.
with pd.option_context("display.max_rows", 5, "display.max_columns", None):
    display(adata)
    display(adata.obs)
    display(adata.var)
5 - General Input
⬐ Fill in input data here ⬎
⬐ Fill in input data here ⬎
[ ]:
%bgcolor PowderBlue
#Final clustering or celltype annotation column
clustering_col = "SCSA_pred_celltype"
#Set to None if not available
sample_col = "sample"
#Conditions to compare
condition_col = "chamber"
specific_conds = None # specify conditions to compare: ["cond1", "cond2",...]. If None, all conditions are compared
#Transformation method
trans = 'logit' # can be "logit" or "arcsin".
#Covariates to include in analysis
covariates = None
### For unreplicated data ###
#If sample_col=None, data is assumed unreplicated.
#Parameters for the bootstrapping if data is unreplicated
n_sims = 100 # number of bootstrapping simulations
n_reps = 8 # number of pseudo-replicates to generate for each condition
#P-value Threshold to determine significane
significance_threshold = 0.05
### Plots ###
#Clusters to plot
specific_clusters = None # specify clusters you want to plot: ["c1", "c2",...], None to plot all
#Number of plots per row
n_cols = 4
6 - Proportion analysis with Scanpro
[ ]:
# Wrap scanpro with sctoolbox's log_anndata decorator and rebind the name, so
# the scanpro(...) call below goes through the wrapper.
# NOTE(review): presumably this records the function call in the AnnData
# object - confirm against the sctoolbox documentation.
# Because the name is rebound, re-running this cell wraps the function again.
scanpro = deco.log_anndata(scanpro)
[ ]:
# Collect the user-defined settings as keyword arguments for Scanpro
scanpro_kwargs = {
    "clusters_col": clustering_col,
    "samples_col": sample_col,
    "conds_col": condition_col,
    "conditions": specific_conds,
    "covariates": covariates,
    "transform": trans,
    "n_sims": n_sims,
    "n_reps": n_reps,
}

# Run the proportion analysis; the trailing expression displays the result table
out = scanpro(adata, **scanpro_kwargs)
out.results
[ ]:
# Compare the adjusted p-values against the threshold and keep the resulting
# True/False flags as a plain dict keyed by the index of the results table.
adjusted_pvals = out.results['adjusted_p_values']
below_threshold = adjusted_pvals < significance_threshold
significant_change = below_threshold.to_dict()
significant_change
[ ]:
# Strip plot for the selected clusters, saved as PDF in the figure directory
stripplot_file = f'{settings.figure_dir}{plot_suffix}_stripplot.pdf'
out.plot(kind='stripplot', clusters=specific_clusters, n_columns=n_cols, save=stripplot_file)
[ ]:
# Box plot for the selected clusters, saved as PDF in the figure directory
boxplot_file = f'{settings.figure_dir}{plot_suffix}_boxplot.pdf'
out.plot(kind='boxplot', clusters=specific_clusters, n_columns=n_cols, save=boxplot_file)
[ ]:
# Gather everything worth keeping from the Scanpro run
scanpro_summary = {
    "results": out.results,
    "significance": significant_change,
    "proportions": out.props,
    "counts": out.counts,
    "transformation": trans,
    "conditions": out.conditions,
}

# Store it under the "scanpro" key of adata.uns
scanpro_uns_dict = {"scanpro": scanpro_summary}
adata.uns.update(scanpro_uns_dict)
7 - Saving adata
[ ]:
# Write the AnnData object to the file name given in `output`.
# NOTE(review): presumably resolved relative to settings.adata_output_dir - confirm.
utils.save_h5ad(adata, output)
[ ]:
# Final step of the notebook: call sctoolbox's close_logfile().
# NOTE(review): presumably this finalizes the log file set via settings.log_file - confirm.
settings.close_logfile()