[ ]:
import sctoolbox
from sctoolbox import settings
from sctoolbox.utilities import bgcolor

Proportion Analysis


1 - Description

Differential proportion analysis aims to identify clusters whose composition differs between biological conditions. Scanpro offers a linear regression framework and empirical Bayes moderated statistical tests that take sample-to-sample variation into account. Scanpro also generates pseudo-replicates automatically for unreplicated data.

In this notebook we will use Scanpro. For more information, see the Scanpro documentation.

|3ea053a67c1242ceb8f0e7616e1bd482|


2 - Input/output settings

⬐ Fill in input data here ⬎

[ ]:
%bgcolor PowderBlue

# In/output paths
settings.adata_input_dir = "../adatas/"
settings.adata_output_dir = "../adatas/"
settings.figure_dir = '../figures/proportion_analysis/'
settings.log_file: "../logs/scanpro_analysis_log.txt"

# Input/Output
last_notebook_adata = "anndata_5.h5ad"
output = "anndata_scanpro.h5ad"
plot_suffix = "scanpro"

3 - Loading packages

[ ]:
import pandas as pd
pd.set_option('display.max_columns', None)  # no limit to the number of columns shown
# NOTE(review): helpers are imported from both `sctoolbox.utilities` and
# `sctoolbox.utils` — confirm both module paths exist in the installed
# sctoolbox version.
import sctoolbox.utilities as utils  # provides load_h5ad / save_h5ad used below
import sctoolbox.utils.decorator as deco  # provides the log_anndata decorator used below

# We will use Scanpro for proportion analysis
from scanpro import scanpro

4 - Load anndata

[ ]:
# Load the AnnData object named in the input/output settings cell
adata = utils.load_h5ad(last_notebook_adata)

# Preview the object and its obs/var tables, truncated to 5 rows but with all
# columns visible. The option names are spelled out in full
# ("display.max_rows", not "display.max.rows") so the lookup does not depend
# on pandas' regex-based option matching.
with pd.option_context("display.max_rows", 5, "display.max_columns", None):
    display(adata)
    display(adata.obs)
    display(adata.var)

5 - General Input

⬐ Fill in input data here ⬎

[ ]:
%bgcolor PowderBlue


# Final clustering or cell type annotation column (passed to Scanpro as `clusters_col`)
clustering_col = "SCSA_pred_celltype"

# Sample column (passed to Scanpro as `samples_col`). Set to None if not available
sample_col = "sample"

# Conditions to compare (column passed to Scanpro as `conds_col`)
condition_col = "chamber"
specific_conds = None  # specify conditions to compare: ["cond1", "cond2",...]. If None, all conditions are compared

# Transformation method
trans = 'logit'  # can be "logit" or "arcsin".

# Covariates to include in analysis
covariates = None

### For unreplicated data ###
# If sample_col=None, data is assumed unreplicated.
# Parameters for the bootstrapping if data is unreplicated
n_sims = 100  # number of bootstrapping simulations
n_reps = 8  # number of pseudo-replicates to generate for each condition

# P-value threshold to determine significance (compared against the adjusted p-values)
significance_threshold = 0.05

### Plots ###
# Clusters to plot
specific_clusters = None  # specify clusters you want to plot: ["c1", "c2",...], None to plot all
# Number of plots per row
n_cols = 4

6 - Proportion analysis with Scanpro

[ ]:
# Wrap scanpro with sctoolbox's log_anndata decorator and rebind the name, so
# the call in the next cell goes through the wrapper
# (presumably this records the call on the AnnData object — confirm).
# NOTE(review): re-running this cell wraps the already-wrapped function again;
# run it once per kernel session.
scanpro = deco.log_anndata(scanpro)
[ ]:
# Collect the user settings from the "General Input" cell as keyword arguments
scanpro_kwargs = dict(
    clusters_col=clustering_col,
    samples_col=sample_col,
    conds_col=condition_col,
    conditions=specific_conds,
    covariates=covariates,
    transform=trans,
    n_sims=n_sims,
    n_reps=n_reps,
)

# Run the proportion analysis on the loaded AnnData object
out = scanpro(adata, **scanpro_kwargs)

# Show the results table as the cell output
out.results
[ ]:
# Flag each row of the results table whose adjusted p-value is below the
# chosen threshold, and keep the flags as a plain {index: bool} dictionary
adjusted_p_values = out.results['adjusted_p_values']
is_significant = adjusted_p_values < significance_threshold
significant_change = is_significant.to_dict()
significant_change
[ ]:
# Strip plot of the selected clusters; the file is written into the figure directory
stripplot_path = f'{settings.figure_dir}{plot_suffix}_stripplot.pdf'
out.plot(kind='stripplot', clusters=specific_clusters, n_columns=n_cols, save=stripplot_path)
[ ]:
# Box plot of the selected clusters; the file is written into the figure directory
boxplot_path = f'{settings.figure_dir}{plot_suffix}_boxplot.pdf'
out.plot(kind='boxplot', clusters=specific_clusters, n_columns=n_cols, save=boxplot_path)
[ ]:
# Gather everything worth keeping from the Scanpro run in one entry
scanpro_entry = {
    "results": out.results,
    "significance": significant_change,
    "proportions": out.props,
    "counts": out.counts,
    "transformation": trans,
    "conditions": out.conditions,
}

# Store the entry under the "scanpro" key of adata.uns
scanpro_uns_dict = {"scanpro": scanpro_entry}
adata.uns.update(scanpro_uns_dict)

7 - Saving adata

[ ]:
# Write the AnnData object (with the Scanpro results stored in .uns) under the
# output file name chosen in the input/output settings cell
utils.save_h5ad(adata, output)
[ ]:
# Close the sctoolbox log file as the final step of the notebook
settings.close_logfile()