[ ]:
import sctoolbox
from sctoolbox.utilities import bgcolor
0C - Velocity Analysis
1 - Description
Note: Requires notebook 04_clustering to be run prior to this notebook.
1.1 Velocity Analysis
RNA velocity describes the rate of gene expression change for an individual gene at a given time point based on the ratio of its spliced and unspliced messenger RNA (mRNA). A significantly higher unspliced mRNA count for a specific gene indicates that the future cell state would likely show an expression increase. On the other hand, a significantly higher spliced mRNA count for a specific gene indicates that the future cell state would likely show a depletion of expression.
Image source: La Manno, G., Soldatov, R., Zeisel, A. et al. RNA velocity of single cells. Nature 560, 494–498 (2018). https://doi.org/10.1038/s41586-018-0414-6.
2 - Setup
[ ]:
from sctoolbox import settings
import scvelo as scv
import pandas as pd
import sctoolbox.utils as utils
utils.settings_from_config("config.yaml", key="0C")
scv.settings.figdir = settings.figure_dir
3 - Load anndata
⬐ Fill in input data here ⬎
⬐ Fill in input data here ⬎
[ ]:
%bgcolor PowderBlue
# Input filename
last_notebook_adata = "anndata_4.h5ad"
[ ]:
adata = utils.load_h5ad(last_notebook_adata)
with pd.option_context("display.max.rows", 5, "display.max.columns", None):
display(adata)
display(adata.obs)
display(adata.var)
4 - General input
⬐ Fill in input data here ⬎
⬐ Fill in input data here ⬎
[ ]:
%bgcolor PowderBlue
# Path to vdata file
vdata_path = ""
# Column containing final clustering
clustering = "clustering"
# Embedding umap/tsne
embedding = "umap"
# Number of pcs
n_pcs = 10
# Genes of interest
goi = []
# Number of threads
threads = 4
### Plotting parameters ###
## Velocity embeding stream
arrow_color = "k"
stream_arrow_size = 1
## Velocity embedding
## arrow size needs to be <= arrow_length
arrow_length = 3
arrow_size = 3
## Velocity graph
velo_graph_threshold = 0.1
5 - Check for spliced/unspliced/ambiguous layer
[ ]:
layer = set(adata.layers.keys())
if not {"spliced", "unspliced"}.issubset(layer):
cell_count = adata.shape[0]
# load vdata
vdata = scv.read(filename=vdata_path)
if len(adata.obs.index.union(vdata.obs.index)) == 0:
raise ValueError("No overlap between cell barcodes found. Please check adata.obs/vdata.obs.")
# Merge adata vs vdata
scv.utils.merge(adata, vdata, copy=False)
print(f"{adata.shape[0]}/{cell_count} ({int((adata.shape[0]/cell_count)*100)}%) cells remain after merging adata with vdata.")
6 - Preprocessing
[ ]:
scv.pp.filter_and_normalize(adata)
scv.pp.moments(adata, n_pcs=n_pcs)
7 - Calculate velocity
[ ]:
scv.tl.velocity(adata, mode='stochastic')
scv.tl.velocity_graph(adata, n_jobs=threads)
scv.tl.velocity_confidence(adata)
8 - Visualization
8.1 - Proportion
[ ]:
scv.pl.proportions(adata, groupby=clustering, save="velocity_proportion.pdf")
8.2 - Embedding
[ ]:
scv.pl.velocity_embedding_stream(adata,
basis=embedding,
color=clustering,
arrow_color=arrow_color,
arrow_size=stream_arrow_size,
save="velocity_embeding_stream.pdf")
[ ]:
scv.pl.velocity_embedding(adata, color=clustering, arrow_length=arrow_length, arrow_size=arrow_size, save="velocity_embeding.pdf")
8.3 - Velocity length and confidence
[ ]:
keys = ['velocity_length', 'velocity_confidence']
scv.pl.scatter(adata, c=keys, cmap='coolwarm', save="velocity_confidence.pdf")
[ ]:
df = adata.obs.groupby(clustering)[keys].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)
8.4 - Velocity graph
[ ]:
scv.pl.velocity_graph(adata, threshold=velo_graph_threshold, color=clustering)
9 - Pseudotime - scVelo
[ ]:
scv.tl.velocity_pseudotime(adata)
scv.pl.scatter(adata, color='velocity_pseudotime', cmap='gnuplot')
10 - PAGA
[ ]:
scv.tl.paga(adata, groups=clustering)
df = scv.get_df(adata, 'paga/transitions_confidence', precision=2).T
df.style.background_gradient(cmap='Blues').format('{:.2g}')
[ ]:
scv.pl.paga(adata, basis=embedding, color=clustering, size=50, alpha=.1, min_edge_width=2, node_size_scale=1.5)
11 - Velocity genes
[ ]:
scv.tl.rank_velocity_genes(adata, groupby=clustering, min_corr=.3)
[ ]:
velocity_genes = pd.DataFrame(adata.uns['rank_velocity_genes']['names'])
velocity_genes.head()
11.1 - Gene of interest
[ ]:
if goi:
scv.pl.velocity(adata, goi, ncols=2, save="velocity_goi.pdf")
[ ]:
for col in velocity_genes.columns:
genes = velocity_genes[col][:5]
scv.pl.velocity(adata, genes, ncols=2, save=f"marker_velocity_genes_{col}.pdf")
12 - Saving adata
[ ]:
utils.save_h5ad(adata, "anndata_14.h5ad")
[ ]:
settings.close_logfile()