[ ]:
import sctoolbox
from sctoolbox.utilities import bgcolor

0C - Velocity Analysis


1 - Description

Note: Requires notebook 04_clustering to be run prior to this notebook.

This notebook can be used for velocity analysis of scRNA data using the scvelo package.
Most analysis steps are directly taken from the scvelo documentation

1.1 Velocity Analysis

RNA velocity describes the rate of gene expression change for an individual gene at a given time point based on the ratio of its spliced and unspliced messenger RNA (mRNA). A significantly higher unspliced mRNA count for a specific gene indicates that the future cell state would likely show an expression increase. On the other hand, a significantly higher spliced mRNA count for a specific gene indicates that the future cell state would likely show a depletion of expression.

image-2.png Image source: La Manno, G., Soldatov, R., Zeisel, A. et al. RNA velocity of single cells. Nature 560, 494–498 (2018). https://doi.org/10.1038/s41586-018-0414-6.


2 - Setup

[ ]:
from sctoolbox import settings
import scvelo as scv
import pandas as pd
import sctoolbox.utils as utils
utils.settings_from_config("config.yaml", key="0C")
scv.settings.figdir = settings.figure_dir

3 - Load anndata

⬐ Fill in input data here ⬎

[ ]:
%bgcolor PowderBlue

# Input filename
last_notebook_adata = "anndata_4.h5ad"

[ ]:
adata = utils.load_h5ad(last_notebook_adata)

with pd.option_context("display.max.rows", 5, "display.max.columns", None):
    display(adata)
    display(adata.obs)
    display(adata.var)

4 - General input

⬐ Fill in input data here ⬎

[ ]:
%bgcolor PowderBlue

# Path to vdata file
vdata_path = ""
# Column containing final clustering
clustering = "clustering"
# Embedding umap/tsne
embedding = "umap"
# Number of pcs
n_pcs = 10

# Genes of interest
goi = []

# Number of threads
threads = 4

### Plotting parameters ###

## Velocity embeding stream
arrow_color = "k"
stream_arrow_size = 1

## Velocity embedding
## arrow size needs to be <= arrow_length
arrow_length = 3
arrow_size = 3

## Velocity graph
velo_graph_threshold = 0.1

5 - Check for spliced/unspliced/ambiguous layer

[ ]:
layer = set(adata.layers.keys())
if not {"spliced", "unspliced"}.issubset(layer):

    cell_count = adata.shape[0]

    # load vdata
    vdata = scv.read(filename=vdata_path)

    if len(adata.obs.index.union(vdata.obs.index)) == 0:
        raise ValueError("No overlap between cell barcodes found. Please check adata.obs/vdata.obs.")

    # Merge adata vs vdata
    scv.utils.merge(adata, vdata, copy=False)
    print(f"{adata.shape[0]}/{cell_count} ({int((adata.shape[0]/cell_count)*100)}%) cells remain after merging adata with vdata.")

6 - Preprocessing

[ ]:
scv.pp.filter_and_normalize(adata)
scv.pp.moments(adata, n_pcs=n_pcs)

7 - Calculate velocity

[ ]:
scv.tl.velocity(adata, mode='stochastic')
scv.tl.velocity_graph(adata, n_jobs=threads)
scv.tl.velocity_confidence(adata)

8 - Visualization


8.1 - Proportion

[ ]:
scv.pl.proportions(adata, groupby=clustering, save="velocity_proportion.pdf")

8.2 - Embedding

[ ]:
scv.pl.velocity_embedding_stream(adata,
                                 basis=embedding,
                                 color=clustering,
                                 arrow_color=arrow_color,
                                 arrow_size=stream_arrow_size,
                                 save="velocity_embeding_stream.pdf")
[ ]:
scv.pl.velocity_embedding(adata, color=clustering, arrow_length=arrow_length, arrow_size=arrow_size, save="velocity_embeding.pdf")

8.3 - Velocity length and confidence

The speed or rate of differentiation is given by the length of the velocity vector.
The coherence of the vector field (i.e., how a velocity vector correlates with its neighboring velocities) provides a measure of confidence.
[ ]:
keys = ['velocity_length', 'velocity_confidence']
scv.pl.scatter(adata, c=keys, cmap='coolwarm', save="velocity_confidence.pdf")
[ ]:
df = adata.obs.groupby(clustering)[keys].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)

8.4 - Velocity graph

[ ]:
scv.pl.velocity_graph(adata, threshold=velo_graph_threshold, color=clustering)

9 - Pseudotime - scVelo

[ ]:
scv.tl.velocity_pseudotime(adata)
scv.pl.scatter(adata, color='velocity_pseudotime', cmap='gnuplot')

10 - PAGA

[ ]:
scv.tl.paga(adata, groups=clustering)
df = scv.get_df(adata, 'paga/transitions_confidence', precision=2).T
df.style.background_gradient(cmap='Blues').format('{:.2g}')
[ ]:
scv.pl.paga(adata, basis=embedding, color=clustering, size=50, alpha=.1, min_edge_width=2, node_size_scale=1.5)

11 - Velocity genes


[ ]:
scv.tl.rank_velocity_genes(adata, groupby=clustering, min_corr=.3)
[ ]:
velocity_genes = pd.DataFrame(adata.uns['rank_velocity_genes']['names'])
velocity_genes.head()

11.1 - Gene of interest

[ ]:
if goi:
    scv.pl.velocity(adata, goi, ncols=2, save="velocity_goi.pdf")
[ ]:
for col in velocity_genes.columns:
    genes = velocity_genes[col][:5]
    scv.pl.velocity(adata, genes, ncols=2, save=f"marker_velocity_genes_{col}.pdf")

12 - Saving adata

[ ]:
utils.save_h5ad(adata, "anndata_14.h5ad")
[ ]:
settings.close_logfile()