Source code for sctoolbox.tools.tobias

"""Tools for TOBIAS usage."""
import yaml
import os
import sctoolbox.utils as utils

from beartype import beartype
from beartype.typing import Optional, Literal


# from: https://github.com/yaml/pyyaml/issues/127#issuecomment-525800484
class _SpaceDumper(yaml.SafeDumper):
    """
    SafeDumper variant used to visually separate top-level yaml entries.

    HACK: insert blank lines between top-level objects,
    inspired by https://stackoverflow.com/a/44284819/3786245
    """

    def write_line_break(self, data=None):
        """Write the regular line break and, if exactly one indent is on the stack, one extra."""
        parent = super()
        parent.write_line_break(data)

        # Only a single entry on the indent stack triggers the additional blank line.
        if len(self.indents) != 1:
            return
        parent.write_line_break()



[docs]
@beartype
def write_TOBIAS_config(out_path: str,
                        bams: list[str] = [],
                        names: Optional[list[str]] = None,
                        fasta: Optional[str] = None,
                        blacklist: Optional[str] = None,
                        gtf: Optional[str] = None,
                        motifs: Optional[str] = None,
                        organism: Literal["human", "mouse", "zebrafish"] = "human",
                        output: str = "TOBIAS_output",
                        plot_comparison: bool = True,
                        plot_correction: bool = True,
                        plot_venn: bool = True,
                        coverage: bool = True,
                        wilson: bool = True) -> None:
    """
    Write a TOBIAS config file from input bams/fasta/blacklist etc.

    Parameters
    ----------
    out_path : str
        Path to output yaml file.
    bams : list[str], default []
        List of paths to bam files.
    names : Optional[list[str]], default None
        List of names for the bams (one per bam, in the same order). If None, the names are set to the
        bam file names with common prefix and suffix removed. If that leaves any name empty (e.g. when
        only a single bam is given), the bam file basenames without extension are used instead.
    fasta : Optional[str], default None
        Path to fasta file.
    blacklist : Optional[str], default None
        Path to blacklist file.
    gtf : Optional[str], default None
        Path to gtf file.
    motifs : Optional[str], default None
        Path to motif file.
    organism : Literal["human", "mouse", "zebrafish"], default 'human'
        Organism name. TOBIAS supports 'human', 'mouse' or 'zebrafish'.
    output : str, default 'TOBIAS_output'
        Output directory of the TOBIAS run.
    plot_comparison : bool, default True
        Flag for the step of plotting comparison of the TOBIAS run.
    plot_correction : bool, default True
        Flag for the step of plotting correction of the TOBIAS run.
    plot_venn : bool, default True
        Flag for the step of plotting venn diagrams of the TOBIAS run.
    coverage : bool, default True
        Flag for coverage step of the TOBIAS run.
    wilson : bool, default True
        Flag for wilson step of the TOBIAS run.

    Raises
    ------
    ValueError
        If `names` is given but does not contain exactly one name per bam.
    """

    if names is None:
        # Remove any common prefix and suffix from names
        prefix = os.path.commonprefix(bams)
        suffix = utils.longest_common_suffix(bams)
        names = [utils.remove_prefix(s, prefix) for s in bams]
        names = [utils.remove_suffix(s, suffix) for s in names]

        # os.path.commonprefix of a single path is the path itself, so the stripping above
        # presumably leaves an empty name in that case (likewise when one path consists of
        # nothing but the shared prefix/suffix). An empty key is useless as a condition name,
        # therefore fall back to the plain file names without extension.
        if not all(names):
            names = [os.path.splitext(os.path.basename(s))[0] for s in bams]

    elif len(names) != len(bams):
        # Fail early with a clear message instead of an IndexError (too few names)
        # or silently ignoring surplus names (too many names).
        raise ValueError(f"Length of 'names' ({len(names)}) must match length of 'bams' ({len(bams)}).")

    # Start building yaml
    data = {}
    data["data"] = dict(zip(names, bams))
    data["run_info"] = {"organism": organism.lower(),
                        "blacklist": blacklist,
                        "fasta": fasta,
                        "gtf": gtf,
                        "motifs": motifs,
                        "output": output}

    # Flags for parts of pipeline to include/exclude
    data["flags"] = {"plot_comparison": plot_comparison,
                     "plot_correction": plot_correction,
                     "plot_venn": plot_venn,
                     "coverage": coverage,
                     "wilson": wilson}

    # Default module parameters
    data["macs"] = "--nomodel --shift -100 --extsize 200 --broad"
    data["atacorrect"] = ""
    data["footprinting"] = ""
    data["bindetect"] = ""

    # Write dict to yaml file; sort_keys=False keeps the section order defined above
    with open(out_path, 'w', encoding="utf-8") as f:
        yaml.dump(data, f, Dumper=_SpaceDumper, default_flow_style=False, sort_keys=False)

    print(f"Wrote TOBIAS config yaml to '{out_path}'")