# Source code for sctoolbox.tools.tobias

"""Tools for TOBIAS usage."""
import yaml
import os
import sctoolbox.utils as utils

from beartype import beartype
from beartype.typing import Optional, Literal


# from: https://github.com/yaml/pyyaml/issues/127#issuecomment-525800484
class _SpaceDumper(yaml.SafeDumper):
    """
    SafeDumper variant that inserts blank lines between top-level objects.

    HACK inspired by https://stackoverflow.com/a/44284819/3786245
    """

    def write_line_break(self, data=None):
        """Write the regular line break, plus an extra one at the top level."""
        super().write_line_break(data)

        # A single entry on the indent stack is what this hack treats as
        # "top level"; anything else gets no additional blank line.
        at_top_level = len(self.indents) == 1
        if not at_top_level:
            return

        super().write_line_break()


@beartype
def write_TOBIAS_config(out_path: str,
                        bams: list[str] = [],
                        names: Optional[list[str]] = None,
                        fasta: Optional[str] = None,
                        blacklist: Optional[str] = None,
                        gtf: Optional[str] = None,
                        motifs: Optional[str] = None,
                        organism: Literal["human", "mouse", "zebrafish"] = "human",
                        output: str = "TOBIAS_output",
                        plot_comparison: bool = True,
                        plot_correction: bool = True,
                        plot_venn: bool = True,
                        coverage: bool = True,
                        wilson: bool = True) -> None:
    """
    Write a TOBIAS config file from input bams/fasta/blacklist etc.

    Parameters
    ----------
    out_path : str
        Path to output yaml file.
    bams : list[str], default []
        List of paths to bam files.
    names : Optional[list[str]], default None
        List of names for the bams. If None, the names are set to the bam file names with common prefix and suffix removed.
        If that stripping leaves any name empty (e.g. when only a single bam is given), the bam file names without
        directory and extension are used instead.
    fasta : Optional[str], default None
        Path to fasta file.
    blacklist : Optional[str], default None
        Path to blacklist file.
    gtf : Optional[str], default None
        Path to gtf file.
    motifs : Optional[str], default None
        Path to motif file.
    organism : Literal["human", "mouse", "zebrafish"], default 'human'
        Organism name. TOBIAS supports 'human', 'mouse' or 'zebrafish'.
    output : str, default 'TOBIAS_output'
        Output directory of the TOBIAS run.
    plot_comparison : bool, default True
        Flag for the step of plotting comparison of the TOBIAS run.
    plot_correction : bool, default True
        Flag for the step of plotting correction of the TOBIAS run.
    plot_venn : bool, default True
        Flag for the step of plotting venn diagrams of the TOBIAS run.
    coverage : bool, default True
        Flag for coverage step of the TOBIAS run.
    wilson : bool, default True
        Flag for wilson step of the TOBIAS run.

    Raises
    ------
    ValueError
        If the number of names does not match the number of bams, or if the names are not unique.
    """

    # NOTE: `bams` is only read below and never mutated, so the shared default list is harmless here.

    if names is None:
        # Remove any common prefix and suffix from names
        prefix = os.path.commonprefix(bams)
        suffix = utils.longest_common_suffix(bams)
        names = [utils.remove_prefix(s, prefix) for s in bams]
        names = [utils.remove_suffix(s, suffix) for s in names]

        # Stripping can consume a whole path (e.g. a single bam is its own common prefix),
        # which would produce an empty sample name; use the plain file names in that case.
        if not all(names):
            names = [os.path.splitext(os.path.basename(s))[0] for s in bams]

    elif len(names) != len(bams):
        raise ValueError(f"Number of names ({len(names)}) does not match number of bams ({len(bams)}).")

    # Names become dictionary keys; duplicates would silently drop bams from the config
    if len(set(names)) != len(names):
        raise ValueError(f"Names for bams must be unique, got: {names}")

    # Start building yaml (key order is kept in the output since sort_keys=False)
    data = {}
    data["data"] = dict(zip(names, bams))
    data["run_info"] = {"organism": organism.lower(),
                        "blacklist": blacklist,
                        "fasta": fasta,
                        "gtf": gtf,
                        "motifs": motifs,
                        "output": output}

    # Flags for parts of pipeline to include/exclude
    data["flags"] = {"plot_comparison": plot_comparison,
                     "plot_correction": plot_correction,
                     "plot_venn": plot_venn,
                     "coverage": coverage,
                     "wilson": wilson}

    # Default module parameters
    data["macs"] = "--nomodel --shift -100 --extsize 200 --broad"
    data["atacorrect"] = ""
    data["footprinting"] = ""
    data["bindetect"] = ""

    # Write dict to yaml file
    with open(out_path, 'w', encoding="utf-8") as f:
        yaml.dump(data, f, Dumper=_SpaceDumper, default_flow_style=False, sort_keys=False)

    print(f"Wrote TOBIAS config yaml to '{out_path}'")