Source code for sctoolbox.plotting.genometracks

"""Class to create a genome track plot via pyGenomeTracks."""

import os
import tempfile
import subprocess
import matplotlib.pyplot as plt

from beartype import beartype
from beartype.typing import Optional, Any, Literal, Iterable

import sctoolbox.utils as utils
from sctoolbox._settings import settings
logger = settings.logger


[docs] @beartype class GenomeTracks(): """Class for creating a genome track plot via pyGenomeTracks by collecting different tracks and writing the .ini file. Examples -------- .. plot:: :context: close-figs :nofigs: import sctoolbox.plotting as pl G = pl.GenomeTracks() #Add bigwig tracks G.add_track("data/tracks/bigwig1.bw", color="red") G.add_track("data/tracks/bigwig2.bw", color="blue", orientation="inverted") #Add hlines to previous bigwig track G.add_hlines([100, 200], color="red") G.add_hlines([250], color="blue", line_style="dashed") #Add links G.add_track("data/tracks/links.arcs", orientation="inverted") #Add one line between tracks G.add_hline() #Add .bed-file regions G.add_track("data/tracks/tad_classification.bed", title="bed") G.add_track("data/tracks/tad_classification.bed", color="Reds", title="bed colored by score column") #Add vlines and highlight G.add_track("data/tracks/vlines.bed", file_type="vlines") G.add_track("data/tracks/vhighlight.bed", file_type="vhighlight") #Add a spacer G.add_spacer() #Add genes G.add_track("data/tracks/genes.gtf", gene_rows=5) #Add x-axis G.add_spacer() G.add_xaxis() # Plot G.plot(region="X:3000000-3500000", output="genometrack_X.png", trackLabelFraction=0.2) .. image:: genometrack_X.png """ def __init__(self): """Initialize the GenomeTracks object.""" self.tracks = [] # dictionary of tracks self.type_count = {} self.global_defaults = {"height": 2} # dictionary of default values self.type_defaults = {"gtf": {"merge_transcripts": True, "fontsize": 12}, "hlines": {"overlay_previous": "share-y"}} self.available_types = ["bed", "bedgraph", "bedgraph_matrix", "bigwig", "domains", "epilogos", "fasta", "gtf", "hic_matrix", "hic_matrix_square", "links", "maf", "narrow_peak", "scalebar", "vhighlight", "vlines"] self.output = None # path to the output file if written def __repr__(self): """Return a string representation of the GenomeTracks object.""" n_tracks = len(self.tracks) return f"GenomeTracks object with {n_tracks} track(s). See <obj>.tracks for details."
[docs] def add_track(self, file: str, file_type: Optional[str] = None, name: Optional[str] = None, **kwargs: Any): """Add a track to the GenomeTracks object. The track will be added to the configuration file as one element, e.g. .add_track("file1.bed", file_type="bed", name="my_bed") will add the following to the configuration file: ``` [my_bed] file = file1.bed file_type = bed ``` Additional parameters are decided by <obj>.global_defaults and <obj>.type_defaults, or can be given by kwargs. All options and parameters are available at: https://pygenometracks.readthedocs.io/en/latest/content/all_tracks.html Parameters ---------- file : str Path to the file containing information to be plotted. Can be .bed, .bw, .gtf etc. file_type : str, default None Specify the 'file_type' argument for pyGenomeTracks. If None, the type will be predicted from the file ending. name : str, default None Name of the track. If None, the name will be estimated from the file_type e.g. 'bigwig 1'. or 'bed 2'. If the file_type is not available, the name will be the file path. **kwargs : arguments Additional arguments to be passed to pyGenomeTracks track configuration, for example `height=5` or `title="My track"`. Raises ------ ValueError If the file_type is not valid. """ # Setup track_dict = self.global_defaults.copy() track_dict["file"] = file # Predict file type if file_type is None: file_type = self._predict_type(file) else: # Check if file_type is valid if file_type not in self.available_types: if file_type == "spacer": raise ValueError("file_type 'spacer' is not valid. Use GenomeTracks.add_spacer() instead.") elif file_type == "x-axis": raise ValueError("file_type 'x-axis' is not valid. Use GenomeTracks.add_xaxis() instead.") elif file_type == "hlines": raise ValueError("file_type 'hlines' is not valid. Use GenomeTracks.add_hlines() instead.") else: raise ValueError(f"file_type '{file_type}' not valid. Choose from {self.available_types}") # If filetype was predicted or given; add to track dict if file_type is not None: if file_type in ["vlines", "vhighlight"]: track_dict["type"] = file_type # file_type = type for some options else: track_dict["file_type"] = file_type track_dict.update(self.type_defaults.get(file_type, {})) # add type defaults # Set title depending on file_type if file_type in ["vlines", "vhighlight"]: del track_dict["height"] else: track_dict["title"] = os.path.basename(file) # per default, can be overwritten with kwargs # Final overwrite with kwargs track_dict.update(kwargs) # Count file-types if file_type not in self.type_count: self.type_count[file_type] = 1 else: self.type_count[file_type] += 1 # Add track to tracks dictionary if name is None: if file_type is None: name = file else: name = file_type + " " + str(self.type_count[file_type]) self.tracks.append({name: track_dict})
[docs] def add_hlines(self, y_values: Iterable[int | float], overlay_previous: Literal["share-y", "no"] = "share-y", **kwargs: Any): """Add horizontal lines to the previous plot. Parameters ---------- y_values : list of int or float List of y values to plot horizontal lines at. overlay_previous : str, default "share-y" Whether to plot the lines on the same y-axis as the previous plot ("share-y") or on a new y-axis ("no"). **kwargs : arguments Additional arguments to be passed to pyGenomeTracks track configuration, for example `title="My lines"`. """ y_values = [str(y) for y in y_values] d = {"hlines": {"y_values": ", ".join(y_values), "overlay_previous": overlay_previous, "title": ""}} d["hlines"].update(kwargs) d["hlines"]["file_type"] = "hlines" self.tracks.append(d)
[docs] def add_hline(self, height: int | float = 1, line_width: int | float = 2, **kwargs: Any): """Add a horizontal line between tracks, not within a track. Can be used to visually separate tracks. Parameters ---------- height : int, default 1 Height of the track with the line in the middle. line_width : int, default 2 Width of the line. **kwargs : arguments Additional arguments to be passed to pyGenomeTracks track configuration, for example `title="A line"`. """ d = {} d["height"] = height d["line_width"] = line_width d["show_data_range"] = kwargs.get("show_data_range", False) # default is False d["overlay_previous"] = "no" self.add_hlines([1], min_value=0, max_value=2, **d) # line is in the middle of the track
[docs] def add_spacer(self, height: int | float = 1): """Add a spacer between tracks. Parameters ---------- height : int, default 1 Height of the spacer track. """ d = {"spacer": {"height": height}} self.tracks.append(d)
[docs] def add_xaxis(self, height: int | float = 1, **kwargs: Any): """Add the x-axis to the plot. Parameters ---------- height : int, default 1 Height of the x-axis track. **kwargs : arguments Additional arguments to be passed to pyGenomeTracks track configuration. """ d = {"height": height} d.update(kwargs) self.tracks.append({"x-axis": d})
def _predict_type(self, file: str) -> str | None: """Predict the file type from the file ending or the contents of the file. Parameters ---------- file : str Path of the file to be plotted. Returns ------- str | None String of predicted file type or None if the file type could not be predicted. """ if file.endswith(".bed"): return "bed" elif file.endswith(".bw"): return "bigwig" elif file.endswith(".gtf"): return "gtf" else: logger.warning(f"Could not predict file type for '{file}'. pyGenometracks will try to predict the file type. For more control, please specify 'file_type' manually in '.add_track'.") def _create_config_str(self) -> str: """Create configuration string based on tracks list. Returns ------- config_str : str String containing the configuration file content """ config_str = "" for d in self.tracks: track_name = list(d.keys())[0] track_dict = d[track_name] config_str += f"[{track_name}]\n" for key, value in track_dict.items(): config_str += f"{key} = {value}\n" config_str += "\n" return config_str def _write_config(self, config_file: Optional[str] = None) -> str: """Write the configuration file to disk. Parameters ---------- config_file : str, default None Path to the configuration file to create. If None, a temporary file will be created in the system's temp directory. Returns ------- config_file : str Path to the configuration file. """ if config_file is None: config_file = os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names())) config_str = self._create_config_str() with open(config_file, "w") as f: f.write(config_str) return config_file
[docs] def show_plot(self): """Display the plot.""" if self.output is None: raise ValueError("No output file was created. Run GenomeTracks.plot() first.") if utils._is_notebook(): from IPython.display import Image, IFrame, display if self.output.endswith(".png"): display(Image(filename=self.output)) elif self.output.endswith(".pdf"): display(IFrame(self.output)) else: import matplotlib.image as mpimg if self.output.endswith(".png"): img = mpimg.imread(self.output) plt.imshow(img) plt.axis('off') plt.show() else: logger.warning("Only .png files can be shown in the console.")
[docs] def show_config(self): """Show the current configuration file as a string.""" config_str = self._create_config_str() print(config_str)
[docs] def plot(self, region: str, output: Optional[str] = "genometracks.png", config_file: Optional[str] = None, title: Optional[str] = None, show: bool = True, dpi: int = 300, **kwargs: Any): """ Plot the final GenomeTracks plot based on the collected tracks. Runs pyGenomeTracks with the configuration file and the given parameters, and saves the output to the given file. Parameters ---------- region : str Region to plot, e.g. "chr1:1000000-2000000". output : str, default "genometracks.png" Path to the output file. config_file : str, default None Path to the configuration file to create. If None, a temporary file will be created in the system's temp directory. title : str, default None Title of the plot. If None, no title will be shown. show : bool, default True If the function is run in a jupyter notebook, 'show' controls whether to show the plot at the end of the function run. dpi : int, default 300 DPI of the plot. **kwargs : arguments Additional arguments to be passed to pyGenomeTracks, for example `trackLabelFraction=0.2`. Raises ------ ValueError If the pyGenomeTracks command fails. """ kwargs["title"] = f"'{title}'" if title is not None else None kwargs["dpi"] = dpi # create the .ini file ini_file = self._write_config(config_file=config_file) # Build command pgtracks_path = utils.get_binary_path("pyGenomeTracks") cmd = f"{pgtracks_path} --tracks {ini_file} --region {region} --outFileName {output} " # Add additional kwargs for key, value in kwargs.items(): if value is not None: # title might be None cmd += f" --{key} {value} " # Run pygenometracks logger.debug(f"Running command: '{cmd}'") try: subprocess.run(cmd, shell=True, check=True) except subprocess.CalledProcessError as e: self.output = None # reset output in case it was previously plotted raise ValueError(f"Error while running pyGenomeTracks: {e.output}") # Remove config file if config_file is None: # config_file was created by _write_config os.remove(ini_file) # Show in notebook if show: self.output = output self.show_plot()