# Source code for cockpit.plotter

"""Plotting Part of the Cockpit."""

import glob
import os
import warnings
from collections import defaultdict

import json_tricks
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from PIL import Image

from cockpit import instruments
from cockpit.cockpit import Cockpit
from cockpit.instruments import utils_plotting


class CockpitPlotter:
    """Cockpit Plotter Class.

    Draws the tracked quantities of a ``Cockpit`` (or of a .json log written by
    ``Cockpit.write``) into a matplotlib figure made of several instruments,
    and optionally a secondary figure with experimental quantities.
    """

    def __init__(self, secondary_screen=False):
        """Initialize the cockpit plotter.

        Args:
            secondary_screen (bool): Whether to plot other experimental quantities
                on a secondary screen.
        """
        # Remember the backend active at construction so it can be restored
        # whenever a plot should actually be shown.
        self._mpl_default_backend = plt.get_backend()
        self._mpl_no_show_backend = "Agg"

        self._secondary_screen = secondary_screen

        # Set plotting parameters
        self._set_plotting_params()
        self._set_layout_params()

    def __update_problem_info(self, source):
        """Try extracting and storing info about model, optimizer, dataset.

        Every key/value pair returned by ``utils_plotting._extract_problem_info``
        is stored as an attribute on this plotter.

        Args:
            source (Cockpit or str): Source the information is extracted from.
        """
        for key, value in utils_plotting._extract_problem_info(source).items():
            setattr(self, key, value)

    def _set_layout_params(self):
        """Initialize parameters that define the plot layout."""
        # Individual parts are managed separately but (for now) they share a layout
        self.inner_num_rows = 5
        self.inner_num_cols = 3
        self.inner_width_ratios = [0.1, 1, 0.1]
        self.inner_height_ratios = [0.0, 1, 1, 1, 0.00]
        self.inner_hspace = 0.8

    def _set_backend(self, show_plot):
        """Use a backend that does (not) show plots.

        Args:
            show_plot (bool): If True, switch to the backend that was active when
                the plotter was created, otherwise to the non-interactive one.
        """
        current = plt.get_backend()
        new = self._mpl_default_backend if show_plot else self._mpl_no_show_backend

        # Compare case-insensitively so that e.g. "agg" and "Agg" do not trigger
        # a needless backend switch.
        if current.lower() != new.lower():
            mpl.use(new)

    def plot(
        self,
        source,
        show_plot=True,
        block=False,
        save_plot=False,
        savedir=None,
        savename="cockpit",
        savename_append=None,
        savefig_kwargs=None,
        show_log_iter=False,
        discard=None,
        plot_title=None,
        debug=False,
    ):
        """Plot the cockpit for the current state of the log file.

        Args:
            source (Cockpit or str): ``Cockpit`` instance, or string containing the
                path to a .json log produced with ``Cockpit.write``, where
                information will be fetched from.
            show_plot (bool, optional): Whether the plot should be shown on
                screen. Defaults to True.
            block (bool, optional): Whether showing the plot should block, i.e.
                halt the computation until the plot is closed. Defaults to False.
            save_plot (bool, optional): Whether the plot should be saved to disk.
                Defaults to False.
            savedir (str, optional): Directory where to save the plot. If None and
                ``source`` is a string, ``source`` itself is used.
            savename (str, optional): Filename of the saved plot.
            savename_append (str, optional): Optional appendix to the savefile
                name. Defaults to None.
            savefig_kwargs (dict, optional): Additional keyword arguments that are
                passed to `fig.savefig` such as fileformat or dpi.
            show_log_iter (bool, optional): Whether the instruments should use
                a log scale for the iterations. Defaults to False.
            discard (int, optional): Global step after which information should be
                discarded.
            plot_title (str, optional): Cockpit's plot title. Defaults to None. In
                this case a title is built from the optimizer information
                extracted from ``source`` (if any). Passing a string overrides it.
            debug (bool, optional): Enable debug mode. Defaults to False.

        Raises:
            ValueError: Raises ValueError if source is a ``Cockpit`` instance, but
                no savedir is given.
        """
        self.__update_problem_info(source)
        self._set_backend(show_plot)

        self.debug = debug
        self.show_log_iter = show_log_iter

        # The figure is created once and re-used (cleared) on later calls.
        if not hasattr(self, "fig"):
            self.fig = plt.figure("Primary screen", constrained_layout=False)

        # read in results
        self._read_tracking_results(source, discard=discard)

        # Plotting
        self.fig.clf()  # clear the cockpit figure to replace it

        # Subplot grid: Currently looks like this.
        # +-----------------------+------------------------+--------------------+
        # | STEP SIZE:            | GRADIENTS:             | CURVATURE          |
        # |                       |                        |                    |
        # | Alpha Gauge           | Gradient Tests Gauge   | MaxEV              |
        # | Distance              | 1D Histogram           | Trace (layerwise)  |
        # | Grad Norm             | 2D Histogram           | TIC                |
        # +-----------------------+------------------------+--------------------+
        # | Hyperparameter Gauge  | Performance Gauge                           |
        # +-----------------------+---------------------------------------------+

        # Build the larger grid (for the three categories, and the two bottom plots)
        outer_widths = [1, 1, 1]
        outer_heights = [3, 1]
        self.grid_spec = self.fig.add_gridspec(
            ncols=3,
            nrows=2,
            width_ratios=outer_widths,
            height_ratios=outer_heights,
            wspace=0.1,
            hspace=0.1,
        )

        self._plot_step(self.grid_spec[0, 0])
        self._plot_gradients(self.grid_spec[0, 1])
        self._plot_curvature(self.grid_spec[0, 2])
        self._plot_hyperparams(self.grid_spec[1, 0])
        self._plot_performance(self.grid_spec[1, 1:])

        # Post Process Title, Legend etc.
        self._post_process_plot(plot_title)

        if self._secondary_screen:
            self._plot_secondary_screen()

        # Show or Save plots
        if show_plot:
            msg = "[cockpit|plot] Showing current Cockpit."
            msg += " Blocking. Close plot to continue." if block else ""
            print(msg)
            plt.show(block=block)
            plt.pause(0.001)

        if save_plot:
            if savedir is None:
                if isinstance(source, str):
                    # NOTE(review): this uses the log path itself (without the
                    # ".json" suffix) as the output directory - confirm that this
                    # is intended rather than its parent directory.
                    savedir = source
                else:
                    raise ValueError("Please specify savedir when plotting a Cockpit.")

            self._save(
                savedir,
                savename,
                savename_append,
                savefig_kwargs,
                screen="primary",
            )
            if self._secondary_screen:
                self._save(
                    savedir,
                    savename,
                    savename_append,
                    savefig_kwargs,
                    screen="secondary",
                )

    def _plot_step(self, grid_spec):
        """Plot all instruments having to do with step size in the given gridspec.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        # Use grid_spec with a "dummy plot" to set Group title and color
        self.ax_step = self.fig.add_subplot(grid_spec)
        self.ax_step.set_title("STEP SIZE", fontweight="bold", fontsize="x-large")
        self.ax_step.set_facecolor(self.bg_color_one)
        self.ax_step.set_xticklabels([])
        self.ax_step.set_yticklabels([])

        # Build inner structure of this plotting group
        # We use additional "dummy" gridspecs to position the instruments
        self.gs_step = grid_spec.subgridspec(
            self.inner_num_rows,
            self.inner_num_cols,
            width_ratios=self.inner_width_ratios,
            height_ratios=self.inner_height_ratios,
            hspace=self.inner_hspace,
        )

        instruments.alpha_gauge(self, self.fig, self.gs_step[1, 1])
        instruments.distance_gauge(self, self.fig, self.gs_step[2, 1])
        instruments.grad_norm_gauge(self, self.fig, self.gs_step[3, 1])

    def _plot_gradients(self, grid_spec):
        """Plot all instruments having to do with the gradients in the given gridspec.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        # Use grid_spec with a "dummy plot" to set Group title and color
        self.ax_gradients = self.fig.add_subplot(grid_spec)
        self.ax_gradients.set_title("GRADIENTS", fontweight="bold", fontsize="x-large")
        self.ax_gradients.set_facecolor(self.bg_color_two)
        self.ax_gradients.set_xticklabels([])
        self.ax_gradients.set_yticklabels([])

        # Build inner structure of this plotting group
        # We use additional "dummy" gridspecs to position the instruments
        self.gs_gradients = grid_spec.subgridspec(
            self.inner_num_rows,
            self.inner_num_cols,
            width_ratios=self.inner_width_ratios,
            height_ratios=self.inner_height_ratios,
            hspace=self.inner_hspace,
        )

        instruments.gradient_tests_gauge(self, self.fig, self.gs_gradients[1, 1])
        instruments.histogram_1d_gauge(self, self.fig, self.gs_gradients[2, 1])
        instruments.histogram_2d_gauge(self, self.fig, self.gs_gradients[3, 1])

    def _plot_curvature(self, grid_spec):
        """Plot all instruments having to do with curvature in the given gridspec.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        # Use grid_spec with a "dummy plot" to set Group title and color
        self.ax_curvature = self.fig.add_subplot(grid_spec)
        self.ax_curvature.set_title("CURVATURE", fontweight="bold", fontsize="x-large")
        self.ax_curvature.set_facecolor(self.bg_color_three)
        self.ax_curvature.set_xticklabels([])
        self.ax_curvature.set_yticklabels([])

        # Build inner structure of this plotting group
        # We use additional "dummy" gridspecs to position the instruments.
        # Work on a copy so the shared layout parameters stay untouched: this
        # group drops the right padding and widens the left one.
        inner_width_ratios_curvature = self.inner_width_ratios[:]
        inner_width_ratios_curvature[2] = 0.0
        inner_width_ratios_curvature[0] = 1.5 * inner_width_ratios_curvature[0]
        self.gs_curvature = grid_spec.subgridspec(
            self.inner_num_rows,
            self.inner_num_cols,
            width_ratios=inner_width_ratios_curvature,
            height_ratios=self.inner_height_ratios,
            hspace=self.inner_hspace,
        )

        instruments.max_ev_gauge(self, self.fig, self.gs_curvature[1, 1])
        instruments.trace_gauge(self, self.fig, self.gs_curvature[2, 1])
        instruments.tic_gauge(self, self.fig, self.gs_curvature[3, 1])

    def _plot_hyperparams(self, grid_spec):
        """Plot all instruments showing the hyperparameters.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        instruments.hyperparameter_gauge(self, self.fig, grid_spec)

    def _plot_performance(self, grid_spec):
        """Plot all instruments having to do with the networks performance.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        instruments.performance_gauge(self, self.fig, grid_spec)

    def build_animation(
        self,
        logpath,
        duration=200,
        loop=0,
    ):
        """Build an animation from the stored images during training.

        One GIF is written per screen, next to ``logpath``, named
        ``<logpath without extension>__<screen>.gif``.

        TODO Make this independent of stored images. Instead generate those images
        in hindsight and ideally use fixed axis.

        Args:
            logpath (str): Full logpath to the JSON file.
            duration (int, optional): Time to display each frame, in milliseconds.
                Defaults to 200.
            loop (int, optional): Number of times the GIF should loop.
                Defaults to 0 which means it will loop forever.

        :meta private:
        """
        screens = ["primary"]
        if self._secondary_screen:
            screens.append("secondary")

        for screen in screens:
            fp_out = os.path.splitext(logpath)[0] + f"__{screen}.gif"
            self._animate(logpath, screen, fp_out, duration, loop)

    def _animate(self, logpath, screen, fp_out, duration, loop):
        """Generate animation from paths to images and save.

        Args:
            logpath (str): Full logpath to the JSON file; its extension is
                stripped to build the glob pattern of the frame images.
            screen (str): Screen name used in the frame file names.
            fp_out (str): Path of the GIF that is written.
            duration (int): Time to display each frame, in milliseconds.
            loop (int): Number of times the GIF should loop.

        Raises:
            ValueError: If no frame image matches the expected file pattern.
        """
        # load frames
        pattern = os.path.splitext(logpath)[0] + f"__{screen}__epoch__*.png"
        # NOTE(review): paths are sorted lexicographically; this only yields the
        # chronological order if the epoch numbers are zero-padded - confirm.
        frame_paths = sorted(glob.glob(pattern))
        if not frame_paths:
            raise ValueError(f"No frames found matching pattern {pattern}")

        opened = []
        try:
            for path in frame_paths:
                opened.append(Image.open(path))
            frame, *frames = opened

            # Collect images and create Animation
            print(f"[cockpit|animate] Saving GIF in {fp_out}")
            frame.save(
                fp=fp_out,
                format="GIF",
                append_images=frames,
                save_all=True,
                duration=duration,
                loop=loop,
            )
        finally:
            # Release the file handles of all frames, even if saving failed.
            for image in opened:
                image.close()

    def _set_plotting_params(self):
        """Set the general plotting options, such as plot size, style, etc."""
        # Settings:
        plt.ion()  # turn on interactive mode, so program continues while plotting.
        plot_size_default = [30, 15]
        plot_scale = 1.0  # 0.7 works well for the MacBook
        sns.set_style("dark")
        sns.set_context("paper", font_scale=1.0)
        self.save_format = "png"  # how the plots should be saved

        # Colors
        self.primary_color = (0.29, 0.45, 0.68, 1.0)  # blue #4a73ad
        self.secondary_color = (0.95, 0.50, 0.20, 1.0)  # orange #f28033
        self.tertiary_color = (0.30, 0.60, 0.40, 1.0)  # green #339966

        # Background colors for the plotting groups: same RGB as the main
        # colors, with the alpha channel replaced.
        alpha = 0.75
        self.bg_color_one = self.primary_color[:-1] + (alpha,)
        self.bg_color_two = self.secondary_color[:-1] + (alpha,)
        self.bg_color_three = self.tertiary_color[:-1] + (alpha,)

        self.cmap = plt.cm.viridis  # primary color map
        self.cmap2 = plt.cm.cool  # secondary color map
        self.alpha_cmap = utils_plotting._alpha_cmap(self.primary_color)

        self.bg_color_instruments = (1.0, 1.0, 1.0)
        self.bg_color_instruments2 = "#ababba"  # highlight color of summary plots
        self.EMA_alpha = 0.2  # Decay factor of the exponential moving avg.

        # Apply the settings
        mpl.rcParams["figure.figsize"] = [plot_scale * e for e in plot_size_default]

    def _read_tracking_results(self, source, discard=None):
        """Read the tracking results into an internal DataFrame.

        The result is stored in ``self.tracking_data``.

        Args:
            source (Cockpit or str): ``Cockpit`` instance, or string containing the
                path to a .json log produced with ``Cockpit.write``, where
                information will be fetched from.
            discard (int, optional): Global step after which information should be
                discarded.

        Raises:
            ValueError: If `source` is neither a ``Cockpit`` instance nor a string.
        """
        if isinstance(source, Cockpit):
            data = source.get_output()
        elif isinstance(source, str):
            with open(source + ".json") as f:
                # defaultdict to be consistent with fetching from Cockpit
                data = defaultdict(dict, json_tricks.load(f))
        else:
            raise ValueError(f"Source must be Cockpit or path to .json. Got {source}")

        self.tracking_data = self._build_tracking_frame(data, discard=discard)

    @staticmethod
    def _build_tracking_frame(data, discard=None):
        """Turn a mapping ``iteration -> tracked quantities`` into a DataFrame.

        Args:
            data (dict): Mapping whose keys are iterations (numbers or numeric
                strings) and whose values are dicts of tracked quantities.
            discard (int, optional): Global step after which information should be
                discarded. Rows with ``iteration > discard`` are dropped.

        Returns:
            pandas.DataFrame: One row per iteration, sorted by iteration, with
                the iteration stored in the column ``'iteration'``.
        """
        # Read data into a DataFrame
        frame = pd.DataFrame.from_dict(data, orient="index")
        # Change data type of index to numeric
        frame.index = pd.to_numeric(frame.index)
        # Sort by this index
        frame = frame.sort_index()
        # Rename index to 'iteration' and store it in separate column
        frame = frame.rename_axis("iteration").reset_index()

        if discard is not None:
            # After ``reset_index`` the frame's index is only the row position,
            # so the global step has to be compared via the 'iteration' column.
            frame = frame[frame["iteration"] <= discard]

        return frame

    def _save(
        self,
        savedir,
        savename,
        savename_append,
        savefig_kwargs,
        screen="primary",
    ):
        """Save the (internal) figure to file.

        The file is written to
        ``<savedir>/<savename>__<screen>[__<savename_append>].<format>``.

        Args:
            savedir (str): Directory where to save the plot.
            savename (str): Filename of the saved plot.
            savename_append (str, optional): Optional appendix to the savefile
                name. Defaults to None.
            savefig_kwargs (dict, optional): Additional keyword arguments that are
                passed to `fig.savefig` such as fileformat or dpi.
            screen (str): String that specifies screen figure should be saved.
                Possible options are ``'primary'`` and ``'secondary'``.

        Raises:
            ValueError: If screen is neither ``primary`` nor ``secondary``.
        """
        # Validate first, so an invalid screen never touches the file system.
        if screen not in ("primary", "secondary"):
            raise ValueError(f"screen must be 'primary' or 'secondary'. Got {screen}")

        if savename_append is None:
            savename_append = ""
        else:
            savename_append = "__" + savename_append

        file_path = os.path.join(savedir, savename + f"__{screen}" + savename_append)

        # An explicit format in the savefig kwargs takes precedence over the
        # plotter's default save format.
        if savefig_kwargs is not None and "format" in savefig_kwargs:
            file_path += "." + savefig_kwargs["format"]
        else:
            file_path += "." + self.save_format

        fig = self.fig if screen == "primary" else self.secondary_fig

        print(f"[cockpit|plot] Saving figure in {file_path}")
        os.makedirs(savedir, exist_ok=True)

        if savefig_kwargs is None:
            fig.savefig(file_path)
        else:
            fig.savefig(file_path, **savefig_kwargs)

    def _post_process_plot(self, plot_title):
        """Process the plotting figure, by adding a title, legend, etc.

        Args:
            plot_title (str or None): Title of the figure. If empty or None, the
                title is built from ``self.optimizer`` when that is available.
        """
        # Set Title
        if not plot_title:
            # The attribute is only present if problem info could be extracted.
            optimizer = getattr(self, "optimizer", None)
            plot_title = "Cockpit for " + optimizer if optimizer else "Cockpit"
        self.fig.suptitle(plot_title, fontsize="xx-large", fontweight="bold")

    def _plot_secondary_screen(self):
        """Plot a second figure with experimental quantities."""
        # The figure is created once and re-used (cleared) on later calls.
        if not hasattr(self, "secondary_fig"):
            self.secondary_fig = plt.figure(
                "Secondary screen", constrained_layout=False
            )

        self.secondary_fig.clf()

        secondary_outer_widths = [1]
        secondary_outer_heights = [1, 3]

        self.secondary_grid_spec = self.secondary_fig.add_gridspec(
            ncols=1,
            nrows=2,
            width_ratios=secondary_outer_widths,
            height_ratios=secondary_outer_heights,
            wspace=0.1,
            hspace=0.1,
        )

        self.__set_ax_auxiliary(self.secondary_grid_spec[0, 0])
        self._plot_auxiliary(self.secondary_grid_spec[0, 0])

        self.__set_ax_layerwise(self.secondary_grid_spec[1, 0])
        self._plot_layerwise(self.secondary_grid_spec[1, 0])

    def __set_ax_auxiliary(self, grid_spec):
        """Use grid_spec with a "dummy plot" to set Group title and color.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        self.ax_auxiliary = self.secondary_fig.add_subplot(grid_spec)
        self.ax_auxiliary.set_title("AUXILIARY", fontweight="bold", fontsize="x-large")
        self.ax_auxiliary.set_facecolor(self.bg_color_one)
        self.ax_auxiliary.set_xticklabels([])
        self.ax_auxiliary.set_yticklabels([])

    def _plot_auxiliary(self, grid_spec):
        """Plot auxiliary quantities to the secondary screen.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        # Build inner structure of this plotting group
        # We use additional "dummy" gridspecs to position the instruments
        self.gs_auxiliary = grid_spec.subgridspec(
            3,
            7,
            width_ratios=[0.05, 1, 0.05, 1, 0.05, 1, 0.05],
            height_ratios=[0.0, 1, 0.0],
            hspace=self.inner_hspace,
        )

        # plot mean GSNR
        instruments.mean_gsnr_gauge(self, self.secondary_fig, self.gs_auxiliary[1, 1])
        instruments.cabs_gauge(self, self.secondary_fig, self.gs_auxiliary[1, 3])
        instruments.early_stopping_gauge(
            self, self.secondary_fig, self.gs_auxiliary[1, 5]
        )

    def __set_ax_layerwise(self, grid_spec):
        """Use grid_spec with a "dummy plot" to set Group title and color.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
        """
        self.ax_layerwise = self.secondary_fig.add_subplot(grid_spec)
        self.ax_layerwise.set_title("LAYERWISE", fontweight="bold", fontsize="x-large")
        self.ax_layerwise.set_facecolor(self.bg_color_two)
        self.ax_layerwise.set_xticklabels([])
        self.ax_layerwise.set_yticklabels([])

    def _plot_layerwise(self, grid_spec, fig=None):
        """Plot layerwise 2d histograms to the secondary screen.

        One 2d histogram is drawn per parameter group. The number of groups is
        the most recent non-missing entry of the ``'param_groups'`` column of
        ``self.tracking_data``. If that information is unavailable, a warning is
        issued and nothing is plotted.

        Args:
            grid_spec (matplotlib.gridspec): GridSpec where the plot should be placed
            fig (matplotlib figure, optional): Figure to draw into. Defaults to
                None, in which case ``self.secondary_fig`` is used.
        """
        # Build inner structure
        try:
            recorded = self.tracking_data["param_groups"].dropna()
        except KeyError:
            warnings.warn("Cannot create layerwise plots (missing 'param_groups')")
            return

        if recorded.empty:
            warnings.warn(
                "Cannot create layerwise plots (no 'param_groups' value recorded)"
            )
            return

        # Take the scalar directly instead of converting a 2d array to int.
        param_groups = int(recorded.iloc[-1])

        def get_layout(num_plots, min_rows=2, min_cols=2):
            """Step-wise increase rows and columns until they can fit all plots."""
            dims = [min_rows, min_cols]
            increase_next = 0  # alternate between growing rows and columns

            while dims[0] * dims[1] < num_plots:
                dims[increase_next] = dims[increase_next] + 1
                increase_next = (increase_next + 1) % 2

            return dims

        num_rows, num_cols = get_layout(param_groups)

        # Every plot cell is surrounded by thin padding rows/columns.
        self.gs_layerwise = grid_spec.subgridspec(
            2 * num_rows + 1,
            2 * num_cols + 1,
            width_ratios=num_cols * [0.05, 1] + [0.05],
            height_ratios=num_rows * [0.0, 1] + [0.0],
            hspace=self.inner_hspace,
        )

        def to_grid(idx):
            """Map one-dimension index to coordinates in 2d layout.

            Need to take into account the padding around actual plots.

            Args:
                idx (int): One-dimensional index.

            Returns:
                tuple: Tuple of x, y coordinates of index in 2d layout.
            """
            assert 0 <= idx < param_groups

            x_unpadded, y_unpadded = divmod(idx, num_cols)

            # Odd positions hold plots, even positions hold padding.
            return 2 * x_unpadded + 1, 2 * y_unpadded + 1

        if fig is None:
            fig = self.secondary_fig

        for idx in range(param_groups):
            x, y = to_grid(idx)
            instruments.histogram_2d_gauge(self, fig, self.gs_layerwise[x, y], idx=idx)