Source code for chemicalchecker.util.plot.ccstatsplot

"""CC statistics plots for CC web page."""
import os
import copy
import inspect
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import cm
from matplotlib import patches
from .util import canvas, cc_grid, cc_colors, homogenous_ticks, set_style, cc_coords_name

from chemicalchecker.util import logged

# Module-level side effect: `set_style` (imported from `.util`) runs once when
# this module is imported. Presumably it configures the shared
# matplotlib/seaborn look used by every plot below -- confirm in `.util`.
set_style()


@logged
class CCStatsPlot(object):
    """Summary plots of Chemical Checker statistics for the CC web page.

    Every public plotting method (see :meth:`available`) either writes the
    figure to ``save_dir`` or returns it, depending on the ``save`` flag
    given at construction time.
    """

    def __init__(self, cc, width=30, height=30, dpi=70, transparent=True,
                 save=True, save_format='png', save_dir='./'):
        """Initialize a CCStatsPlot instance.

        The plotter works on data precomputed mainly using
        :mod:`~chemicalchecker.core.diagnostics`.

        Args:
            cc (ChemicalChecker): A ChemicalChecker object.
            width: Figure width forwarded to ``canvas``.
            height: Figure height forwarded to ``canvas``.
            dpi: Figure resolution forwarded to ``canvas``.
            transparent (bool): Forwarded to ``Figure.savefig``.
            save (bool): When True each plot is written to disk and the
                plotting method returns None; when False the figure is
                returned instead and nothing is written.
            save_format (str): File extension used for saved figures.
            save_dir (str): Directory where figures are saved.
        """
        self.cc = cc
        self.width = width
        self.height = height
        self.dpi = dpi
        self.transparent = transparent
        # The flag cannot be stored as `self.save`: that name is the save
        # method, and testing the bound method is always truthy (which made
        # `save=False` a no-op before).
        self._save_enabled = bool(save)
        self.save_format = save_format
        self.save_dir = save_dir

    def plot_all(self):
        """Run all plots reported in the 'available' table."""
        # TODO: take kwargs dict for each plot
        for method in self.available()['method']:
            self.__log.info('Plotting: %s' % method)
            # Plain attribute lookup instead of eval() on a built string.
            getattr(self, method)()

    def available(self):
        """Summarize the available plots.

        Returns:
            pandas.DataFrame: One row per plotting method, sorted by method
            name, with columns ``method`` and ``description``.
        """
        descriptions = {
            "matrices": (
                "Number of molecules and signature lengths in each of the "
                "25 Chemical Checker datasets. Signature lengths can be "
                "read as a measure of complexity or sparsity of the data."
            ),
            "moa_validations": (
                "Chemical Checker datasets correlates with mechanisms of "
                "action (MoA). The receiver-operating characteristic (ROC) "
                "curves measure how similar molecules tend to share MoA. "
                "Note that the almost-perfect performance in the "
                "'Mechanism of action' dataset is trivial."
            ),
            "correlations": (
                "Degree of correlation between data types measured as the "
                "ability to use one signature (x-axis) to recover neighbors "
                "defined with another signature (y-axis). Red denotes high "
                "correlation and blue denotes low correlation."
            ),
        }
        rows = [(name, descriptions[name]) for name in sorted(descriptions)]
        return pd.DataFrame(rows, columns=["method", "description"])

    def save(self, fig, name=None):
        """Write ``fig`` to ``save_dir`` as ``<name>.<save_format>``.

        Args:
            fig: Figure exposing a matplotlib-style ``savefig`` method.
            name (str): Base file name. When omitted, the name of the
                calling function is used (the historical behaviour).
        """
        if name is None:
            # Frame 1 is the direct caller of save().
            name = inspect.stack()[1][3]
        filename = '%s.%s' % (name, self.save_format)
        fig.savefig(os.path.join(self.save_dir, filename),
                    bbox_inches='tight', transparent=self.transparent)

    def _save_or_return(self, fig, name):
        """Save ``fig`` under ``name`` if saving is enabled, else return it."""
        if self._save_enabled:
            self.save(fig, name=name)
            return None
        return fig

    def matrices(self, cctype='sign0', molset='full', max_mols=6.5,
                 max_feat=4.5):
        """Plot one rectangle per dataset sized by its matrix dimensions.

        Dimensions are read from
        ``cc.metadata['dimensions'][molset][ds][cctype]`` and drawn in log10
        units (width = variables, height = molecules). Datasets whose
        dimensions cannot be fetched are logged and left blank.

        Args:
            cctype (str): Signature type whose dimensions are plotted.
            molset (str): Molecule set key used in the metadata lookup.
            max_mols (float): Minimum upper limit of the y axis (log10);
                raised automatically if a dataset is larger.
            max_feat (float): Minimum upper limit of the x axis (log10);
                raised automatically if a dataset is larger.

        Returns:
            The figure when saving is disabled, otherwise None.
        """
        # create the grid
        fig, grid = canvas(width=self.width, height=self.height, dpi=self.dpi)
        axes = cc_grid(fig, grid, legend_out=False, cc_space_names=True,
                       hspace=0.2, wspace=0.2,
                       shared_ylabel='Molecules (log10)',
                       shared_xlabel='Variables (log10)')
        datasets = list(self.cc.datasets_exemplary())

        # check maximum sizes, so that every panel shares the same limits
        dims = dict()
        for ds in datasets:
            try:
                nr_mol, nr_feat = self.cc.metadata[
                    'dimensions'][molset][ds][cctype]
            except Exception as ex:
                # best effort: a missing dataset must not abort the figure
                self.__log.error('Cannot fetch cc.metadata: %s' % str(ex))
                continue
            max_mols = np.max([np.log10(nr_mol), max_mols])
            max_feat = np.max([np.log10(nr_feat), max_feat])
            dims[ds] = (nr_mol, nr_feat)

        y_ticks = range(int(np.ceil(max_mols)))
        x_ticks = range(int(np.ceil(max_feat)))

        # plot a rectangle for each space
        for ax, ds in zip(axes, datasets):
            if ds not in dims:
                continue
            color = cc_colors(ds, lighness=.3, alternate=True,
                              dark_first=True)
            nr_mol, nr_feat = dims[ds]
            rect = patches.Rectangle(
                (0, 0), np.log10(nr_feat), np.log10(nr_mol),
                facecolor=color, edgecolor='k', linewidth=1, alpha=0.9)
            ax.add_patch(rect)
            ax.set_ylim(0, max_mols)
            ax.set_xlim(0, max_feat)
            ax.set_aspect('auto')
            ax.set_yticks(y_ticks)
            ax.set_xticks(x_ticks)
            # explicit tick labels only where the dataset code contains
            # 'E' (x axis) or '1' (y axis)
            if 'E' in ds[:2]:
                ax.set_xticklabels(x_ticks)
            if '1' in ds[:2]:
                ax.set_yticklabels(y_ticks)

        # save or return
        return self._save_or_return(fig, 'matrices')

    def moa_validations(self, cctype='sign1'):
        """Plot the MoA ROC curve of every exemplary dataset on the CC grid.

        Each panel is drawn by the dataset diagnosis plotter
        (``diag.plotter.moa_roc``). Datasets for which that call fails are
        logged and skipped.

        Args:
            cctype (str): Signature type to diagnose.

        Returns:
            The figure when saving is disabled, otherwise None.
        """
        fig, grid = canvas(width=self.width, height=self.height, dpi=self.dpi)
        axes = cc_grid(fig, grid, legend_out=False, cc_space_names=True,
                       hspace=0.2, wspace=0.2,
                       shared_ylabel='True positive rate',
                       shared_xlabel='False positive rate')
        ticks = [0, 0.5, 1]
        tick_labels = ['0', '.5', '1']

        for ax, ds in zip(axes, self.cc.datasets_exemplary()):
            color = cc_colors(ds, lighness=.3, alternate=True,
                              dark_first=True)
            sign = self.cc.signature(ds, cctype)
            diag = sign.diagnosis(ref_cc=self.cc)
            try:
                diag.plotter.moa_roc(title=False, color=color,
                                     xylabels=False, ax=ax)
            except Exception as ex:
                # best effort: a missing diagnosis must not abort the figure
                self.__log.error('Cannot fetch moa_roc: %s' % str(ex))
                continue
            ax.set_aspect('auto')
            ax.set_xticks(ticks)
            ax.set_xticklabels(tick_labels)
            ax.set_yticks(ticks)
            ax.set_yticklabels(tick_labels)
            # tick labels are kept only where the dataset code contains
            # 'E' (x axis) or '1' (y axis)
            if 'E' not in ds[:2]:
                ax.set_xticklabels('')
            if '1' not in ds[:2]:
                ax.set_yticklabels('')

        # save or return
        return self._save_or_return(fig, 'moa_validations')

    def correlations(self, cctype='sign1'):
        """Plot a heatmap of cross-dataset ROC AUC values.

        For each dataset (row) the precomputed ``across_roc.pkl`` diagnosis
        is loaded and its ``'auc'`` entry for every other dataset (column)
        is placed in the matrix. Missing values stay NaN and are masked.

        Args:
            cctype (str): Signature type to diagnose.

        Returns:
            The figure when saving is disabled, otherwise None.
        """
        datasets = list(self.cc.datasets_exemplary())
        n_ds = len(datasets)

        rocauc_matrix = np.full((n_ds, n_ds), np.nan)
        for row, ds1 in enumerate(datasets):
            sign = self.cc.signature(ds1, cctype)
            diag = sign.diagnosis(ref_cc=self.cc)
            try:
                res = diag._load_diagnosis_pickle('across_roc.pkl')
            except Exception as ex:
                # best effort: a missing diagnosis leaves the row as NaN
                self.__log.error('Cannot fetch across_roc: %s' % str(ex))
                continue
            for col, ds2 in enumerate(datasets):
                if ds2 not in res or res[ds2] is None:
                    continue
                rocauc_matrix[row, col] = res[ds2]['auc']

        fig, grid = canvas(width=self.width, height=self.height, dpi=self.dpi)
        ax = fig.add_subplot(grid[:])

        # `cm.get_cmap` was removed in Matplotlib 3.9; prefer the colormap
        # registry when available and fall back for older releases.
        try:
            from matplotlib import colormaps
            base_cmap = colormaps["RdYlBu_r"]
        except ImportError:
            base_cmap = cm.get_cmap("RdYlBu_r")
        # work on a copy so the registered colormap is never modified
        cmap = copy.copy(base_cmap)
        cmap.set_bad(".5")

        names = [cc_coords_name(ds[:2]) for ds in datasets]
        sns.heatmap(
            rocauc_matrix, vmin=0.5, vmax=1, square=True,
            mask=np.isnan(rocauc_matrix),
            xticklabels=names, yticklabels=names,
            linewidth=1, linecolor='.5',
            cbar=False, annot=False, cmap=cmap, ax=ax)

        # thick separator every 5 rows/columns, including the outer border
        for i in range(0, n_ds + 1, 5):
            ax.hlines(i, 0, n_ds, color='k', lw=2,
                      capstyle='round').set_clip_on(False)
            ax.vlines(i, 0, n_ds, color='k', lw=2,
                      capstyle='round').set_clip_on(False)

        # save or return
        return self._save_or_return(fig, 'correlations')