# Source code for chemicalchecker.util.plot.diagnosticsplot

"""Diagnostics CC plots."""
import os
import pickle
import collections
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import cm
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from matplotlib.font_manager import FontProperties
from .util import coord_color, set_style, homogenous_ticks

from chemicalchecker.util import logged
#from chemicalchecker.util.decorator import safe_return

# Executed once at import time. `set_style` is imported from the sibling
# `.util` module; presumably it applies the shared plot styling -- confirm
# there.
set_style()

# Fraction of the value range (vmax - vmin) that the plotting methods of this
# module add to the lower and subtract from the upper colour-scale limit.
# With 0 the colour scale spans the full value range.
pad_factor = 0


@logged
class DiagnosisPlot(object):
    """DiagnosisPlot class.

    Each public plotting method draws one diagnostic panel on a matplotlib
    axis. When ``results`` is not given, the data is read from a pickle file
    located in ``self.diag.path``; when ``ax`` is not given, a new 5x5 inch
    figure is created. Plotting methods return the axis they drew on.
    """

    def __init__(self, diag):
        """Initialize a DiagnosisPlot instance.

        The plotter works on data precomputed using
        :mod:`~chemicalchecker.core.diagnostics`.

        Args:
            diag (Diagnosis): A Diagnosis object.
        """
        self.diag = diag

    # ------------------------------------------------------------------ #
    # Generic helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _get_ax(ax):
        """Return ``ax``, creating a single-axis 5x5 figure if it is None."""
        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=(5, 5))
        return ax

    def _get_color(self, color):
        """Return ``color``, defaulting to the color of the diagnosed dataset."""
        if color is None:
            return coord_color(self.diag.sign.dataset)
        return color

    @staticmethod
    def _categorical_colors(n):
        """Return ``n`` RGBA colors evenly sampled from the "tab20b" colormap."""
        norm = mpl.colors.Normalize(vmin=1., vmax=n)
        # plt.get_cmap is used instead of cm.get_cmap, which has been removed
        # from recent matplotlib releases.
        cmap = plt.get_cmap("tab20b")
        return cmap(norm([i + 1 for i in range(n)]))

    def load_diagnosis_pickle(self, fn):
        """Load a pickled result stored in the diagnosis directory.

        Args:
            fn (str): File name, relative to ``self.diag.path``.

        Returns:
            The unpickled object.
        """
        # NOTE: unpickling executes arbitrary code; only files written by
        # the diagnostics module itself should live in this directory.
        with open(os.path.join(self.diag.path, fn), "rb") as f:
            return pickle.load(f)

    def available(self):
        """List the available plots.

        Returns:
            pandas.DataFrame: columns ``method`` and ``description``, sorted
            by method name.
        """
        d = {
            "across_coverage": "Coverage of other CC signatures",
            "across_roc": "ROC against other CC signatures",
            "atc_roc": "ROC for the ATC CC space (E1)",
            "cosine_distances": "Cosine distance distribution",
            "cross_coverage": "Coverage of another signature",
            "cross_pr": "PR curve against another signature",
            "cross_roc": "ROC curve against another signature",
            "cluster_sizes": "Size of identified clusters",
            "clusters_projection": "Projection of the clusters",
            "dimensions": "Latent dimensions",
            "euclidean_distances": "Euclidean distance distribution",
            "features_bins": "Binned features values",
            "features_iqr": "IQR of the features values",
            "global_ranks_agreement": "Agreement between similarity ranks "
                                      "and a CC consensus signature",
            "global_ranks_agreement_projection": "Projections of global "
                                                 "ranks agreements",
            "image": "Signature seen as a heatmap",
            "intensities": "Intensities of the signatures",
            "intensities_projection": "Projection with intensities",
            # Keys match the actual method names (`confidences*`).
            "confidences": "Confidence of the signatures",
            "confidences_projection": "Projection with confidences",
            "keys_bins": "Binned keys values",
            "key_coverage": "Dataset coverage of the keys across the CC",
            "key_coverage_projection": "Projections with coverage of keys "
                                       "across the CC",
            "keys_iqr": "IQR of the keys values",
            "moa_roc": "ROC for the MoA CC space (B1)",
            "roc": "ROC for any specified space",
            "orthogonality": "Orthogonality of features",
            "outliers": "Detected outliers",
            "pr": "Precision recall for any specified space",
            "projection": "tSNE 2D projection",
            "ranks_agreement": "Agreement between similarity ranks across "
                               "the CC",
            "ranks_agreement_projection": "Projections of ranks agreements",
            "redundancy": "Redundant keys",
            "values": "Values distibution of the signature"
        }
        return pd.DataFrame(sorted(d.items()),
                            columns=["method", "description"])

    # ------------------------------------------------------------------ #
    # Coverage / ROC / PR
    # ------------------------------------------------------------------ #
    # @safe_return(None)
    def cross_coverage(self, results=None, sign_qualified_name=None, ax=None,
                       title=None, color=None):
        """Bar plot of the mutual overlap with another signature.

        Args:
            results (dict): Needs "my_overlap" and "vs_overlap"; loaded from
                ``cross_coverage_<sign_qualified_name>.pkl`` when None.
            sign_qualified_name (str): Qualified name of the other signature.
            ax: Axis to draw on.
            title (str): Title; a "T = ... | R = ..." title when None.
            color: Bar edge color; dataset color when None.
        """
        ax = self._get_ax(ax)
        color = self._get_color(color)
        if results is None:
            # load_diagnosis_pickle already prefixes self.diag.path, so only
            # the bare file name is passed (avoids a doubled relative path).
            results = self.load_diagnosis_pickle(
                "cross_coverage_%s.pkl" % sign_qualified_name)
        ax.bar([0, 1], [results["my_overlap"], results["vs_overlap"]],
               hatch="////", edgecolor=color, lw=2, color="white")
        ax.set_ylim(0, 1.05)
        ax.set_ylabel("Overlap")
        ax.set_xticks([0, 1])
        ax.set_xticklabels(["T / R", "R / T"])
        if title is None:
            title = "T = %s | R = %s" % (self.diag.sign.qualified_name,
                                         sign_qualified_name)
        ax.set_title(title)
        return ax

    def _roc(self, ax, results, color, dataset_code=None, alpha=0.25,
             label=None, xylabels=True):
        """Draw a ROC curve (interpolated on a 0.001 FPR grid) with its area.

        ``results`` needs "fpr" and "tpr". When ``color`` is None the color
        of ``dataset_code`` is used.
        """
        step = 0.001
        fpr = np.arange(0, 1 + step, step)
        tpr = np.interp(fpr, results["fpr"], results["tpr"])
        if color is None:
            color = coord_color(dataset_code)
        ax.plot(fpr, tpr, color=color, label=label)
        ax.plot([0, 1], [0, 1], color="gray", linestyle="--")
        ax.fill_between(fpr, tpr, color=color, alpha=alpha)
        ax.set_xlim(-0.05, 1.05)
        ax.set_ylim(-0.05, 1.05)
        if xylabels:
            ax.set_xlabel("FPR")
            ax.set_ylabel("TPR")
        return ax

    def _pr(self, ax, results, color, dataset_code=None, alpha=0.25,
            label=None, xylabels=True):
        """Draw a precision-recall curve with its area.

        ``results`` needs "recall" and "precision". When ``color`` is None
        the color of ``dataset_code`` is used.
        """
        # A second `_pr` without `xylabels` used to shadow this definition,
        # which made `cross_pr` fail with a TypeError; only this one is kept.
        recall = results["recall"]
        precision = results["precision"]
        if color is None:
            color = coord_color(dataset_code)
        ax.plot(recall, precision, color=color, label=label)
        ax.axhline(0.5, color="gray", linestyle="--")
        ax.fill_between(recall, precision, color=color, alpha=alpha)
        ax.set_xlim(-0.05, 1.05)
        ax.set_ylim(-0.05, 1.05)
        if xylabels:
            ax.set_xlabel("Recall")
            ax.set_ylabel("Precision")
        return ax

    # @safe_return(None)
    def cross_roc(self, results=None, sign=None, ax=None, title=None,
                  color=None, xylabels=True):
        """ROC curve against another signature.

        Args:
            results (dict): Needs "fpr", "tpr" and "auc"; loaded from
                ``cross_roc_<sign.qualified_name>.pkl`` when None.
            sign: The other signature (its ``qualified_name`` is used).
            title: Title; pass False to draw no title, None for a default.
            color: Curve color; dataset color when None.
            xylabels (bool): Whether to label the axes.
        """
        ax = self._get_ax(ax)
        color = self._get_color(color)
        if results is None:
            results = self.load_diagnosis_pickle(
                "cross_roc_%s.pkl" % sign.qualified_name)
        ax = self._roc(ax, results, color, xylabels=xylabels)
        if title is not False:
            if title is None:
                title = "%s | %s (%.3f)" % (self.diag.sign.qualified_name,
                                            sign.qualified_name,
                                            results["auc"])
            ax.set_title(title)
        return ax

    # @safe_return(None)
    def cross_pr(self, results=None, sign=None, ax=None, title=None,
                 color=None, xylabels=True):
        """Precision-recall curve against another signature.

        Args:
            results (dict): Needs "recall", "precision" and
                "average_precision_score"; loaded from disk when None.
            sign: The other signature (its ``qualified_name`` is used).
            title: Title; pass False to draw no title, None for a default.
            color: Curve color; dataset color when None.
            xylabels (bool): Whether to label the axes.
        """
        ax = self._get_ax(ax)
        color = self._get_color(color)
        if results is None:
            # NOTE(review): the file name says "cross_roc", not "cross_pr".
            # Kept as in the original; confirm against the diagnostics
            # module which file actually holds the PR results.
            results = self.load_diagnosis_pickle(
                "cross_roc_%s.pkl" % sign.qualified_name)
        ax = self._pr(ax, results, color, xylabels=xylabels)
        if title is not False:
            if title is None:
                title = "%s | %s (%.3f)" % (
                    self.diag.sign.qualified_name, sign.qualified_name,
                    results["average_precision_score"])
            ax.set_title(title)
        return ax

    # @safe_return(None)
    def atc_roc(self, results=None, ax=None, title=None, color=None,
                xylabels=True):
        """ROC curve stored in ``atc_roc.pkl`` (default color of "E1.001").

        Pass ``title=False`` to draw no title.
        """
        ax = self._get_ax(ax)
        if color is None:
            color = coord_color("E1.001")
        if results is None:
            results = self.load_diagnosis_pickle("atc_roc.pkl")
        ax = self._roc(ax, results, color, xylabels=xylabels)
        if title is not False:
            if title is None:
                title = "ATC (%.3f)" % results["auc"]
            ax.set_title(title)
        return ax

    # @safe_return(None)
    def moa_roc(self, results=None, ax=None, title=None, color=None,
                xylabels=True):
        """ROC curve stored in ``moa_roc.pkl`` (default color of "B1.001").

        Pass ``title=False`` to draw no title.
        """
        ax = self._get_ax(ax)
        if color is None:
            color = coord_color("B1.001")
        if results is None:
            results = self.load_diagnosis_pickle("moa_roc.pkl")
        ax = self._roc(ax, results, color, xylabels=xylabels)
        if title is not False:
            if title is None:
                title = "MoA (%.3f)" % results["auc"]
            ax.set_title(title)
        return ax

    # @safe_return(None)
    def roc(self, ds, results=None, ax=None, title=None):
        """ROC curve stored in ``roc.pkl``, colored as dataset ``ds``."""
        ax = self._get_ax(ax)
        color = coord_color(ds)
        if results is None:
            results = self.load_diagnosis_pickle("roc.pkl")
        ax = self._roc(ax, results, color)
        if title is None:
            title = "AUROC (%.3f)" % results["auc"]
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def pr(self, ds, results=None, ax=None, title=None):
        """Precision-recall curve stored in ``pr.pkl``, colored as ``ds``."""
        ax = self._get_ax(ax)
        color = coord_color(ds)
        if results is None:
            results = self.load_diagnosis_pickle("pr.pkl")
        ax = self._pr(ax, results, color)
        if title is None:
            title = "Average precision score (%.3f)" % \
                results["average_precision_score"]
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def neigh_roc(self, ds, results=None, ax=None, title=None):
        """One ROC curve per entry of ``neigh_roc.pkl`` (None entries skipped).

        ``results`` maps a neighbor count to a ROC result dict. A title is
        drawn only when ``title`` is given.
        """
        ax = self._get_ax(ax)
        color = coord_color(ds)
        if results is None:
            results = self.load_diagnosis_pickle("neigh_roc.pkl")
        for nn, res in results.items():
            if res is None:
                continue
            ax = self._roc(
                ax, res, color,
                label='NN {:<5} (AUC {:.3f})'.format(nn, res['auc']))
        ax.legend()
        if title is not None:
            ax.set_title(title)
        return ax

    # ------------------------------------------------------------------ #
    # Image / projection / distributions
    # ------------------------------------------------------------------ #
    # @safe_return(None)
    def image(self, results=None, ax=None, title=None, cmap="coolwarm",
              cap_percentile=None):
        """Heatmap of the matrix ``results["X"]`` (keys x features).

        Args:
            cap_percentile (float): Colors are capped at this percentile and
                at its complement; when None, the standard deviation of X
                clipped to [0, 1] is used as percentile.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("image.pkl")
        X = results["X"]
        eps = cap_percentile
        if eps is None:
            eps = np.clip(np.std(X), 0, 1)
        ax.imshow(X, cmap=cmap, aspect="auto",
                  vmin=np.percentile(X, eps),
                  vmax=np.percentile(X, 100 - eps),
                  interpolation='nearest')
        if title is None:
            title = "Image"
        ax.set_ylabel("Keys")
        ax.set_xlabel("Features")
        ax.set_title(title)
        ax.grid(True)
        return ax

    def _proj_lims(self, P):
        """Return x/y limits of the 2D points ``P`` enlarged by 5% per side."""
        xlim = [np.min(P[:, 0]), np.max(P[:, 0])]
        ylim = [np.min(P[:, 1]), np.max(P[:, 1])]
        xscale = (xlim[1] - xlim[0]) * 0.05
        yscale = (ylim[1] - ylim[0]) * 0.05
        xlim[0] -= xscale
        xlim[1] += xscale
        ylim[0] -= yscale
        ylim[1] += yscale
        return xlim, ylim

    # @safe_return(None)
    def projection(self, results=None, ax=None, density=True, color=None,
                   title=None, focus_keys=None):
        """Scatter plot of the 2D projection ``results["P"]``.

        Args:
            density (bool): Color points by a Gaussian KDE estimate
                (densest points drawn last) instead of a flat color.
            color: Flat color used when ``density`` is False.
            focus_keys: Keys to highlight; only those also present in
                ``results["keys"]`` are drawn, as white dots with black edge.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("projection.pkl")
        P = results["P"]
        x = P[:, 0]
        y = P[:, 1]
        if density:
            xy = np.vstack([x, y])
            z = gaussian_kde(xy)(xy)
            idx = z.argsort()
            x, y, z = x[idx], y[idx], z[idx]
            ax.scatter(x, y, c=z, s=10, edgecolor=None)
        else:
            color = self._get_color(color)
            ax.scatter(x, y, s=10, color=color, alpha=0.5)
        if focus_keys is not None:
            focus_keys = list(set(focus_keys) & set(results["keys"]))
            self.__log.debug("%d focus keys found" % len(focus_keys))
            focus_idxs = np.isin(results["keys"], focus_keys)
            P_focus = P[focus_idxs]
            ax.scatter(P_focus[:, 0], P_focus[:, 1],
                       edgecolor="black", color="white")
        xlim, ylim = self._proj_lims(P)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        homogenous_ticks(ax, 4)
        ax.set_xlabel("t-SNE 1")
        ax.set_ylabel("t-SNE 2")
        if title is None:
            title = "2D projection"
        ax.set_title(title)
        return ax

    def _distance_distribution(self, results, ax=None, color=None):
        """Distance distribution plot (KDE of ``results["dists"]``)."""
        ax = self._get_ax(ax)
        color = self._get_color(color)
        dists = results["dists"]
        sns.kdeplot(dists, ax=ax, fill=True, color=color, alpha=0.25,
                    linewidth=1.5)
        ax.set_ylabel("Density")
        ax.set_yticklabels([])
        return ax

    # @safe_return(None)
    def euclidean_distances(self, results=None, ax=None, color=None,
                            title=None):
        """Distribution of the distances in ``euclidean_distances.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("euclidean_distances.pkl")
        ax = self._distance_distribution(results, ax=ax, color=color)
        ax.set_xlabel("Euclidean")
        if title is None:
            title = "Euclidean dist."
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def cosine_distances(self, results=None, ax=None, color=None, title=None):
        """Distribution of the distances in ``cosine_distances.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("cosine_distances.pkl")
        ax = self._distance_distribution(results, ax=ax, color=color)
        if title is None:
            title = "Cosine dist."
        ax.set_title(title)
        ax.set_xlabel("Cosine")
        return ax

    # @safe_return(None)
    def values(self, results=None, ax=None, s=1, cmap="coolwarm", title=None):
        """Scatter of ``results["y"]`` vs ``results["x"]``, colored by x."""
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("values.pkl")
        x = results["x"]
        y = results["y"]
        ax.scatter(x, y, c=x, cmap=cmap, s=s)
        ax.set_xlabel("Value")
        ax.set_ylabel("Density")
        if title is None:
            title = "Values distr."
        ax.set_title(title)
        ax.set_ylim(0, np.max(y) * 1.05)
        ax.set_yticklabels([])
        return ax

    def _iqr(self, results, ax):
        """Plot medians (sorted decreasingly) over the p25-p75 band.

        ``results`` needs the arrays "p25", "p50" and "p75".
        """
        ax = self._get_ax(ax)
        p50 = results["p50"]
        idxs = np.argsort(-p50)
        p25 = results["p25"][idxs]
        p50 = p50[idxs]
        p75 = results["p75"][idxs]
        x = list(range(len(p50)))
        ax.scatter(x, p50, c=p50, cmap="Spectral", s=10, zorder=100)
        ax.fill_between(x, p75, p25, color="lightgray", alpha=0.5, zorder=1)
        ax.set_ylabel("Value")
        ax.axhline(0, color="black", lw=1)
        return ax

    # @safe_return(None)
    def features_iqr(self, results=None, ax=None, title=None):
        """Interquartile plot of ``features_iqr.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("features_iqr.pkl")
        ax = self._iqr(results, ax=ax)
        ax.set_xlabel("Features (sorted)")
        if title is None:
            title = "Values by feat."
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def keys_iqr(self, results=None, ax=None, title=None):
        """Interquartile plot of ``keys_iqr.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("keys_iqr.pkl")
        ax = self._iqr(results, ax=ax)
        ax.set_xlabel("Keys (sorted)")
        if title is None:
            title = "Values by key"
        ax.set_title(title)
        return ax

    def _bins(self, results, ax, scaling, cmap):
        """Bubble plot of the 2D histogram ``results["H"]``.

        Every cell of H becomes one marker placed at (column number, center
        of the bin given by ``results["bins"]``), colored by the cell value
        and sized by ``sqrt(value) / scaling``.
        """
        ax = self._get_ax(ax)
        H = results["H"]
        bins = results["bins"]
        # NOTE(review): the original also computed a sort order from
        # results["p50"] but never applied it to H; presumably H is already
        # sorted upstream -- confirm in the diagnostics module.
        x_ = [i + 1 for i in range(H.shape[1])]
        y_ = [(bins[i - 1] + bins[i]) / 2 for i in range(1, len(bins))]
        x = np.array(x_ * H.shape[0])
        y = np.array([[yy] * H.shape[1] for yy in y_]).ravel()
        v = H.ravel()
        ax.scatter(x, y, c=v, s=np.sqrt(v) / scaling,
                   cmap=cmap, alpha=1, zorder=1)
        ax.set_xlabel("Value")
        return ax

    # @safe_return(None)
    def features_bins(self, results=None, ax=None, title=None, scaling=30,
                      cmap="coolwarm"):
        """Binned values plot of ``features_bins.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("features_bins.pkl")
        ax = self._bins(results, ax=ax, scaling=scaling, cmap=cmap)
        ax.set_xlabel("Features (sorted)")
        if title is None:
            title = "Values by feature"
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def keys_bins(self, results=None, ax=None, title=None, scaling=30,
                  cmap="coolwarm"):
        """Binned values plot of ``keys_bins.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("keys_bins.pkl")
        ax = self._bins(results, ax=ax, scaling=scaling, cmap=cmap)
        ax.set_xlabel("Keys (sorted)")
        if title is None:
            title = "Values by key"
        ax.set_title(title)
        return ax

    # ------------------------------------------------------------------ #
    # Comparison across datasets
    # ------------------------------------------------------------------ #
    def _across(self, values, datasets, ax, title, exemplary, cctype, molset,
                vertical=False, numeral_marker=True):
        """Lollipop plot of one value per dataset, sorted decreasingly.

        Args:
            values (list): One number per dataset.
            datasets (list): Dataset codes; the second character of each
                code is used as tick label (and as marker glyph).
            title (str): Title; "<sign> | <cctype>_<molset>" when None.
            exemplary: Unused, kept for interface compatibility.
            vertical (bool): Put datasets on the y axis.
            numeral_marker (bool): Use the digits '1'..'5' as markers
                instead of dots.
        """
        from matplotlib.path import Path
        from matplotlib.textpath import TextPath
        ax = self._get_ax(ax)
        datasets = np.array(datasets)
        values = np.array(values)
        # Sort by decreasing value, ties broken by dataset code. The builtin
        # `int` replaces `np.int`, which no longer exists in NumPy >= 1.24.
        order = pd.DataFrame({"ds": datasets, "vl": -values}).sort_values(
            ["vl", "ds"]).index.to_numpy().astype(int)
        datasets = datasets[order]
        values = values[order]
        colors = np.array([coord_color(ds) for ds in datasets])
        x = np.arange(1, len(values) + 1)
        if numeral_marker:
            top_pad = 0.03
            edge_width = 0.02 if vertical else 0.04
            fp = FontProperties(family="monospace", weight='bold',
                                style='normal', size=16)
            for num in ['1', '2', '3', '4', '5']:
                glyph = TextPath((0, 0), num, size=50, prop=fp)
                # center the glyph on the data point
                marker = Path(glyph.vertices - glyph.vertices.mean(axis=0))
                mask = np.array([ds[1] == num for ds in datasets],
                                dtype=bool)
                if vertical:
                    xs, ys = values[mask] + top_pad, x[mask]
                else:
                    xs, ys = x[mask], values[mask] + top_pad
                ax.scatter(xs, ys, color=colors[mask], marker=marker,
                           linewidths=edge_width, edgecolors='k', zorder=2)
        elif vertical:
            ax.scatter(values, x, color=colors, zorder=1)
        else:
            ax.scatter(x, values, color=colors, zorder=1)
        # stems, starting well below the visible range
        for pos, val, col in zip(x, values, colors):
            if vertical:
                ax.plot([-1, val], [pos, pos], color=col, zorder=1)
            else:
                ax.plot([pos, pos], [-1, val], color=col, zorder=1)
        if title is None:
            title = "%s | %s_%s" % (
                self.diag.sign.qualified_name, cctype, molset)
        ax.set_title(title)
        ticklabels = [ds[1] for ds in datasets]
        if vertical:
            ax.set_yticks(x)
            ax.set_yticklabels(ticklabels)
            ax.set_ylabel("Datasets")
        else:
            ax.set_xticks(x)
            ax.set_xticklabels(ticklabels)
            ax.set_xlabel("Datasets")
        return ax

    # @safe_return(None)
    def across_coverage(self, results=None, ax=None, title=None,
                        exemplary=True, cctype="sign1", molset="full",
                        vs=True, numeral_marker=False):
        """Coverage with respect to each dataset in ``across_coverage.pkl``.

        Args:
            vs (bool): Plot the "vs_overlap" entries when True, the
                "my_overlap" entries otherwise.
        """
        if results is None:
            results = self.load_diagnosis_pickle("across_coverage.pkl")
        pref = "vs" if vs else "my"
        datasets = list(results.keys())
        covs = [v["%s_overlap" % pref] for v in results.values()]
        ax = self._across(covs, datasets, ax=ax, title=title,
                          exemplary=exemplary, cctype=cctype, molset=molset,
                          numeral_marker=numeral_marker)
        ax.set_ylabel("Coverage")
        if vs:
            ax.set_ylim(-np.max(covs) * 0.05,
                        np.min([1.05, np.max(covs) * 1.1]))
        else:
            ax.set_ylim(-0.05, 1.05)
        if title is None:
            title = "CC wrt Sign" if vs else "Sign wrt CC"
            ax.set_title(title)
        return ax

    # @safe_return(None)
    def across_roc(self, results=None, ax=None, title=None, exemplary=True,
                   cctype="sign1", molset="full", vertical=False,
                   numeral_marker=False):
        """ROC-AUC against each dataset in ``across_roc.pkl``.

        Datasets whose result is None are skipped.
        """
        if results is None:
            results = self.load_diagnosis_pickle("across_roc.pkl")
        datasets = []
        rocs = []
        for k, v in results.items():
            if v is None:
                continue
            datasets.append(k)
            rocs.append(v["auc"])
        ax = self._across(rocs, datasets, ax=ax, title=title,
                          exemplary=exemplary, cctype=cctype, molset=molset,
                          vertical=vertical, numeral_marker=numeral_marker)
        if vertical:
            ax.set_xlabel("ROC-AUC")
            ax.set_xlim(0.45, 1.05)
        else:
            ax.set_ylabel("ROC-AUC")
            ax.set_ylim(0.45, 1.05)
        if title is None:
            title = "ROC across CC"
            ax.set_title(title)
        return ax

    # @safe_return(None)
    def dimensions(self, results=None, ax=None, title=None, exemplary=True,
                   cctype="sign1", molset="full", highligth=True):
        """Scatter of log10(#keys) vs log10(#features) for every dataset.

        ``results`` maps dataset codes to dicts with "keys" and "features";
        the entry "MY" is the diagnosed signature and is highlighted when
        ``highligth`` is True.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("dimensions.pkl")
        colors = []
        x = []
        y = []
        for k, v in results.items():
            if k == "MY":
                continue
            colors.append(coord_color(k))
            y.append(v["keys"])
            x.append(v["features"])
        ax.scatter(np.log10(x), np.log10(y), color=colors)
        v = results["MY"]
        if highligth:
            ax.scatter(np.log10([v["features"]]), np.log10([v["keys"]]),
                       color="white", edgecolor="black", s=80)
        ax.set_xlabel("Features (log10)")
        ax.set_ylabel("Keys (log10)")
        if title is None:
            title = "Keys: %d / Feat: %d" % (v["keys"], v["features"])
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def redundancy(self, results=None, ax=None, title=None):
        """Plot log10 of the second item of each ``results["counts"]`` entry.

        The default title reports ``100 - n_ref / n_full * 100`` percent.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("redundancy.pkl")
        counts = results["counts"]
        x = [i + 1 for i in range(len(counts))]
        y = [np.log10(c[1]) for c in counts]
        ax.scatter(x, y, c=y, cmap="Spectral", s=10, zorder=100)
        if title is None:
            title = "Redund. (%.1f%%)" % (
                100 - results["n_ref"] / results["n_full"] * 100)
        # Keep integer ticks only; `int` replaces the removed `np.int`.
        yticks = sorted(set(np.array(ax.get_yticks(), int)))
        if len(yticks) == 1:
            yticks = [0, 1]
        ax.set_yticks(yticks)
        ax.set_xlabel("Non-red. keys")
        ax.set_ylabel("Red. keys (log10)")
        ax.set_title(title)
        ax.set_ylim(-0.1, max(1, np.max(y)) + 0.1)
        return ax

    # ------------------------------------------------------------------ #
    # Clusters
    # ------------------------------------------------------------------ #
    # @safe_return(None)
    def cluster_sizes(self, results=None, ax=None, max_clusters=20, s=5,
                      show_outliers=False, title=None):
        """Cumulative proportion of keys covered by the clusters.

        The first ``max_clusters`` clusters are drawn as colored dots, the
        remaining ones as a gray line compressed into an equally wide range.
        The label -1 (outliers) is excluded from the curve.

        Args:
            show_outliers (bool): Draw a red segment from the end of the
                curve up to 1 (only when there are more than
                ``max_clusters`` clusters).
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("clusters.pkl")
        y = np.array([r[1] for r in results["lab_counts"]
                      if r[0] != -1]) / results["P"].shape[0]
        y = np.cumsum(y)
        x = [i + 1 for i in range(len(y))]
        xmax = max_clusters * 2
        xticks = [1, max_clusters, xmax]
        xticklabels = [1, max_clusters, len(x)]
        # plot first part
        y_ = y[:max_clusters]
        x_ = x[:max_clusters]
        colors = self._categorical_colors(len(x_))
        ax.scatter(x_, y_, color=colors, zorder=100, s=s)
        if len(x) > max_clusters:
            ax.axvline(max_clusters, color="gray", lw=1, linestyle="--")
        # plot second part
        y_ = y[max_clusters:]
        if len(y_) > 0:
            x_ = list(np.linspace(max_clusters + 1 / len(y_), xmax, len(y_)))
            ax.plot(x_, y_, color="gray", zorder=10)
            # Kept inside this branch so that it can never run without the
            # tail having been drawn.
            if show_outliers:
                ax.plot([xmax, xmax], [np.max(y), 1], lw=1, color="red")
        ax.set_xticks(xticks)
        ax.set_xticklabels(xticklabels)
        ax.set_ylim(-0.05, 1.05)
        ax.set_xlabel("Clusters")
        ax.set_ylabel("Prop. of keys")
        if title is None:
            title = "Cluster sizes (%d)" % results["n_clusters"]
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def clusters_projection(self, results=None, ax=None, max_clusters=20, s=1,
                            show_beyond=False, show_outliers=False,
                            title=None):
        """2D projection colored by cluster.

        Args:
            max_clusters (int): Number of leading clusters (in the order of
                ``results["lab_counts"]``) that get their own color.
            show_beyond (bool): Draw the remaining clusters in gray.
            show_outliers (bool): Draw points labelled -1 in red.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("clusters.pkl")
        P = results["P"]
        labels = results["labels"]
        labs = [r[0] for r in results["lab_counts"] if r[0] != -1]
        labs_ = labs[:max_clusters]
        colors = self._categorical_colors(len(labs_))
        for lab, col in zip(labs_, colors):
            mask = labels == lab
            ax.scatter(P[mask, 0], P[mask, 1], color=col, s=s, zorder=3)
        if show_beyond:
            for lab in labs:
                if lab in labs_:
                    continue
                mask = labels == lab
                ax.scatter(P[mask, 0], P[mask, 1], color="gray", s=s,
                           zorder=2)
        if show_outliers:
            mask = labels == -1
            ax.scatter(P[mask, 0], P[mask, 1], color="red", s=s, zorder=1)
        xlim, ylim = self._proj_lims(P)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        homogenous_ticks(ax, 2)
        ax.set_ylabel("t-SNE 2")
        ax.set_xlabel("t-SNE 1")
        if title is None:
            title = "Top clusters"
        ax.set_title(title)
        return ax

    # ------------------------------------------------------------------ #
    # Intensities / confidences
    # ------------------------------------------------------------------ #
    # @safe_return(None)
    def intensities(self, results=None, ax=None, s=1, title=None,
                    cmap="Spectral"):
        """Scatter of ``results["y"]`` vs ``results["x"]`` from
        ``intensities.pkl``, colored by x."""
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("intensities.pkl")
        x = results["x"]
        y = results["y"]
        vmin = np.min(x)
        vmax = np.max(x)
        pad = (vmax - vmin) * pad_factor
        ax.scatter(x, y, c=x, cmap=cmap, s=s,
                   vmin=vmin + pad, vmax=vmax - pad)
        ax.set_xlabel("Intensity")
        ax.set_ylabel("Density")
        if title is None:
            title = "Intensities"
        ax.set_title(title)
        ax.set_ylim(0, np.max(y) * 1.05)
        ax.set_yticklabels([])
        return ax

    # @safe_return(None)
    def confidences(self, results=None, ax=None, s=1, title=None,
                    cmap="Spectral"):
        """Scatter of ``results["y"]`` vs ``results["x"]`` from
        ``confidences.pkl``, colored by x."""
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("confidences.pkl")
        x = results["x"]
        y = results["y"]
        vmin = np.min(x)
        vmax = np.max(x)
        pad = (vmax - vmin) * pad_factor
        ax.scatter(x, y, c=x, cmap=cmap, s=s,
                   vmin=vmin + pad, vmax=vmax - pad)
        ax.set_xlabel("Confidence")
        ax.set_ylabel("Density")
        if title is None:
            title = "Confidence"
        ax.set_title(title)
        ax.set_ylim(0, np.max(y) * 1.05)
        return ax

    def _binned_projection(self, ax, results, cmap, s, vmin=None, vmax=None):
        """Bubble plot of a binned 2D projection.

        For every non-empty cell of the count matrix ``results["H"]`` one
        marker is drawn at (``bins_x[j]``, ``bins_y[i]``), colored by
        ``results["S"][i, j]`` and sized by the square root of the relative
        count. Fuller cells are drawn first.

        Args:
            vmin, vmax: Optional color-scale limits. When at least one is
                given, the scale is fixed (a missing bound is taken from
                ``results["scores"]``); otherwise it follows the plotted
                values.
        """
        ax = self._get_ax(ax)
        H = results["H"]
        S = results["S"]
        bins_x = results["bins_x"]
        bins_y = results["bins_y"]
        # Explicit limits used to be silently ignored by the scatter call
        # (and the padding used `pad_factor` instead of `pad`); they are now
        # honoured. Without explicit limits the autoscaling is unchanged.
        scatter_kwargs = {}
        if vmin is not None or vmax is not None:
            scores = results["scores"]
            if vmin is None:
                vmin = np.min(scores)
            if vmax is None:
                vmax = np.max(scores)
            pad = (vmax - vmin) * pad_factor
            scatter_kwargs["norm"] = mpl.colors.Normalize(
                vmin=vmin + pad, vmax=vmax - pad)
        cmap = plt.get_cmap(cmap)
        x = []
        y = []
        z = []
        v = []
        for j in range(len(bins_x)):
            for i in range(len(bins_y)):
                if H[i, j] == 0:
                    continue
                x.append(bins_x[j])
                y.append(bins_y[i])
                z.append(S[i, j])
                v.append(H[i, j])
        v = np.array(v)
        idxs = np.argsort(-v)
        x = np.array(x)[idxs]
        y = np.array(y)[idxs]
        z = np.array(z)[idxs]
        v = v[idxs]
        v = v / np.max(v)
        ax.scatter(x, y, c=z, cmap=cmap, s=np.sqrt(v) * s, **scatter_kwargs)
        xlim, ylim = self._proj_lims(results["lims"])
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        homogenous_ticks(ax, 2)
        ax.set_xlabel("t-SNE 1")
        ax.set_ylabel("t-SNE 2")
        return ax

    # @safe_return(None)
    def intensities_projection(self, results=None, ax=None, s=10, title=None,
                               cmap="Spectral"):
        """Binned projection of ``intensities_projection.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("intensities_projection.pkl")
        ax = self._binned_projection(ax, results, cmap, s)
        if title is None:
            title = "Intensities"
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def confidences_projection(self, results=None, ax=None, s=10, title=None,
                               cmap="Spectral"):
        """Binned projection of ``confidences_projection.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle("confidences_projection.pkl")
        ax = self._binned_projection(ax, results, cmap, s)
        if title is None:
            title = "Confidence"
        ax.set_title(title)
        return ax

    # ------------------------------------------------------------------ #
    # Key coverage
    # ------------------------------------------------------------------ #
    # @safe_return(None)
    def key_coverage(self, results=None, ax=None, exemplary=True, s=5,
                     title=None, cmap="Spectral"):
        """Proportion of keys per value found in ``results["counts"]``.

        ``results["counts"]`` maps each key to a number; the plot shows, for
        every number from 0 to the maximum, the proportion of keys having it.

        Args:
            exemplary (bool): Fix the x range to 0..25 instead of using the
                observed maximum.
            s: Marker size.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("key_coverage.pkl")
        counts = collections.Counter(results["counts"].values())
        maxv = 25 if exemplary else max(max(counts, default=0), 0)
        x = np.arange(0, maxv + 1)
        y = np.array([counts[x_] for x_ in x])
        y = y / np.sum(y)
        vmin = 0
        vmax = np.max(x)
        pad = (vmax - vmin) * pad_factor
        norm = mpl.colors.Normalize(vmin=vmin + pad, vmax=vmax - pad)
        cmap = plt.get_cmap(cmap)
        colors = cmap(norm(x))
        # `s` used to be ignored in favour of a hard-coded 5 (its default).
        ax.scatter(x, y, color=colors, s=s)
        for x_, y_, col in zip(x, y, colors):
            ax.plot([x_, x_], [0, y_], color=col)
        ax.set_ylabel("Prop. keys")
        ax.set_xlabel("Datasets")
        if title is None:
            title = "Key coverage"
        ax.set_title(title)
        ax.set_ylim(0, np.max(y) * 1.05)
        return ax

    # @safe_return(None)
    def key_coverage_projection(self, results=None, ax=None, exemplary=True,
                                s=10, title=None, cmap="coolwarm"):
        """Binned projection of ``key_coverage_projection.pkl``.

        Args:
            exemplary (bool): Fix the color scale to 0..25.
        """
        if results is None:
            results = self.load_diagnosis_pickle("key_coverage_projection.pkl")
        if exemplary:
            vmin, vmax = 0, 25
        else:
            vmin, vmax = None, None
        ax = self._binned_projection(
            ax, results, cmap, s, vmin=vmin, vmax=vmax)
        if title is None:
            title = "Key coverage"
        ax.set_title(title)
        return ax

    # ------------------------------------------------------------------ #
    # Ranks agreement
    # ------------------------------------------------------------------ #
    def _ranks_density(self, results, ax, stat, s, title, cmap):
        """KDE curve of the non-NaN values of ``results[stat]``.

        Shared implementation of `ranks_agreement` and
        `global_ranks_agreement`. The density is evaluated on 1000 points
        and drawn as a scatter colored by the x value.
        """
        scores = results[stat]
        scores = scores[~np.isnan(scores)]
        kernel = gaussian_kde(scores)
        x = np.linspace(np.min(scores), np.max(scores), 1000)
        y = kernel(x)
        vmin = np.min(x)
        vmax = np.max(x)
        pad = (vmax - vmin) * pad_factor
        ax.scatter(x, y, c=x, cmap=cmap, s=s,
                   vmin=vmin + pad, vmax=vmax - pad)
        ax.set_xlabel("RBO")
        ax.set_ylabel("Density")
        if title is None:
            title = "CC ranks agree."
        ax.set_title(title)
        ax.set_ylim(0, np.max(y) * 1.05)
        ax.set_yticklabels([])
        return ax

    # @safe_return(None)
    def ranks_agreement(self, results=None, ax=None, stat="mean", s=1,
                        title=None, exemplary=True, cctype="sign1",
                        molset="full", cmap="Spectral"):
        """Density of ``results[stat]`` from ``ranks_agreement.pkl``.

        ``exemplary``, ``cctype`` and ``molset`` are unused and kept for
        interface compatibility.
        """
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("ranks_agreement.pkl")
        return self._ranks_density(results, ax, stat, s, title, cmap)

    # @safe_return(None)
    def ranks_agreement_projection(self, results=None, ax=None, s=10,
                                   title=None, cmap="Spectral"):
        """Binned projection of ``ranks_agreement_projection.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle(
                "ranks_agreement_projection.pkl")
        ax = self._binned_projection(ax, results, cmap, s)
        if title is None:
            title = "CC ranks agree."
        ax.set_title(title)
        return ax

    # @safe_return(None)
    def global_ranks_agreement(self, results=None, ax=None, stat="mean", s=1,
                               title=None, cmap="Spectral"):
        """Density of ``results[stat]`` from ``global_ranks_agreement.pkl``."""
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("global_ranks_agreement.pkl")
        return self._ranks_density(results, ax, stat, s, title, cmap)

    # @safe_return(None)
    def global_ranks_agreement_projection(self, results=None, ax=None, s=10,
                                          title=None, cmap="Spectral"):
        """Binned projection of ``global_ranks_agreement_projection.pkl``."""
        if results is None:
            results = self.load_diagnosis_pickle(
                "global_ranks_agreement_projection.pkl")
        ax = self._binned_projection(ax, results, cmap, s)
        if title is None:
            title = "CC ranks agree."
        ax.set_title(title)
        return ax

    # ------------------------------------------------------------------ #
    # Orthogonality / outliers / legend
    # ------------------------------------------------------------------ #
    # @safe_return(None)
    def orthogonality(self, results=None, ax=None, title=None, s=1,
                      cmap="coolwarm"):
        """KDE curve of ``results["dots"]`` on a fixed [-1, 1] color scale."""
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("orthogonality.pkl")
        scores = results["dots"]
        kernel = gaussian_kde(scores)
        x = np.linspace(np.min(scores), np.max(scores), 1000)
        y = kernel(x)
        ax.scatter(x, y, c=x, cmap=cmap, s=s, vmin=-1, vmax=1)
        ax.set_xlabel("Dot product")
        ax.set_ylabel("Density")
        if title is None:
            title = "Orthogonality"
        ax.set_title(title)
        ax.set_ylim(0, np.max(y) * 1.05)
        ax.set_xlim(-1.05, 1.05)
        ax.set_yticklabels([])
        return ax

    # @safe_return(None)
    def outliers(self, results=None, ax=None, title=None, s=1,
                 cmap="coolwarm_r"):
        """Sorted negated ``results["scores"]``, colored on a [-0.5, 0.5]
        scale (values outside are clipped for coloring only)."""
        ax = self._get_ax(ax)
        if results is None:
            results = self.load_diagnosis_pickle("outliers.pkl")
        scs = np.sort(-results["scores"])
        x = [i + 1 for i in range(len(scs))]
        norm = mpl.colors.Normalize(vmin=-0.5, vmax=0.5)
        cmap = plt.get_cmap(cmap)
        colors = cmap(norm(np.clip(scs, -0.5, 0.5)))
        ax.scatter(x=x, y=scs, color=colors, s=s)
        if title is None:
            title = "Outliers"
        ax.set_title(title)
        ax.set_xlabel("Keys")
        ax.set_ylabel("Anomaly score")
        ax.axhline(0, color="gray", linestyle="--")
        # freeze the current x limits
        xlim = ax.get_xlim()
        ax.set_xlim(xlim)
        return ax

    def legend(self, ax=None, s=10):
        """Draw the legend of the five CC levels (A to E).

        ``s`` is currently unused.
        """
        ax = self._get_ax(ax)
        colors = [coord_color(x) for x in "ABCDE"]
        ax.scatter([0] * 5, [1, 2, 3, 4, 5], color=colors)
        R = [("A", "Chemistry"),
             ("B", "Targets"),
             ("C", "Networks"),
             ("D", "Cells"),
             ("E", "Clinics")]
        for i, r in enumerate(R):
            ax.text(0.1, i + 1, s="%s: %s" % r, va="center")
        ax.set_axis_off()
        ax.set_ylim(6, 0)
        ax.set_xlim(-0.1, 1)
        ax.set_title("CC levels")
        return ax

    # ------------------------------------------------------------------ #
    # Canvases
    # ------------------------------------------------------------------ #
    def canvas_small(self, title, skip_plots):
        """Build the small (2x6 grid) canvas.

        Args:
            title (str): Figure title; "<dataset> <cctype>" when None.
            skip_plots: Container of plot names to leave out.

        Returns:
            The (closed) matplotlib figure.
        """
        fig = plt.figure(figsize=(14, 4))
        gs = fig.add_gridspec(2, 6, wspace=0.6, hspace=0.6)
        if "image" not in skip_plots:
            self.image(ax=fig.add_subplot(gs[0, :2]))
        if "features_bins" not in skip_plots:
            self.features_bins(ax=fig.add_subplot(gs[1, 0]))
        if "keys_bins" not in skip_plots:
            self.keys_bins(ax=fig.add_subplot(gs[1, 1]))
        if "values" not in skip_plots:
            self.values(ax=fig.add_subplot(gs[0, 2]))
        if "redundancy" not in skip_plots:
            self.redundancy(ax=fig.add_subplot(gs[1, 2]))
        if "projection" not in skip_plots:
            self.projection(ax=fig.add_subplot(gs[0:2, 3:5]))
        if "euclidean_distances" not in skip_plots:
            self.euclidean_distances(ax=fig.add_subplot(gs[0, 5]))
        if "cosine_distances" not in skip_plots:
            self.cosine_distances(ax=fig.add_subplot(gs[1, 5]))
        if title is None:
            title = "%s %s" % (self.diag.sign.dataset, self.diag.sign.cctype)
        fig.suptitle(title, fontweight="bold", y=1, size='xx-large')
        plt.close()
        return fig

    def canvas_medium(self, title, skip_plots):
        """Build the medium (6x6 grid) canvas.

        Confidence panels are drawn for 'sign3' signatures, intensity panels
        for any other cctype.

        Args:
            title (str): Figure title; "<dataset> <cctype>" when None.
            skip_plots: Container of plot names to leave out.

        Returns:
            The (closed) matplotlib figure.
        """
        fig = plt.figure(figsize=(14, 14))
        gs = fig.add_gridspec(6, 6, wspace=0.6, hspace=0.6)
        self.legend(ax=fig.add_subplot(gs[0, 0]))
        if "redundancy" not in skip_plots:
            self.redundancy(ax=fig.add_subplot(gs[1, 5]))
        if "outliers" not in skip_plots:
            self.outliers(ax=fig.add_subplot(gs[0, 5]))
        if "values" not in skip_plots:
            self.values(ax=fig.add_subplot(gs[1, 0]))
        # The original guard (`"confidences" or "intensities" not in ...`)
        # was always true and left an empty axis behind when the panel was
        # skipped; each panel is now created only when it is drawn.
        if self.diag.sign.cctype == 'sign3':
            if "confidences" not in skip_plots:
                self.confidences(ax=fig.add_subplot(gs[0, 1]))
            if "confidences_projection" not in skip_plots:
                self.confidences_projection(ax=fig.add_subplot(gs[0, 2]))
        else:
            if "intensities" not in skip_plots:
                self.intensities(ax=fig.add_subplot(gs[0, 1]))
            if "intensities_projection" not in skip_plots:
                self.intensities_projection(ax=fig.add_subplot(gs[0, 2]))
        if "key_coverage" not in skip_plots:
            self.key_coverage(ax=fig.add_subplot(gs[1, 1]))
        if "key_coverage_projection" not in skip_plots:
            self.key_coverage_projection(ax=fig.add_subplot(gs[1, 2]))
        if "global_ranks_agreement" not in skip_plots:
            self.global_ranks_agreement(ax=fig.add_subplot(gs[0, 4]))
        if "global_ranks_agreement_projection" not in skip_plots:
            self.global_ranks_agreement_projection(
                ax=fig.add_subplot(gs[0, 3]))
        if "cluster_sizes" not in skip_plots:
            self.cluster_sizes(ax=fig.add_subplot(gs[1, 4]))
        if "clusters_projection" not in skip_plots:
            self.clusters_projection(ax=fig.add_subplot(gs[1, 3]))
        if "euclidean_distances" not in skip_plots:
            self.euclidean_distances(ax=fig.add_subplot(gs[2, 4]))
        if "cosine_distances" not in skip_plots:
            self.cosine_distances(ax=fig.add_subplot(gs[2, 5]))
        if "features_bins" not in skip_plots:
            self.features_bins(ax=fig.add_subplot(gs[3, 0]))
        if "keys_bins" not in skip_plots:
            self.keys_bins(ax=fig.add_subplot(gs[3, 1]))
        if "projection" not in skip_plots:
            self.projection(ax=fig.add_subplot(gs[2:4, 2:4]))
        if "image" not in skip_plots:
            self.image(ax=fig.add_subplot(gs[2, :2]))
        if "moa_roc" not in skip_plots:
            self.moa_roc(ax=fig.add_subplot(gs[3, 4]))
        if "atc_roc" not in skip_plots:
            self.atc_roc(ax=fig.add_subplot(gs[3, 5]))
        if "dimensions" not in skip_plots:
            self.dimensions(ax=fig.add_subplot(gs[-2:, :2]))
        if "across_coverage" not in skip_plots:
            self.across_coverage(ax=fig.add_subplot(gs[-2, 2:4]), vs=True)
            self.across_coverage(ax=fig.add_subplot(gs[-1, 2:4]), vs=False)
        if "across_roc" not in skip_plots:
            self.across_roc(ax=fig.add_subplot(gs[-2:, 4:6]))
        if title is None:
            title = "%s %s" % (self.diag.sign.dataset, self.diag.sign.cctype)
        fig.suptitle(title, fontweight="bold", y=0.92, size='xx-large')
        plt.close()
        return fig

    def canvas_large(self, title, skip_plots):
        """Placeholder for the large canvas: not implemented, returns None."""
        pass

    def custom_comparative_vertical(self, title, skip_plots):
        """Build the 4x4 comparative canvas.

        Args:
            title (str): Figure title; "<dataset> <cctype>" when None.
            skip_plots: Unused, kept for a signature parallel to the other
                canvases.

        Returns:
            The (closed) matplotlib figure.
        """
        fig = plt.figure(figsize=(9, 9))
        gs = fig.add_gridspec(4, 4, wspace=0.6, hspace=0.6)
        self.projection(ax=fig.add_subplot(gs[0:2, 0:2]))
        self.legend(ax=fig.add_subplot(gs[2, 1]))
        ax = fig.add_subplot(gs[0, 2])
        if self.diag.sign.cctype == 'sign3':
            self.confidences(ax=ax)
        else:
            self.intensities(ax=ax)
        self.clusters_projection(ax=fig.add_subplot(gs[0, 3]))
        self.moa_roc(ax=fig.add_subplot(gs[1, 2]))
        self.atc_roc(ax=fig.add_subplot(gs[1, 3]))
        self.across_roc(ax=fig.add_subplot(gs[2:4, 2:4]))
        if title is None:
            title = "%s %s" % (self.diag.sign.dataset, self.diag.sign.cctype)
        fig.suptitle(title, fontweight="bold", y=0.95, size='xx-large')
        plt.close()
        return fig

    def canvas(self, size="medium", title=None, skip_plots=None):
        """Build one of the predefined canvases.

        Args:
            size (str): "small", "medium", "large" or "compare_v".
            title (str): Figure title; "<dataset> <cctype>" when None.
            skip_plots: Container of plot names to leave out (default: none).

        Returns:
            The matplotlib figure, or None for "large" (not implemented) and
            for unknown sizes.
        """
        # None instead of a shared mutable `[]` default.
        if skip_plots is None:
            skip_plots = ()
        self.__log.debug("Plotting Canvas %s" % size)
        if size == "small":
            return self.canvas_small(title=title, skip_plots=skip_plots)
        elif size == "medium":
            return self.canvas_medium(title=title, skip_plots=skip_plots)
        elif size == "large":
            return self.canvas_large(title=title, skip_plots=skip_plots)
        elif size == "compare_v":
            return self.custom_comparative_vertical(title=title,
                                                    skip_plots=skip_plots)
        else:
            return None