Source code for chemicalchecker.tool.targetmate.nonconformist.cp

from .icp import *

# TODO: move contents from nonconformist.icp here

# -----------------------------------------------------------------------------
# TcpClassifier
# -----------------------------------------------------------------------------
class TcpClassifier(BaseEstimator, ClassifierMixin):
    """Transductive conformal classifier.

    Parameters
    ----------
    nc_function : BaseScorer
        Nonconformity scorer object used to calculate nonconformity of
        training examples and test patterns. Should implement ``fit(x, y)``
        and ``calc_nc(x, y)``.

    smoothing : boolean
        Decides whether to use stochastic smoothing of p-values.

    Attributes
    ----------
    train_x : numpy array of shape [n_train_examples, n_features]
        Inputs of training set.

    train_y : numpy array of shape [n_train_examples]
        Outputs of training set.

    nc_function : BaseScorer
        Nonconformity scorer object used to calculate nonconformity scores.

    classes : numpy array of shape [n_classes]
        List of class labels, with indices corresponding to output columns
        of TcpClassifier.predict().

    See also
    --------
    IcpClassifier

    References
    ----------
    .. [1] Vovk, V., Gammerman, A., & Shafer, G. (2005). Algorithmic learning
        in a random world. Springer Science & Business Media.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from nonconformist.base import ClassifierAdapter
    >>> from nonconformist.cp import TcpClassifier
    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
    >>> iris = load_iris()
    >>> idx = np.random.permutation(iris.target.size)
    >>> train = idx[:int(idx.size / 2)]
    >>> test = idx[int(idx.size / 2):]
    >>> model = ClassifierAdapter(SVC(probability=True))
    >>> nc = ClassifierNc(model, MarginErrFunc())
    >>> tcp = TcpClassifier(nc)
    >>> tcp.fit(iris.data[train, :], iris.target[train])
    >>> tcp.predict(iris.data[test, :], significance=0.10)
    ...     # doctest: +SKIP
    array([[ True, False, False],
           [False,  True, False],
           ...,
           [False,  True, False],
           [False,  True, False]], dtype=bool)
    """

    def __init__(self, nc_function, condition=None, smoothing=True):
        self.train_x, self.train_y = None, None
        self.nc_function = nc_function
        super(TcpClassifier, self).__init__()

        # Check if the condition parameter is the default function (i.e.,
        # lambda x: 0). This is so we can safely clone the object without
        # the clone accidentally having self.conditional = True.
        default_condition = lambda x: 0
        is_default = (callable(condition) and
                      (condition.__code__.co_code ==
                       default_condition.__code__.co_code))

        if is_default:
            self.condition = condition
            self.conditional = False
        elif callable(condition):
            self.condition = condition
            self.conditional = True
        else:
            self.condition = lambda x: 0
            self.conditional = False

        self.smoothing = smoothing

        self.base_icp = IcpClassifier(
            self.nc_function,
            self.condition,
            self.smoothing
        )

        self.classes = None

    def fit(self, x, y):
        # Store the full training set; the transductive procedure refits
        # the underlying ICP for every test pattern in predict().
        self.train_x, self.train_y = x, y
        self.classes = np.unique(y)
    def predict(self, x, significance=None):
        """Predict the output values for a set of input patterns.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        significance : float or None
            Significance level (maximum allowed error rate) of predictions.
            Should be a float between 0 and 1. If ``None``, then the p-values
            are output rather than the predictions.

        Returns
        -------
        p : numpy array of shape [n_samples, n_classes]
            If significance is ``None``, then p contains the p-values for each
            sample-class pair; if significance is a float between 0 and 1,
            then p is a boolean array denoting which labels are included in
            the prediction sets.
        """
        n_test = x.shape[0]
        n_train = self.train_x.shape[0]
        p = np.zeros((n_test, self.classes.size))
        for i in range(n_test):
            for j, y in enumerate(self.classes):
                # Augment the training set with the test pattern, tentatively
                # labelled as class y, and refit the underlying model.
                train_x = np.vstack([self.train_x, x[i, :]])
                train_y = np.hstack([self.train_y, y])
                self.base_icp.fit(train_x, train_y)

                # Nonconformity scores of the augmented set; the last score
                # belongs to the tentatively labelled test pattern.
                scores = self.base_icp.nc_function.score(train_x, train_y)
                ngt = (scores[:-1] > scores[-1]).sum()
                neq = (scores[:-1] == scores[-1]).sum()

                p[i, j] = calc_p(n_train, ngt, neq, self.smoothing)

        if significance is not None:
            return p > significance
        else:
            return p
    def predict_conf(self, x):
        """Predict the output values for a set of input patterns, using
        the confidence-and-credibility output scheme.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        Returns
        -------
        p : numpy array of shape [n_samples, 3]
            p contains three columns: the first column contains the most
            likely class for each test pattern; the second column contains
            the confidence in the predicted class label, and the third
            column contains the credibility of the prediction.
        """
        p = self.predict(x, significance=None)
        # Point prediction: the label with the largest p-value; that
        # p-value is the credibility of the prediction.
        label = p.argmax(axis=1)
        credibility = p.max(axis=1)
        # Confidence: one minus the second-largest p-value.
        for i, idx in enumerate(label):
            p[i, idx] = -np.inf
        confidence = 1 - p.max(axis=1)
        return np.array([label, confidence, credibility]).T
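
# -----------------------------------------------------------------------------
# Usage sketch (illustrative only)
# -----------------------------------------------------------------------------
# A minimal end-to-end example, mirroring the doctest in the TcpClassifier
# docstring. It assumes the sibling ``base`` and ``nc`` modules expose
# ``ClassifierAdapter``, ``ClassifierNc`` and ``MarginErrFunc`` as in upstream
# nonconformist; run it as a module (python -m ...nonconformist.cp) so the
# relative imports resolve.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from sklearn.svm import SVC

    from .base import ClassifierAdapter
    from .nc import ClassifierNc, MarginErrFunc

    iris = load_iris()
    idx = np.random.permutation(iris.target.size)
    train, test = idx[:idx.size // 2], idx[idx.size // 2:]

    model = ClassifierAdapter(SVC(probability=True))
    tcp = TcpClassifier(ClassifierNc(model, MarginErrFunc()))
    tcp.fit(iris.data[train, :], iris.target[train])

    # p-values for every (sample, class) pair ...
    p_values = tcp.predict(iris.data[test, :], significance=None)

    # ... boolean prediction sets at a 10% significance level ...
    regions = tcp.predict(iris.data[test, :], significance=0.10)

    # ... and forced point predictions with confidence and credibility.
    conf = tcp.predict_conf(iris.data[test, :])
    print(p_values[:3], regions[:3], conf[:3], sep='\n')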