Source code for chemicalchecker.tool.targetmate.nonconformist.cp

from .icp import *

# TODO: move contents from nonconformist.icp here

# -----------------------------------------------------------------------------
# TcpClassifier
# -----------------------------------------------------------------------------
class TcpClassifier(BaseEstimator, ClassifierMixin):
    """Transductive conformal classifier.

    Parameters
    ----------
    nc_function : BaseScorer
        Nonconformity scorer object used to calculate nonconformity of
        training examples and test patterns. Should implement ``fit(x, y)``
        and ``calc_nc(x, y)``.

    smoothing : boolean
        Decides whether to use stochastic smoothing of p-values.

    Attributes
    ----------
    train_x : numpy array of shape [n_train_examples, n_features]
        Inputs of training set.

    train_y : numpy array of shape [n_train_examples]
        Outputs of training set.

    nc_function : BaseScorer
        Nonconformity scorer object used to calculate nonconformity scores.

    classes : numpy array of shape [n_classes]
        List of class labels, with indices corresponding to output columns
        of TcpClassifier.predict().

    See also
    --------
    IcpClassifier

    References
    ----------
    .. [1] Vovk, V., Gammerman, A., & Shafer, G. (2005). Algorithmic learning
        in a random world. Springer Science & Business Media.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from nonconformist.base import ClassifierAdapter
    >>> from nonconformist.cp import TcpClassifier
    >>> from nonconformist.nc import ClassifierNc, MarginErrFunc
    >>> iris = load_iris()
    >>> idx = np.random.permutation(iris.target.size)
    >>> train = idx[:int(idx.size / 2)]
    >>> test = idx[int(idx.size / 2):]
    >>> model = ClassifierAdapter(SVC(probability=True))
    >>> nc = ClassifierNc(model, MarginErrFunc())
    >>> tcp = TcpClassifier(nc)
    >>> tcp.fit(iris.data[train, :], iris.target[train])
    >>> tcp.predict(iris.data[test, :], significance=0.10)
    ...     # doctest: +SKIP
    array([[ True, False, False],
           [False,  True, False],
           ...,
           [False,  True, False],
           [False,  True, False]], dtype=bool)
    """

    def __init__(self, nc_function, condition=None, smoothing=True):
        self.train_x, self.train_y = None, None
        self.nc_function = nc_function
        super(TcpClassifier, self).__init__()

        # Check if the condition parameter is the default function (i.e.,
        # lambda x: 0). This is so we can safely clone the object without
        # the clone accidentally having self.conditional = True.
        default_condition = lambda x: 0
        is_default = (callable(condition) and
                      (condition.__code__.co_code ==
                       default_condition.__code__.co_code))

        if is_default:
            self.condition = condition
            self.conditional = False
        elif callable(condition):
            self.condition = condition
            self.conditional = True
        else:
            self.condition = lambda x: 0
            self.conditional = False

        self.smoothing = smoothing

        self.base_icp = IcpClassifier(
            self.nc_function,
            self.condition,
            self.smoothing
        )

        self.classes = None

    def fit(self, x, y):
        # Store the full training set; the transductive procedure refits
        # the underlying ICP for every test pattern in predict().
        self.train_x, self.train_y = x, y
        self.classes = np.unique(y)
    def predict(self, x, significance=None):
        """Predict the output values for a set of input patterns.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        significance : float or None
            Significance level (maximum allowed error rate) of predictions.
            Should be a float between 0 and 1. If ``None``, then the p-values
            are output rather than the predictions.

        Returns
        -------
        p : numpy array of shape [n_samples, n_classes]
            If significance is ``None``, then p contains the p-values for each
            sample-class pair; if significance is a float between 0 and 1,
            then p is a boolean array denoting which labels are included in
            the prediction sets.
        """
        n_test = x.shape[0]
        n_train = self.train_x.shape[0]
        p = np.zeros((n_test, self.classes.size))
        for i in range(n_test):
            for j, y in enumerate(self.classes):
                # Augment the training set with the test pattern, tentatively
                # labelled as class y, and refit the underlying model.
                train_x = np.vstack([self.train_x, x[i, :]])
                train_y = np.hstack([self.train_y, y])
                self.base_icp.fit(train_x, train_y)

                # Nonconformity scores of the augmented set; the last score
                # belongs to the tentatively labelled test pattern.
                scores = self.base_icp.nc_function.score(train_x, train_y)
                ngt = (scores[:-1] > scores[-1]).sum()
                neq = (scores[:-1] == scores[-1]).sum()

                p[i, j] = calc_p(n_train, ngt, neq, self.smoothing)

        if significance is not None:
            return p > significance
        else:
            return p
    def predict_conf(self, x):
        """Predict the output values for a set of input patterns, using
        the confidence-and-credibility output scheme.

        Parameters
        ----------
        x : numpy array of shape [n_samples, n_features]
            Inputs of patterns for which to predict output values.

        Returns
        -------
        p : numpy array of shape [n_samples, 3]
            p contains three columns: the first column contains the most
            likely class for each test pattern; the second column contains
            the confidence in the predicted class label, and the third
            column contains the credibility of the prediction.
        """
        p = self.predict(x, significance=None)
        # Point prediction: the label with the largest p-value; that
        # p-value is the credibility of the prediction.
        label = p.argmax(axis=1)
        credibility = p.max(axis=1)
        # Confidence: one minus the second-largest p-value.
        for i, idx in enumerate(label):
            p[i, idx] = -np.inf
        confidence = 1 - p.max(axis=1)
        return np.array([label, confidence, credibility]).T
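
# -----------------------------------------------------------------------------
# Usage sketch (illustrative only)
# -----------------------------------------------------------------------------
# A minimal end-to-end example, mirroring the doctest in the TcpClassifier
# docstring. It assumes the sibling ``base`` and ``nc`` modules expose
# ``ClassifierAdapter``, ``ClassifierNc`` and ``MarginErrFunc`` as in upstream
# nonconformist; run it as a module (python -m ...nonconformist.cp) so the
# relative imports resolve.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from sklearn.svm import SVC

    from .base import ClassifierAdapter
    from .nc import ClassifierNc, MarginErrFunc

    iris = load_iris()
    idx = np.random.permutation(iris.target.size)
    train, test = idx[:idx.size // 2], idx[idx.size // 2:]

    model = ClassifierAdapter(SVC(probability=True))
    tcp = TcpClassifier(ClassifierNc(model, MarginErrFunc()))
    tcp.fit(iris.data[train, :], iris.target[train])

    # p-values for every (sample, class) pair ...
    p_values = tcp.predict(iris.data[test, :], significance=None)

    # ... boolean prediction sets at a 10% significance level ...
    regions = tcp.predict(iris.data[test, :], significance=0.10)

    # ... and forced point predictions with confidence and credibility.
    conf = tcp.predict_conf(iris.data[test, :])
    print(p_values[:3], regions[:3], conf[:3], sep='\n')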