Source code for chemicalchecker.tool.targetmate.models.vanillaconfigs

from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold

SEED = 42

[docs]class VanillaClassifierConfigs: def __init__(self, base_mod, n_jobs, **kwargs): if base_mod == "logistic_regression": from sklearn.linear_model import LogisticRegressionCV self.base_mod = LogisticRegressionCV( cv=3, class_weight="balanced", max_iter=1000, n_jobs=n_jobs) if base_mod == "random_forest": from sklearn.ensemble import RandomForestClassifier self.base_mod = RandomForestClassifier( n_estimators=500, class_weight="balanced", max_features="sqrt", random_state=SEED, n_jobs=n_jobs) if base_mod == "naive_bayes": from sklearn.naive_bayes import GaussianNB self.base_mod = GaussianNB() if base_mod == "balanced_random_forest": from imblearn.ensemble import BalancedRandomForestClassifier self.base_mod = BalancedRandomForestClassifier( n_estimators=500, class_weight="balanced", n_jobs=n_jobs) if base_mod == "xgboost": from xgboost.sklearn import XGBClassifier self.base_mod = XGBClassifier(n_estimators=500, n_jobs=n_jobs) if base_mod == "SVM": from sklearn.svm import SVC self.base_mod = SVC(probability=True, class_weight = 'balanced') if base_mod == "LinearSVM": from sklearn.svm import SVC self.base_mod = SVC(kernel = 'linear', probability=True, class_weight = 'balanced') if base_mod == "UnbalancedSVM": from sklearn.svm import SVC self.base_mod = SVC(probability=True) if base_mod == "UnbalancedLinearSVM": from sklearn.svm import SVC self.base_mod = SVC(kernel='linear', probability=True) def as_pipeline(self, X=None, y=None, **kwargs): return self.base_mod
#return Pipeline([('variance_threshold_0', VarianceThreshold()), # ('classify', self.base_mod)])