Source code for skmine.itemsets.slim_classifier

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted

from ..itemsets.slim import SLIM


class SlimClassifier(BaseEstimator, ClassifierMixin):
    """
    Classifier using the SLIM compression algorithm. Works for binary and multi-class problems.

    This classifier uses one SLIM instance per class in the database, resulting in a code table per class.
    To classify a transaction, we simply assign the class belonging to the code table that provides the
    minimal encoded length for the transaction.

    Parameters
    ----------
    items: set, default=None
        The set of items in the complete dataset, not only the training set.
        This improves the accuracy of the model.
        Without this set of items, the classifier still works but is less accurate,
        in particular with small datasets.

    pruning: bool, default=False
        Indicates whether each per-class SLIM model enables pruning.
        The value is forwarded to every SLIM instance created by `fit`.

    Attributes
    ----------
    classes_ : array-like
        All the unique classes

    models_ : list
        A list of SLIM instances corresponding to *classes_*

    classes_X_ : list
        A list where each element is a subset of X and each element contains the transactions of X
        associated with the class from *classes_* of the same index

    models_scores : np.ndarray
        Set by `predict`. Per-model decision scores stacked so that each column corresponds
        to the class of the same index in *classes_*.
    """

    def __init__(self, items=None, pruning=False):
        self.items = items
        self.pruning = pruning

    def _more_tags(self):
        return {"non_deterministic": True, "no_validation": True, "preserves_dtype": []}

    def fit(self, X, y):
        """Fit the model according to the given training data.

        One SLIM model is created and fitted per unique label found in `y`,
        using only the transactions of `X` carrying that label.

        Parameters
        ----------
        X: iterable, {array_like}
            containing n_transactions containing themselves n_items

        y: array-like of shape (n_samples,)
            Target vector relative to X.

        Returns
        -------
        self : object
            An instance of the estimator
        """
        self._validate_data(
            X,
            y,
            reset=True,
            validate_separately=False,
            force_all_finite=False,
            accept_sparse=False,
            ensure_2d=False,
            ensure_min_samples=0,
            dtype=list,
        )
        self.classes_ = np.unique(y)
        self.classes_X_ = []
        self.models_ = []

        for c in self.classes_:
            # Transactions of X whose target equals the current class.
            transactions_classes = [transaction for transaction, target in zip(X, y) if target == c]
            self.classes_X_.append(transactions_classes)
            # Forward `pruning` so the constructor parameter actually takes effect;
            # it was previously stored on the estimator but never handed to SLIM.
            self.models_.append(SLIM(items=self.items, pruning=self.pruning))

        for model, data in zip(self.models_, self.classes_X_):
            model.fit(data)

        return self

    def predict(self, X):
        """Perform classification on samples in X

        Each fitted per-class model scores every transaction, and the class whose model
        gives the highest score is returned.
        NOTE(review): this presumes `SLIM.decision_function` returns higher scores for
        shorter encoded lengths -- confirm against the SLIM implementation.

        Parameters
        ----------
        X : iterable containing n_transactions containing themselves n_items

        Returns
        -------
        y_pred : np.array of shape (n_samples,)
            Class labels for samples in X
        """
        # Both attributes are read below, so require both to exist.
        check_is_fitted(self, ["classes_", "models_"])

        # One row of scores per model, transposed so that rows index the samples
        # and columns line up with `classes_`.
        self.models_scores = np.vstack(
            [model.decision_function(X).values for model in self.models_]
        ).T
        return self.classes_[self.models_scores.argmax(axis=1)]

    def __copy__(self):
        # Return a new, unfitted classifier carrying the same constructor parameters.
        return SlimClassifier(items=self.items, pruning=self.pruning)