Source code for skmine.itemsets.slim_classifier

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted

from ..itemsets.slim import SLIM


[docs]class SlimClassifier(BaseEstimator, ClassifierMixin):
    """
    Classifier using the SLIM compression algorithm. Works for binary and multi-class problems.

    This classifier uses one SLIM instance per class in the database, resulting in a code table per class.
    To classify a transaction, we simply assign the class belonging to the code table that provides the minimal encoded
    length for the transaction.

    Parameters
    ----------
    items: set, default=None
        The list of items in the complete dataset not only the training set. This improves the accuracy of the model.
        Without this set of items, the classifier works but is less good in particular with small datasets.

    pruning: bool, default=False
        Indicates whether each SLIM classifier enables pruning

    Attributes
    ----------
    classes_ : array-like
        All the unique classes

    models_ : list
        A list of SLIM instances corresponding to *classes_*

    classes_X_ : list
        A list where each element is a subset of X and each element contains the transactions of X associated
        with the class from *classes_*  of the same index
    """

    def __init__(self, items=None, pruning=False):
        self.items = items
        self.pruning = pruning

    def _more_tags(self):
        return {"non_deterministic": True,
                "no_validation": True,
                "preserves_dtype": []}

[docs]    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X: iterable, {array_like}
            containing n_transactions containing themselves n_items

        y: array-like of shape (n_samples,)
            Target vector relative to X.

        Returns
        -------
        self : object
            An instance of the estimator
        """
        self._validate_data(X, y, reset=True, validate_separately=False, force_all_finite=False,
                            accept_sparse=False, ensure_2d=False, ensure_min_samples=0, dtype=list)
        self.classes_ = np.unique(y)
        self.classes_X_ = []
        self.models_ = []

        for c in self.classes_:
            transactions_classes = [transaction for transaction, target in zip(X, y) if target == c]
            self.classes_X_.append(transactions_classes)
            self.models_.append(SLIM(items=self.items))

        for model, data in zip(self.models_, self.classes_X_):
            model.fit(data)

        return self

[docs]    def predict(self, X):
        """Perform classification on samples in X

        Parameters
        ----------
        X : iterable containing n_transactions containing themselves n_items

        Returns
        -------
        y_pred : np.array of shape (n_samples,)
            Class labels for samples in X
        """
        check_is_fitted(self, "classes_")
        self.models_scores = np.vstack([model.decision_function(X).values for model in self.models_]).T

        return self.classes_[self.models_scores.argmax(axis=1)]

    def __copy__(self):
        return SlimClassifier(items=self.items, pruning=self.pruning)