Multi-class, Multi-label, Ordinal Classification With Sklearn
Solution 1:
This may not be the precise answer you're looking for, this article outlines a technique as follows:
We can take advantage of the ordered class value by transforming a k-class ordinal regression problem to a k-1 binary classification problem, we convert an ordinal attribute A* with ordinal value V1, V2, V3, … Vk into k-1 binary attributes, one for each of the original attribute’s first k − 1 values. The ith binary attribute represents the test A* > Vi
Essentially, aggregate multiple binary classifiers (predict target > 1, target > 2, target > 3, target > 4) to be able to predict whether a target is 1, 2, 3, 4 or 5. The author creates an OrdinalClassifier class that stores multiple binary classifiers in a Python dictionary.
classOrdinalClassifier():
def__init__(self, clf):
self.clf = clf
self.clfs = {}
deffit(self, X, y):
self.unique_class = np.sort(np.unique(y))
if self.unique_class.shape[0] > 2:
for i inrange(self.unique_class.shape[0]-1):
# for each k - 1 ordinal value we fit a binary classification problem
binary_y = (y > self.unique_class[i]).astype(np.uint8)
clf = clone(self.clf)
clf.fit(X, binary_y)
self.clfs[i] = clf
defpredict_proba(self, X):
clfs_predict = {k:self.clfs[k].predict_proba(X) for k in self.clfs}
predicted = []
for i,y inenumerate(self.unique_class):
if i == 0:
# V1 = 1 - Pr(y > V1)
predicted.append(1 - clfs_predict[y][:,1])
elif y in clfs_predict:
# Vi = Pr(y > Vi-1) - Pr(y > Vi)
predicted.append(clfs_predict[y-1][:,1] - clfs_predict[y][:,1])
else:
# Vk = Pr(y > Vk-1)
predicted.append(clfs_predict[y-1][:,1])
return np.vstack(predicted).T
defpredict(self, X):
return np.argmax(self.predict_proba(X), axis=1)
The technique originates in A Simple Approach to Ordinal Classification
Solution 2:
Here is an example using KNN that should be tuneable in an sklearn pipeline or grid search.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.base import clone, BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils.multiclassimport check_classification_targets
classKNeighborsOrdinalClassifier(BaseEstimator, ClassifierMixin):
def__init__(self, n_neighbors=5, *, weights='uniform',
algorithm='auto', leaf_size=30, p=2,
metric='minkowski', metric_params=None, n_jobs=None):
self.n_neighbors = n_neighbors
self.weights = weights
self.algorithm = algorithm
self.leaf_size = leaf_size
self.p = p
self.metric = metric
self.metric_params = metric_params
self.n_jobs = n_jobs
deffit(self, X, y):
X, y = check_X_y(X, y)
check_classification_targets(y)
self.clf_ = KNeighborsClassifier(**self.get_params())
self.clfs_ = {}
self.classes_ = np.sort(np.unique(y))
if self.classes_.shape[0] > 2:
for i inrange(self.classes_.shape[0]-1):
# for each k - 1 ordinal value we fit a binary classification problem
binary_y = (y > self.classes_[i]).astype(np.uint8)
clf = clone(self.clf_)
clf.fit(X, binary_y)
self.clfs_[i] = clf
return self
defpredict_proba(self, X):
X = check_array(X)
check_is_fitted(self, ['classes_', 'clf_', 'clfs_'])
clfs_predict = {k:self.clfs_[k].predict_proba(X) for k in self.clfs_}
predicted = []
for i,y inenumerate(self.classes_):
if i == 0:
# V1 = 1 - Pr(y > V1)
predicted.append(1 - clfs_predict[y][:,1])
elif y in clfs_predict:
# Vi = Pr(y > Vi-1) - Pr(y > Vi)
predicted.append(clfs_predict[y-1][:,1] - clfs_predict[y][:,1])
else:
# Vk = Pr(y > Vk-1)
predicted.append(clfs_predict[y-1][:,1])
return np.vstack(predicted).T
defpredict(self, X):
X = check_array(X)
check_is_fitted(self, ['classes_', 'clf_', 'clfs_'])
return np.argmax(self.predict_proba(X), axis=1)
Post a Comment for "Multi-class, Multi-label, Ordinal Classification With Sklearn"