Source code for direpack.preprocessing.gsspp

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Created on Wed Mar 25 09:01:53 2020

# @author: Sven Serneels, Ponalytics, Mar 2020. 


__all__ = ['GenSpatialSignPrePprocessor','gen_ss_pp','gen_ss_covmat']

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from .robcent import VersatileScaler, versatile_scale
from ._preproc_utilities import *
from ..utils.utils import _check_input
from ._gsspp_utils import *
from ._gsspp_utils import _norms, _gsspp

__all__ = ['GenSpatialSignPreProcessor', 'gen_ss_covmat', 'gen_ss_pp']
    
[docs]class GenSpatialSignPreProcessor(TransformerMixin,BaseEstimator): """ GenSpatialSignPreProcessor Generalized Spatial Sign Pre-Processing as a scikit-learn compatible object that can be used in ML pipelines. Parameters ---------- center: str or function, location estimator for centring.str options: 'mean', 'median', 'l1median', 'kstepLTS', 'None' fun: str or function, radial transformation function, str options: 'ss' (the non-generalized spatial sign, equivalent to sklearn's Normalizer), 'ball', 'shell', 'quad' (quadratic), 'winsor', or 'linear_redescending' Methods: sklearn API: `fit(X)`, `transform(X)` and `fit_transform(X)` with Attributes ---------- Attributes always provided : - `gss_` : the generalized spatial signs - `Xm_` : the centred data - `centring_` : VersatileScaler centring object - `X_gss_pp_` : Data preprocessed by Generalized Spatial Sign """
[docs] def __init__(self,center='l1median',fun='linear_redescending'): self.center = center self.fun = fun
def fit(self,X): """ Calculate and store generalized spatial signs """ X = _check_input(X) n,p = X.shape if type(self.fun) is str: fun = eval(self.fun) else: fun = self.fun vs = VersatileScaler(center=self.center,scale='None') Xm = vs.fit_transform(X) gss_ = fun(_norms(Xm),p,n) setattr(self,'gss_',gss_) setattr(self,'Xm_',Xm) setattr(self,'centring_',vs) def transform(self,X): """ Calculate Generalized Spatial Sign Pre-Pprocessed Data """ check_is_fitted(self,('gss_','Xm_')) Xgss = np.multiply(self.Xm_,self.gss_) setattr(self,'X_gsspp_',Xgss) return(Xgss) def fit_transform(self,X): self.fit(X) self.transform(X) return(self.X_gsspp_)
def gen_ss_pp(X,center='l1median',fun='linear_redescending'): """ Generalized Spatial Sign Pre-Processing as a one pass function Inputs: X: Data matrix center: str or function, location estimator for centring. str options: 'mean', 'median', 'l1median', 'kstepLTS', 'None' fun: str or function, radial transformation function, str options: 'ss' (the non-generalized spatial sign, equivalent to sklearn's Normalizer), 'ball', 'shell', 'quad' (quadratic), 'winsor', or 'linear_redescending' Outputs: the pre-processed data """ if type(center) is str: center = eval(center) if type(fun) is str: fun = eval(fun) X = _check_input(X) n = X.shape if len(n) > 1: p = n[1] else: p = 1 n = n[0] if center != 'None': X = versatile_scale(X,center=center,scale='None') return(_gsspp(X,p,n,fun=fun)) def gen_ss_covmat(X,center='kstepLTS',fun=linear_redescending): """ Generalized Spatial Sign Covariance Matrix Is equivalent to the covariance matrix of generalized spatial sign pre-processed data. First published in: A generalized spatial sign covariance matrix, Jakob Raymaekers, Peter Rousseeuw, Journal of Multivariate Analysis, 171 (2019), 94–111. Inputs: X: Data matrix center: str or function, location estimator for centring. str options: 'mean', 'median', 'l1median', 'kstepLTS', 'None' fun: str or function, radial transformation function, str options: 'ss' (the non-generalized spatial sign, equivalent to sklearn's Normalizer), 'ball', 'shell', 'quad' (quadratic), 'winsor', or 'linear_redescending' Outputs: the generalized spatial sign covariance matrix """ X = _check_input(X) rc = VersatileScaler(center=center, scale='None') n,p = X.shape Xm = rc.fit_transform(X) Xgss = _gsspp(Xm,p,n,fun=fun) return(Xgss.T*Xgss/n)