Source code for pimkl.utils.preprocessing.standardizer

"""Data standardization utilities."""
import pandas as pd
import numpy as np
from .core import enforce_pandas_dataframe_on_second_argument


class Standardizer(object):
    """Column-wise standardizer for tabular data.

    ``apply`` learns a per-column mean and standard deviation from the data
    it is given, stores them in ``parameters`` and returns the standardized
    data.  ``reapply`` reuses the stored statistics on new data, and
    ``unapply`` maps standardized data back to the original scale.

    Every method is wrapped by ``enforce_pandas_dataframe_on_second_argument``
    (imported from ``.core``); presumably it coerces ``data`` to a
    ``pandas.DataFrame`` before the method body runs -- confirm against
    ``.core``.
    """

    # DataFrame indexed by column name with the columns 'mean' and 'std'.
    # Stays None until `apply` has been called on the instance.
    parameters = None

    @enforce_pandas_dataframe_on_second_argument
    def _apply(self, data):
        """Return ``(data - mean) / std`` using the stored statistics.

        The 'mean' and 'std' Series are aligned with ``data`` by column
        label.  Zero-std columns are not treated specially here, so they may
        contain NaN or +/-inf in the result.
        """
        centered = data - self.parameters['mean']
        return centered / self.parameters['std']

    @enforce_pandas_dataframe_on_second_argument
    def _apply_and_fillna(self, data):
        """Standardize ``data`` and zero out columns with zero std.

        For every column whose stored standard deviation equals exactly 0.0,
        the infinities produced by the division are turned into NaN and all
        NaN values in that column are then replaced by 0.0.  Other columns
        are returned as computed by ``_apply``.
        """
        zero_std_mask = self.parameters['std'] == 0.0
        standardized = self._apply(data)
        # Labels of the columns whose learned std is exactly zero.
        constant_columns = zero_std_mask.index[zero_std_mask.values]
        patched = standardized[constant_columns]
        patched = patched.replace([np.inf, -np.inf], np.nan)
        standardized[constant_columns] = patched.fillna(0.0)
        return standardized

    @enforce_pandas_dataframe_on_second_argument
    def apply(self, data):
        """Learn the statistics from ``data`` and return it standardized.

        The per-column ``data.std()`` and ``data.mean()`` (pandas defaults)
        are stored in ``self.parameters``, replacing anything learned
        earlier.
        """
        column_stds = data.std()
        column_means = data.mean()
        self.parameters = pd.DataFrame(
            {'mean': column_means, 'std': column_stds}
        )
        return self._apply_and_fillna(data)

    @enforce_pandas_dataframe_on_second_argument
    def reapply(self, data):
        """Standardize ``data`` with the previously learned statistics.

        ``parameters`` is not modified.  It must have been populated by an
        earlier ``apply`` call; while it is still None the lookup of the
        stored statistics fails.
        """
        return self._apply_and_fillna(data)

    @enforce_pandas_dataframe_on_second_argument
    def unapply(self, data):
        """Undo the standardization: ``data * std + mean``.

        When no statistics have been learned yet (``parameters`` is None)
        the data is returned unchanged.
        """
        if self.parameters is None:
            return data
        rescaled = data * self.parameters['std']
        return rescaled + self.parameters['mean']