xs = np.random.randn(100, )
scaler = MinMaxScaler()
transformed_xs = scaler.fit_transform(xs)
assert transformed_xs.shape == (100, )
assert np.allclose(xs, scaler.inverse_transform(transformed_xs))
# Test correctness
assert np.allclose(
transformed_xs,
skp.MinMaxScaler().fit_transform(xs.reshape(100, 1)).reshape(100,)
)
# Also works with a 2D array
xs = xs.reshape(100, 1)
scaler = MinMaxScaler()
transformed_xs = scaler.fit_transform(xs)
assert np.allclose(xs, scaler.inverse_transform(transformed_xs))
assert np.allclose(
transformed_xs,
skp.MinMaxScaler().fit_transform(xs.reshape(100, 1))
)

Data Preprocessors
relax.data_utils.preprocessing.DataPreprocessor
class relax.data_utils.preprocessing.DataPreprocessor (name=None)
Base class for data preprocessors.
Parameters:
- name (str, default=None) – The name of the preprocessor. If None, the class name will be used.
Methods
fit (xs, y=None)
Fit the preprocessor with xs and y.
transform (xs)
Transform xs.
fit_transform (xs, y=None)
Fit the preprocessor with xs and y, then transform xs.
inverse_transform (xs)
Inverse transform xs.
to_dict ()
Convert the preprocessor to a dictionary.
from_dict (params)
Load the attributes of the preprocessor from a dictionary.
relax.data_utils.preprocessing.MinMaxScaler
class relax.data_utils.preprocessing.MinMaxScaler ()
Min-max scaler for a single feature. Its output matches scikit-learn's `MinMaxScaler` with default settings.
Methods
fit (xs, y=None)
Fit the preprocessor with xs and y.
transform (xs)
Transform xs.
fit_transform (xs, y=None)
Fit the preprocessor with xs and y, then transform xs.
inverse_transform (xs)
Inverse transform xs.
to_dict ()
Convert the preprocessor to a dictionary.
from_dict (params)
Load the attributes of the preprocessor from a dictionary.
MinMaxScaler only supports scaling a single feature.
xs = xs.reshape(50, 2)
scaler = MinMaxScaler()
test_fail(lambda: scaler.fit_transform(xs),
contains="`MinMaxScaler` only supports array with a single feature")

Convert to a dictionary (or the pytree representation).
xs = xs.reshape(-1, 1)
scaler = MinMaxScaler().fit(xs)
scaler_1 = MinMaxScaler().from_dict(scaler.to_dict())
assert np.allclose(scaler.transform(xs), scaler_1.transform(xs))

relax.data_utils.preprocessing.EncoderPreprocessor
class relax.data_utils.preprocessing.EncoderPreprocessor (name=None)
Encode categorical features as an integer array.
Parameters:
- name (str, default=None) – The name of the preprocessor. If None, the class name will be used.
Methods
fit (xs, y=None)
Fit the preprocessor with xs and y.
transform (xs)
Transform xs.
fit_transform (xs, y=None)
Fit the preprocessor with xs and y, then transform xs.
inverse_transform (xs)
Inverse transform xs.
to_dict ()
Convert the preprocessor to a dictionary.
from_dict (params)
Load the attributes of the preprocessor from a dictionary.
relax.data_utils.preprocessing.OrdinalPreprocessor
class relax.data_utils.preprocessing.OrdinalPreprocessor (name=None)
Ordinal encoder for a single feature.
Parameters:
- name (str, default=None) – The name of the preprocessor. If None, the class name will be used.
Methods
fit (xs, y=None)
Fit the preprocessor with xs and y.
transform (xs)
Transform xs.
fit_transform (xs, y=None)
Fit the preprocessor with xs and y, then transform xs.
inverse_transform (xs)
Inverse transform xs.
to_dict ()
Convert the preprocessor to a dictionary.
from_dict (params)
Load the attributes of the preprocessor from a dictionary.
xs = np.random.choice(['a', 'b', 'c'], size=(100, 1))
enc = OrdinalPreprocessor().fit(xs)
transformed_xs = enc.transform(xs)
assert np.all(enc.inverse_transform(transformed_xs) == xs)
# Test from_dict and to_dict
enc_1 = OrdinalPreprocessor().from_dict(enc.to_dict())
assert np.all(enc.transform(xs) == enc_1.transform(xs))
xs = np.array(['a', 'b', 'c', np.nan, 'a', 'b', 'c', np.nan], dtype=object).reshape(-1, 1)
enc = OrdinalPreprocessor().fit(xs)
# Check categories_
assert np.array_equiv(enc.categories_, np.array(['a', 'b', 'c', np.nan], dtype=str))
transformed_xs = enc.transform(xs)
assert transformed_xs.shape == (8, 1)
inverse_transformed_xs = enc.inverse_transform(transformed_xs)
assert np.all(inverse_transformed_xs == xs.astype(str))
# Test from_dict and to_dict
enc_1 = OrdinalPreprocessor().from_dict(enc.to_dict())
assert np.all(enc.transform(xs) == enc_1.transform(xs))
assert np.array_equal(enc.categories_, enc_1.categories_)
xs = np.random.choice(['a', 'b', 'c'], size=(100, ))
test_fail(lambda: OrdinalPreprocessor().fit_transform(xs),
contains="OrdinalPreprocessor only supports 2D array with a single feature")

relax.data_utils.preprocessing.OneHotEncoder
class relax.data_utils.preprocessing.OneHotEncoder (name=None)
One-hot encoder for a single categorical feature.
Parameters:
- name (str, default=None) – The name of the preprocessor. If None, the class name will be used.
Methods
fit (xs, y=None)
Fit the preprocessor with xs and y.
transform (xs)
Transform xs.
fit_transform (xs, y=None)
Fit the preprocessor with xs and y, then transform xs.
inverse_transform (xs)
Inverse transform xs.
to_dict ()
Convert the preprocessor to a dictionary.
from_dict (params)
Load the attributes of the preprocessor from a dictionary.
xs = np.random.choice(['a', 'b', 'c'], size=(100, 1))
enc = OneHotEncoder().fit(xs)
transformed_xs = enc.transform(xs)
assert np.all(enc.inverse_transform(transformed_xs) == xs)
# Test from_dict and to_dict
enc_1 = OneHotEncoder().from_dict(enc.to_dict())
assert np.all(enc.transform(xs) == enc_1.transform(xs))
xs = np.array(['a', 'b', 'c', np.nan, 'a', 'b', 'c', np.nan], dtype=object).reshape(-1, 1)
enc = OneHotEncoder().fit(xs)
# Check categories_
assert np.array_equiv(enc.categories_, np.array(['a', 'b', 'c', np.nan], dtype=str))
transformed_xs = enc.transform(xs)
assert np.all(enc.inverse_transform(transformed_xs) == xs.astype(str))
assert np.array_equal(
transformed_xs, skp.OneHotEncoder(sparse_output=False).fit_transform(xs)
)
# Test from_dict and to_dict
enc_1 = OneHotEncoder().from_dict(enc.to_dict())
enc_2 = OneHotEncoder()
enc_2.from_dict(enc_1.to_dict())
assert np.all(enc.transform(xs) == enc_1.transform(xs))
assert np.all(enc.transform(xs) == enc_2.transform(xs))
xs = np.random.choice(['a', 'b', 'c'], size=(100, ))
test_fail(lambda: OneHotEncoder().fit_transform(xs),
contains="OneHotEncoder only supports 2D array with a single feature")