Data Preprocessors

relax.data_utils.preprocessing.DataPreprocessor

[source]

class relax.data_utils.preprocessing.DataPreprocessor (name=None)

Base class for data preprocessors.

Parameters:

  • name (<class 'str'>, default=None) – The name of the preprocessor. If None, the class name will be used.

Methods

[source]

fit (xs, y=None)

Fit the preprocessor with xs and y.

[source]

transform (xs)

Transform xs.

[source]

fit_transform (xs, y=None)

Fit the preprocessor with xs and y, then transform xs.

[source]

inverse_transform (xs)

Inverse transform xs.

[source]

to_dict ()

Convert the preprocessor to a dictionary.

[source]

from_dict (params)

Load the attributes of the preprocessor from a dictionary.

relax.data_utils.preprocessing.MinMaxScaler

[source]

class relax.data_utils.preprocessing.MinMaxScaler ()

Min-max scaler for a single feature; results match scikit-learn's default `MinMaxScaler` (see the checks below).

Methods

[source]

fit (xs, y=None)

Fit the preprocessor with xs and y.

[source]

transform (xs)

Transform xs.

[source]

fit_transform (xs, y=None)

Fit the preprocessor with xs and y, then transform xs.

[source]

inverse_transform (xs)

Inverse transform xs.

[source]

to_dict ()

Convert the preprocessor to a dictionary.

[source]

from_dict (params)

Load the attributes of the preprocessor from a dictionary.

# --- 1D input: a flat vector of samples ---
n_samples = 100
xs = np.random.randn(n_samples)
scaler = MinMaxScaler()
transformed_xs = scaler.fit_transform(xs)
# The output keeps the 1D shape of the input.
assert transformed_xs.shape == (n_samples,)
# Inverting the transform recovers the original values.
recovered_xs = scaler.inverse_transform(transformed_xs)
assert np.allclose(xs, recovered_xs)
# Correctness: agree with scikit-learn, which needs a 2D column as input,
# so reshape going in and flatten coming out.
sk_scaled = skp.MinMaxScaler().fit_transform(xs.reshape(n_samples, 1))
assert np.allclose(transformed_xs, sk_scaled.reshape(n_samples))

# --- 2D input: the same samples as a single column ---
xs = xs.reshape(n_samples, 1)
scaler = MinMaxScaler()
transformed_xs = scaler.fit_transform(xs)
recovered_xs = scaler.inverse_transform(transformed_xs)
assert np.allclose(xs, recovered_xs)
sk_scaled = skp.MinMaxScaler().fit_transform(xs.reshape(n_samples, 1))
assert np.allclose(transformed_xs, sk_scaled)

MinMaxScaler only supports scaling a single feature.

# Reshape the samples into two columns; fitting is expected to fail.
xs = xs.reshape(50, 2)
scaler = MinMaxScaler()
expected_error = "`MinMaxScaler` only supports array with a single feature"
test_fail(
    lambda: scaler.fit_transform(xs),
    contains=expected_error,
)

Convert to a dictionary (or its pytree representation), then restore a scaler from it.

# Back to a single column before fitting.
xs = xs.reshape(-1, 1)
scaler = MinMaxScaler().fit(xs)
# Export the fitted state and load it into a fresh scaler.
scaler_state = scaler.to_dict()
scaler_1 = MinMaxScaler().from_dict(scaler_state)
# The restored scaler must transform exactly like the fitted one.
assert np.allclose(
    scaler.transform(xs),
    scaler_1.transform(xs),
)

relax.data_utils.preprocessing.EncoderPreprocessor

[source]

class relax.data_utils.preprocessing.EncoderPreprocessor (name=None)

Encode categorical features as an integer array.

Parameters:

  • name (<class 'str'>, default=None) – The name of the preprocessor. If None, the class name will be used.

Methods

[source]

fit (xs, y=None)

Fit the preprocessor with xs and y.

[source]

transform (xs)

Transform xs.

[source]

fit_transform (xs, y=None)

Fit the preprocessor with xs and y, then transform xs.

[source]

inverse_transform (xs)

Inverse transform xs.

[source]

to_dict ()

Convert the preprocessor to a dictionary.

[source]

from_dict (params)

Load the attributes of the preprocessor from a dictionary.

relax.data_utils.preprocessing.OrdinalPreprocessor

[source]

class relax.data_utils.preprocessing.OrdinalPreprocessor (name=None)

Ordinal encoder for a single feature.

Parameters:

  • name (<class 'str'>, default=None) – The name of the preprocessor. If None, the class name will be used.

Methods

[source]

fit (xs, y=None)

Fit the preprocessor with xs and y.

[source]

transform (xs)

Transform xs.

[source]

fit_transform (xs, y=None)

Fit the preprocessor with xs and y, then transform xs.

[source]

inverse_transform (xs)

Inverse transform xs.

[source]

to_dict ()

Convert the preprocessor to a dictionary.

[source]

from_dict (params)

Load the attributes of the preprocessor from a dictionary.

# Random single-column categorical data.
xs = np.random.choice(['a', 'b', 'c'], size=(100, 1))
enc = OrdinalPreprocessor().fit(xs)
transformed_xs = enc.transform(xs)
# Decoding the encoded values gives back the original labels.
decoded_xs = enc.inverse_transform(transformed_xs)
assert np.all(decoded_xs == xs)
# to_dict / from_dict round trip: the restored encoder encodes identically.
enc_state = enc.to_dict()
enc_1 = OrdinalPreprocessor().from_dict(enc_state)
assert np.all(enc.transform(xs) == enc_1.transform(xs))

# Object column that contains missing values (NaN) among the labels.
raw_labels = ['a', 'b', 'c', np.nan] * 2
xs = np.array(raw_labels, dtype=object).reshape(-1, 1)
enc = OrdinalPreprocessor().fit(xs)
# The fitted categories are compared against the string form of the labels.
expected_categories = np.array(['a', 'b', 'c', np.nan], dtype=str)
assert np.array_equiv(enc.categories_, expected_categories)
transformed_xs = enc.transform(xs)
assert transformed_xs.shape == (8, 1)
# Decoding is checked against the string-cast input.
inverse_transformed_xs = enc.inverse_transform(transformed_xs)
assert np.all(inverse_transformed_xs == xs.astype(str))
# to_dict / from_dict round trip keeps both the encoding and the categories.
enc_state = enc.to_dict()
enc_1 = OrdinalPreprocessor().from_dict(enc_state)
assert np.all(enc.transform(xs) == enc_1.transform(xs))
assert np.array_equal(enc.categories_, enc_1.categories_)

# A 1D array is expected to be rejected by the encoder.
xs = np.random.choice(['a', 'b', 'c'], size=(100, ))
expected_error = "OrdinalPreprocessor only supports 2D array with a single feature"
test_fail(
    lambda: OrdinalPreprocessor().fit_transform(xs),
    contains=expected_error,
)

relax.data_utils.preprocessing.OneHotEncoder

[source]

class relax.data_utils.preprocessing.OneHotEncoder (name=None)

One-hot encoder for a single categorical feature.

Parameters:

  • name (<class 'str'>, default=None) – The name of the preprocessor. If None, the class name will be used.

Methods

[source]

fit (xs, y=None)

Fit the preprocessor with xs and y.

[source]

transform (xs)

Transform xs.

[source]

fit_transform (xs, y=None)

Fit the preprocessor with xs and y, then transform xs.

[source]

inverse_transform (xs)

Inverse transform xs.

[source]

to_dict ()

Convert the preprocessor to a dictionary.

[source]

from_dict (params)

Load the attributes of the preprocessor from a dictionary.

# Random single-column categorical data.
xs = np.random.choice(['a', 'b', 'c'], size=(100, 1))
enc = OneHotEncoder().fit(xs)
transformed_xs = enc.transform(xs)
# Decoding the encoded values gives back the original labels.
decoded_xs = enc.inverse_transform(transformed_xs)
assert np.all(decoded_xs == xs)
# to_dict / from_dict round trip: the restored encoder encodes identically.
enc_state = enc.to_dict()
enc_1 = OneHotEncoder().from_dict(enc_state)
assert np.all(enc.transform(xs) == enc_1.transform(xs))

# Object column that contains missing values (NaN) among the labels.
raw_labels = ['a', 'b', 'c', np.nan] * 2
xs = np.array(raw_labels, dtype=object).reshape(-1, 1)
enc = OneHotEncoder().fit(xs)
# The fitted categories are compared against the string form of the labels.
expected_categories = np.array(['a', 'b', 'c', np.nan], dtype=str)
assert np.array_equiv(enc.categories_, expected_categories)
transformed_xs = enc.transform(xs)
# Decoding is checked against the string-cast input.
assert np.all(enc.inverse_transform(transformed_xs) == xs.astype(str))
# The encoding must equal scikit-learn's dense one-hot output.
sk_one_hot = skp.OneHotEncoder(sparse_output=False).fit_transform(xs)
assert np.array_equal(transformed_xs, sk_one_hot)
# to_dict / from_dict round trip, using the return value (enc_1) and
# loading into an existing instance (enc_2).
enc_1 = OneHotEncoder().from_dict(enc.to_dict())
enc_2 = OneHotEncoder()
enc_2.from_dict(enc_1.to_dict())
for restored_enc in (enc_1, enc_2):
    assert np.all(enc.transform(xs) == restored_enc.transform(xs))

# A 1D array is expected to be rejected by the encoder.
xs = np.random.choice(['a', 'b', 'c'], size=(100, ))
expected_error = "OneHotEncoder only supports 2D array with a single feature"
test_fail(
    lambda: OneHotEncoder().fit_transform(xs),
    contains=expected_error,
)