Preprocessing data in Python: code examples

Example 1: Feature scaling in Python

from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# NOTE(review): X_data and y_data are not defined in this snippet; presumably
# they are the feature matrix and target values loaded beforehand -- confirm.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data,
                                                    random_state=0)

scaler = MinMaxScaler()

# The scaler is fitted on the training split only; the test split is then
# transformed with the parameters learned from the training data.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Example 2: The function scale provides a quick and easy way to perform standardization on a single array-like dataset

# Standardization

from sklearn import preprocessing
import numpy as np

# Small 3x3 example matrix used as the data to standardize.
X_train = np.array([[1., -1., 2.],
                    [2., 0., 0.],
                    [0., 1., -1.]])

# One-shot standardization of the whole array; the checks below verify the
# result column by column (axis=0).
X_scaled = preprocessing.scale(X_train)

X_scaled
# array([[ 0.  ..., -1.22...,  1.33...],
#        [ 1.22...,  0.  ..., -0.26...],
#        [-1.22...,  1.22..., -1.06...]])

# Scaled data has zero mean and unit variance:

X_scaled.mean(axis=0)
# array([0., 0., 0.])
X_scaled.std(axis=0)
# array([1., 1., 1.])

# Fit a reusable scaler on X_train: the fitted object stores the learned
# statistics (mean_ and scale_) so the same transformation can be applied
# to other data later via transform().
scaler = preprocessing.StandardScaler().fit(X_train)
scaler
# StandardScaler()

scaler.mean_
# array([1. ..., 0. ..., 0.33...])

scaler.scale_
# array([0.81..., 0.81..., 1.24...])

scaler.transform(X_train)
# array([[ 0.  ..., -1.22...,  1.33...],
#        [ 1.22...,  0.  ..., -0.26...],
#        [-1.22...,  1.22..., -1.06...]])

# Apply the already-fitted scaler to new data without refitting.
X_test = [[-1., 1., 0.]]
scaler.transform(X_test)
# array([[-2.44...,  1.22..., -0.26...]])