handle na python code example
Example 1: replace missing values, encoded as np.nan, using the mean value of the columns
import numpy as np
from sklearn.impute import SimpleImputer
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp.fit([[1, 2], [np.nan, 3], [7, 6]])
X = [[np.nan, 2], [6, np.nan], [7, 6]]
print(imp.transform(X))
import pandas as pd
df = pd.DataFrame([["a", "x"],
[np.nan, "y"],
["a", np.nan],
["b", "y"]], dtype="category")
imp = SimpleImputer(strategy="most_frequent")
print(imp.fit_transform(df))
Example 2: whow i fill the data if most values are nan in jupyter notebook
import pandas as pd
list_of_rows = [
{'start_station': 1, 'end_station': 1},
{'start_station': None, 'end_station': 1},
{'start_station': 1, 'end_station': 2},
{'start_station': 1, 'end_station': 3},
{'start_station': 2, 'end_station': None},
{'start_station': 2, 'end_station': 3},
{'start_station': 2, 'end_station': 3},
]
df = pd.DataFrame(list_of_rows)
def fill_NaNs_in_end_station(row):
if pd.isnull(row['end_station']):
start_station = row['start_station']
return df[df['start_station']==start_station].end_station.value_counts().first_valid_index()
return row['end_station']
df['end_station'] = df.apply(lambda row: fill_NaNs_in_end_station(row), axis=1)