python missing code example

Example 1: filling the missing data in pandas

Note: to fill missing entries with a specific value

import pandas as pd

# Constant that replaces every missing entry.
varable = 1


def fill_mod_acc(most_related_coloum_name, missing_data_coloum):
    """Return ``missing_data_coloum``, or the constant ``varable`` if it is missing.

    Args:
        most_related_coloum_name: Value of the related column for the row.
            Not used by the constant fill; kept so the call signature
            stays unchanged for existing callers.
        missing_data_coloum: Scalar value taken from the column being filled.

    Returns:
        ``varable`` when ``missing_data_coloum`` is NaN/None, otherwise
        ``missing_data_coloum`` unchanged.
    """
    # pd.isna also accepts None and non-numeric scalars, where np.isnan
    # raises TypeError.
    if pd.isna(missing_data_coloum):
        # varable is a plain scalar here, so it is returned as-is; indexing
        # it would raise TypeError.
        return varable
    return missing_data_coloum

# Run fill_mod_acc once per row (axis=1) and write the results back into
# the 'missing_data_coloum' column.
df['missing_data_coloum'] = df.apply(
    lambda row: fill_mod_acc(
        row['most_related_coloum_name'],
        row['missing_data_coloum'],
    ),
    axis=1,
)


Note: to fill missing entries with the mean taken from an existing, closely related column

# Mean of 'missing_data_coloum' within each 'most_related_coloum_name' group.
# The column is selected before aggregating so that only it is averaged;
# calling .mean() on the whole grouped frame first averages every column and
# raises TypeError on non-numeric columns in pandas >= 2.0.
varable = df.groupby('most_related_coloum_name')['missing_data_coloum'].mean()

import pandas as pd


def fill_mod_acc(most_related_coloum_name, missing_data_coloum):
    """Return ``missing_data_coloum``, or its group value from ``varable`` if missing.

    Args:
        most_related_coloum_name: Key used to look up the replacement in the
            module-level ``varable`` mapping/Series.
        missing_data_coloum: Scalar value taken from the column being filled.

    Returns:
        ``missing_data_coloum`` unchanged when it is present. When it is
        NaN/None, the entry of ``varable`` for ``most_related_coloum_name``;
        if ``varable`` has no such entry, the missing value is returned
        unchanged instead of raising KeyError.
    """
    # pd.isna also accepts None and non-numeric scalars, where np.isnan
    # raises TypeError.
    if not pd.isna(missing_data_coloum):
        return missing_data_coloum
    # .get() avoids a KeyError for keys with no entry (for example a NaN key,
    # which groupby drops by default); the value then simply stays missing.
    return varable.get(most_related_coloum_name, missing_data_coloum)

# Row-wise pass (axis=1): each row's related-column value and current value
# are handed to fill_mod_acc, and the returned values replace the column.
df['missing_data_coloum'] = df.apply(
    lambda record: fill_mod_acc(
        record['most_related_coloum_name'],
        record['missing_data_coloum'],
    ),
    axis=1,
)

Example 2: how to fill the data if most values are NaN, in a Jupyter notebook

# import pandas
import pandas as pd

# make a sample data
# Sample records as (start_station, end_station) pairs; None marks a
# missing observation.
list_of_rows = [
    {'start_station': start, 'end_station': end}
    for start, end in (
        (1, 1),
        (None, 1),
        (1, 2),
        (1, 3),
        (2, None),
        (2, 3),
        (2, 3),
    )
]

# Build the data frame from the list of row dictionaries.
df = pd.DataFrame(list_of_rows)

# define a function
def fill_NaNs_in_end_station(row):
    """Return the row's end_station, or a replacement when it is null.

    A present end_station is returned unchanged. A null one is replaced by
    the first index of ``value_counts()`` over the end_station values of the
    rows in the module-level ``df`` that share this row's start_station;
    ``first_valid_index()`` yields None when there are no such values.
    """
    current_end = row['end_station']
    if not pd.isnull(current_end):
        return current_end

    same_start = df['start_station'] == row['start_station']
    end_counts = df.loc[same_start, 'end_station'].value_counts()
    return end_counts.first_valid_index()

# apply function to dataframe
# Call fill_NaNs_in_end_station once per row (axis=1) and store the
# returned values as the new 'end_station' column.
df['end_station'] = df.apply(fill_NaNs_in_end_station, axis=1)