missing values in python code example
Example 1: missing values in a dataset python
# Count the missing (NaN/None) entries in every column of the DataFrame.
df.isna().sum()
Example 2: how to fill the data if most values are NaN in a Jupyter notebook
# import pandas
import pandas as pd
# Sample data: each row is one trip; None marks a missing station.
list_of_rows = [
    {'start_station': 1, 'end_station': 1},
    {'start_station': None, 'end_station': 1},
    {'start_station': 1, 'end_station': 2},
    {'start_station': 1, 'end_station': 3},
    {'start_station': 2, 'end_station': None},
    {'start_station': 2, 'end_station': 3},
    {'start_station': 2, 'end_station': 3},
]

# Build the pandas DataFrame from the sample rows.
df = pd.DataFrame(list_of_rows)


def fill_NaNs_in_end_station(row):
    """Return the end station for *row*, inferring it when it is missing.

    When ``row['end_station']`` is present it is returned unchanged.
    Otherwise the most frequent ``end_station`` among the rows of the
    module-level ``df`` that share this row's ``start_station`` is returned.
    If no such rows carry an end station, the (still missing) original
    value is returned.
    """
    if pd.notnull(row['end_station']):
        return row['end_station']

    # End stations recorded for trips that started at the same station.
    candidates = df.loc[df['start_station'] == row['start_station'], 'end_station']
    # value_counts() drops NaN and sorts by frequency, most common first.
    counts = candidates.value_counts()
    if counts.empty:
        # Nothing to infer from; leave the value missing.
        return row['end_station']
    return counts.index[0]


# Apply the function row by row and overwrite the column with the result.
df['end_station'] = df.apply(fill_NaNs_in_end_station, axis=1)
Example 3: handling missing values denoted by a '?' in pandas
# Strings that should be read as missing values, in addition to the markers
# pandas already recognises. Add "?" to this list if the file uses a question
# mark for missing entries.
missing_values = ["n/a", "na", "--"]
df = pd.read_csv("property data.csv", na_values=missing_values)