DataLoader batch size: Python code example
Example: PyTorch data preparation
# Column roles used when turning the DataFrame into model inputs.

# Categorical feature columns.
cat_cols = ["Hour", "AMorPM", "Weekday"]

# Continuous (numeric) feature columns.
cont_cols = [
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
    "passenger_count",
    "dist_km",
]

# Target column; kept as a one-element list so it can be used directly
# as a DataFrame column selector.
y_col = ["fare_amount"]
# Convert every categorical column to pandas' "category" dtype so that
# integer codes can be extracted from it below.
for cat in cat_cols:
    df[cat] = df[cat].astype("category")

# Shuffle the rows reproducibly (fixed seed) and renumber the index 0..n-1.
df = shuffle(df, random_state=101)
df.reset_index(drop=True, inplace=True)

# Stack per-column arrays side by side: one row per sample, one column per
# feature. `.cat.codes` yields the integer code of each category.
# NOTE(review): pandas assigns code -1 to missing values; if these codes are
# later used as embedding indices, confirm the columns contain no NaNs.
cats = np.stack([df[col].cat.codes.values for col in cat_cols], 1)
conts = np.stack([df[col].values for col in cont_cols], 1)

# Move everything into tensors: integer codes as int64, numeric data as float.
cats = torch.tensor(cats, dtype=torch.int64)
conts = torch.tensor(conts, dtype=torch.float)
# Target as a single-column tensor (one row per sample).
y = torch.tensor(df[y_col].values, dtype=torch.float).reshape(-1, 1)

# Number of distinct categories in each categorical column.
cat_sizes = [len(df[col].cat.categories) for col in cat_cols]
# One (category_count, width) pair per categorical column, where width is
# half the category count rounded up, capped at 50.
# Presumably these are (num_embeddings, embedding_dim) pairs for embedding
# layers defined elsewhere - confirm against the model code.
emb_sizes = [(size, min(50, (size + 1) // 2)) for size in cat_sizes]
# Only the first `b` rows are used; the last `t` of those rows are held out
# for testing.
b = 30000
t = 5000

# Rows [0, b - t) form the training split, rows [b - t, b) the test split.
# The same row ranges are applied to the categorical, continuous and target
# tensors so the three stay aligned.
cat_train, cat_test = cats[:b - t], cats[b - t:b]
con_train, con_test = conts[:b - t], conts[b - t:b]
y_train, y_test = y[:b - t], y[b - t:b]