pytorch datasets code example

Example 1: torchvision.datasets.datasetfolder example

def load_data(data_folder, batch_size, train, kwargs):
    transform = {
        'train': transforms.Compose(
            [transforms.Resize([256, 256]),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])]),
        'test': transforms.Compose(
            [transforms.Resize([224, 224]),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])])
        }
    data = datasets.ImageFolder(root = data_folder, transform=transform['train' if train else 'test'])
    data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True, **kwargs, drop_last = True if train else False)
    return data_loader

Example 2: imagefolder pytorch

main_dir/
     0/
         img1_digit0.jpg
         img2_digit0.jpg
     1/
         img3_digit1.jpg
         ....
     ....
     9/
        ...

Example 3: pytorch data

cat_cols = ["Hour","AMorPM", "Weekday"]
cont_cols = ['pickup_longitude',
       'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude',
       'passenger_count', 'dist_km']

y_col = ["fare_amount"]

for cat in cat_cols:
    df[cat] = df[cat].astype("category")
    
df = shuffle(df, random_state=101)
df.reset_index(drop=True, inplace=True)

cats = np.stack([df[col].cat.codes.values for col in cat_cols], 1)
conts = np.stack([df[col].values for col in cont_cols],1)
cats = torch.tensor(cats, dtype=torch.int64)
conts = torch.tensor(conts, dtype=torch.float)
y = torch.tensor(df[y_col].values,dtype=torch.float).reshape(-1,1) #regression
# classification: y = torch.tensor(df[y_col].values,dtype=torch.float).flatten()

cat_sizes = [len(df[col].cat.categories) for col in cat_cols]
emb_sizes = [(size, min(50, (size+1)//2)) for size in cat_sizes]

b = 30000 # suggested batch size
t = 5000  # suggested test size

cat_train = cats[:b-t]
cat_test  = cats[b-t:b]
con_train = conts[:b-t]
con_test  = conts[b-t:b]
y_train   = y[:b-t]
y_test    = y[b-t:b]