h5py, access data in Datasets in SVHN
I'll answer my own question here. After reading the h5py docs, I came up with the following code:
def get_box_data(index, hdf5_data):
    """
    get `left, top, width, height` (and `label`) of each picture

    :param index: position of the entry inside `/digitStruct/bbox`
    :param hdf5_data: the opened h5py file object of `digitStruct.mat`
    :return: dict with the keys `height`, `label`, `left`, `top` and `width`;
        each key maps to a list of ints, one per row of the stored dataset
    """
    # Pre-seed the expected keys so they exist even if the visited group
    # turns out not to contain one of them.
    meta_data = {key: [] for key in ('height', 'label', 'left', 'top', 'width')}

    def collect_attr(name, obj):
        # Callback for `visititems`; it must return None so the visit
        # continues over every member of the group.
        rows = obj.shape[0]
        if rows == 1:
            # A single row stores the value directly. Cast to int so the
            # result type matches the multi-row branch below.
            vals = [int(obj[0][0])]
        else:
            # With several rows, each row holds a reference that has to be
            # dereferenced through the file to reach the actual value.
            vals = [int(hdf5_data[obj[k][0]][0][0]) for k in range(rows)]
        meta_data[name] = vals

    box = hdf5_data['/digitStruct/bbox'][index]
    hdf5_data[box[0]].visititems(collect_attr)
    return meta_data
def get_name(index, hdf5_data):
    """
    get the file name of a picture

    :param index: position of the entry inside `/digitStruct/name`
    :param hdf5_data: the opened h5py file object of `digitStruct.mat`
    :return: the name as a `str`, built from the character codes stored in
        the dataset that the entry references
    """
    name_refs = hdf5_data['/digitStruct/name']
    # `Dataset.value` was removed in h5py 3.0; indexing with `[()]` reads the
    # whole dataset and works on both old and new h5py versions.
    char_codes = hdf5_data[name_refs[index][0]][()]
    # Each row carries one character code in its first column.
    return ''.join(chr(row[0]) for row in char_codes)
Here, `hdf5_data` is the file object opened with `train_data = h5py.File('./train/digitStruct.mat')`. It works fine!
Update
Here is some sample code that uses the two functions above:
# Open the file read-only and let the context manager close it once every
# entry has been processed.
with h5py.File(os.path.join(folder, 'digitStruct.mat'), 'r') as mat_data:
    # One entry per picture; names and bounding boxes share the same index.
    size = mat_data['/digitStruct/name'].size
    for _i in tqdm.tqdm(range(size)):
        pic = get_name(_i, mat_data)
        box = get_box_data(_i, mat_data)
The code above shows how to get the name and the bounding-box data of each entry in the dataset.