Dask DataFrame - Prediction of Keras Model
I found the answer. It is an issue with keras or tensorflow: https://github.com/keras-team/keras/issues/2397
The code below worked, and using dask cut the run time by 50% compared with a standard pandas groupby.
# Dask setup: load the trained Keras model once, up front, so the group
# function used with dask can reuse it for every group it scores.
model=keras.models.load_model('/home/embedding_model.h5')
# Workaround from https://github.com/keras-team/keras/issues/2397:
# remember the default TensorFlow graph right after the model is loaded so
# that the prediction code can re-enter it later with `graph.as_default()`.
import tensorflow as tf
# NOTE(review): at module scope a `global` statement has no effect; it only
# matters if this snippet is pasted inside a function.
global graph
graph = tf.get_default_graph()
def calc_HR_ind_dsk(grp, topk=10):
    """Sum the ``respond`` values of the top-scored rows of one group.

    Meant to be used as the callable of ``df.groupby('user').apply(...)``.
    Every row of the group is scored with the module-level ``model``; the
    rows are ranked by predicted score (highest first) and the ``respond``
    values of the best ``topk`` rows are added up.

    Args:
        grp: DataFrame with the rows of a single group. It must provide
            the ``user``, ``item`` and ``respond`` columns.
        topk: Number of highest-scored rows to take into account.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        The sum of ``respond`` over the ``topk`` rows with the highest
        predicted score. Rows with equal scores keep their original
        relative order, and a group with fewer than ``topk`` rows
        contributes all of its rows.
    """
    inputs = [grp['user'].values, grp['item'].values]

    # Re-enter the graph captured at module level before predicting; this
    # is the workaround from https://github.com/keras-team/keras/issues/2397
    # (presumably needed because dask calls this function from worker
    # threads whose default graph is not the one the model was loaded into).
    with graph.as_default():
        scores = model.predict(inputs)[:, 0]

    scored = list(zip(scores, grp['respond'].values))
    # Negating the score sorts descending while keeping ties stable.
    top = sorted(scored, key=lambda pair: -pair[0])[:topk]
    return sum(label for _, label in top)
import dask.dataframe as dd

# `dtype='int64'` is applied to every column of the CSV.
df = dd.read_csv('/home/test_coded_final.csv', dtype='int64')

# Score each user's rows in parallel, one call of `calc_HR_ind_dsk` per user.
# Passing `meta` declares the output (an unnamed int64 series) up front, so
# dask does not have to call the function -- and therefore the Keras model --
# on dummy data to infer the result type, and it avoids the
# "`meta` is not specified" warning.
results = (
    df.groupby('user')
    .apply(calc_HR_ind_dsk, meta=(None, 'int64'))
    .compute()
)