Load Pretrained glove vectors in python
glove model files are in a word - vector format. You can open the textfile to verify this. Here is a small snippet of code you can use to load a pretrained glove file:
import numpy as np
def load_glove_model(File):
print("Loading Glove Model")
glove_model = {}
with open(File,'r') as f:
for line in f:
split_line = line.split()
word = split_line[0]
embedding = np.array(split_line[1:], dtype=np.float64)
glove_model[word] = embedding
print(f"{len(glove_model)} words loaded!")
return glove_model
You can then access the word vectors by simply using the gloveModel variable.
print(gloveModel['hello'])
You can do it much faster with pandas:
import pandas as pd
import csv
words = pd.read_table(glove_data_file, sep=" ", index_col=0, header=None, quoting=csv.QUOTE_NONE)
Then to get the vector for a word:
def vec(w):
return words.loc[w].as_matrix()
And to find the closest word to a vector:
words_matrix = words.as_matrix()
def find_closest_word(v):
diff = words_matrix - v
delta = np.sum(diff * diff, axis=1)
i = np.argmin(delta)
return words.iloc[i].name