Build dictionary: Doc2Vec code example

Example: gensim TaggedDocument example

def test_mixed_tag_types(self):
    """Ensure alternating int/string tags don't share indexes in doctag_syn0.

    Every document in the corpus is tagged twice: once with its integer
    position and once with its first word. After the vocabulary is built,
    the length of ``doctag_syn0`` is expected to equal the number of
    documents (one slot per integer tag) plus the number of entries in
    ``docvecs.doctags``.
    """
    mixed_tag_corpus = [
        doc2vec.TaggedDocument(words, [i, words[0]])
        for i, words in enumerate(raw_sentences)
    ]
    model = doc2vec.Doc2Vec()
    model.build_vocab(mixed_tag_corpus)
    # The integer tags are 0..len(corpus)-1, so count them from the corpus
    # built above rather than from an unrelated module-level list.
    # With the stock fixture: 9 sentences, 7 unique first tokens.
    expected_length = len(mixed_tag_corpus) + len(model.docvecs.doctags)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(len(model.docvecs.doctag_syn0), expected_length)

Tags:

Misc Example