spaCy NLP code examples

Example 1: spacy tokenize

# Construction 1
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
nlp = English()
# Create a blank Tokenizer with just the English vocab
tokenizer = Tokenizer(nlp.vocab)

# Construction 2
from spacy.lang.en import English
nlp = English()
# Create a Tokenizer with the default settings for English
# including punctuation rules and exceptions
tokenizer = nlp.tokenizer
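
Either tokenizer can then be called directly on a string and returns a Doc of tokens (the blank tokenizer from Construction 1 will only split on whitespace, since it has no punctuation rules). The sample sentence below is just an illustrative assumption:

# Tokenize a sample string and print the token texts
tokens = tokenizer("Let's go to N.Y.!")
print([t.text for t in tokens])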

Example 2: spacy tokenize stream

texts = ["One document.", "...", "Lots of documents"]
for doc in tokenizer.pipe(texts, batch_size=50):
    # Each doc is a tokenized Doc; process it here
    pass
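
To keep the results instead of discarding them, one hedged variation is to collect the token texts while streaming; the variable names here are illustrative:

# Collect the token texts of each streamed Doc
token_lists = [[token.text for token in doc] for doc in tokenizer.pipe(texts, batch_size=50)]
print(token_lists[0])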

Example 3: spacy nlp load

import spacy
# Load the small English pipeline (install it first with: python -m spacy download en_core_web_sm)
nlp = spacy.load("en_core_web_sm")
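
Once the pipeline is loaded, a quick sanity check might look like the following; the example sentence is an assumption:

# Process a sample sentence and inspect tokens and named entities
doc = nlp("Apple is looking at buying a U.K. startup for $1 billion.")
print([(token.text, token.pos_) for token in doc])
print([(ent.text, ent.label_) for ent in doc.ents])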

Example 4: spacy nlp.pipe over a pandas DataFrame

# Run the loaded pipeline over the DataFrame's text column and store the Doc objects
df['spacy_doc'] = list(nlp.pipe(df.text))
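
A more self-contained sketch, assuming pandas and the small English pipeline from Example 3; the DataFrame contents and column names are illustrative:

import pandas as pd
import spacy

nlp = spacy.load("en_core_web_sm")
df = pd.DataFrame({"text": ["First document.", "Another document about spaCy."]})  # illustrative data
# nlp.pipe streams the texts through the full pipeline in batches
df['spacy_doc'] = list(nlp.pipe(df.text))
print(df['spacy_doc'].iloc[0].ents)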