Lemmatyzacja
# Load the spaCy "en_core_web_sm" pipeline once at import time so that
# text_cleaning() can reuse the same object on every call instead of
# reloading the model.
nlp = spacy.load("en_core_web_sm")
def text_cleaning(text, stop_words=stopwords.words('english'),
                  allow_postags={'NOUN', 'VERB', 'ADJ', 'ADV', 'PROPN'}):
    """Normalize ``text`` and return its lemmas as one space-separated string.

    The text is reduced to lowercase ASCII letters separated by single
    spaces, run through the module-level spaCy pipeline ``nlp``, and every
    token that is not a stop word and whose coarse part-of-speech tag is in
    ``allow_postags`` contributes its lemma to the result.

    Args:
        text: Raw input string.
        stop_words: Collection of lowercase words to drop. The default is
            ``stopwords.words('english')`` (presumably NLTK's stop-word
            corpus -- confirm against the file's imports), evaluated once
            when the function is defined. It is never mutated here.
        allow_postags: Collection of ``token.pos_`` values to keep. It is
            never mutated here.

    Returns:
        The lemmas of the kept tokens joined by single spaces; an empty
        string when no token qualifies.
    """
    # A single pass replaces every run of non-letter characters (digits
    # included) with one space, so no separate digit-stripping pass is needed.
    text = re.sub(r"[^A-Za-z]+", " ", text).lower()

    # A list (such as the default) makes every ``in`` test a linear scan;
    # hashing it once per call makes the per-token lookup constant time.
    # Other container types are left untouched so their semantics are kept.
    if isinstance(stop_words, (list, tuple)):
        stop_words = frozenset(stop_words)

    return ' '.join(
        token.lemma_
        for token in nlp(text)
        if token.text not in stop_words and token.pos_ in allow_postags
    )
Lazy Leopard