from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

def preprocess(sentence):
    """Lowercase, tokenize, and lemmatize a sentence, dropping punctuation."""
    lemmatizer = WordNetLemmatizer()
    sentence = sentence.lower()
    tokens = word_tokenize(sentence)
    # Keep only alphanumeric tokens and reduce each one to its WordNet lemma
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalnum()]
    return tokens
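
A minimal usage sketch, assuming the NLTK data packages required by word_tokenize and WordNetLemmatizer ('punkt' and 'wordnet') have been downloaded; the sample sentence and its printed output are illustrative:

import nltk

# One-time data downloads (no-ops if the packages are already present)
nltk.download('punkt')
nltk.download('wordnet')

print(preprocess("The cats are running quickly!"))
# ['the', 'cat', 'are', 'running', 'quickly']
# Note: WordNetLemmatizer defaults to noun lemmatization, so verbs such as
# "running" are left unchanged unless a part-of-speech tag is supplied.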