26 lines
573 B
Python
26 lines
573 B
Python
import markovify
|
|
import re
|
|
import spacy
|
|
|
|
# Regex used by Text.sentence_split to cut the corpus into units: the literal
# marker "@@note@@" together with any whitespace directly around it.
SEPARATOR = r"\s*@@note@@\s*"
|
|
|
|
# spaCy pipeline, loaded once when this module is imported; Text.word_split
# calls it to tokenize a sentence and read each token's part-of-speech tag.
nlp = spacy.load("en_core_web_sm")
|
|
|
|
class Text(markovify.Text):
    """Markov text model whose words carry a part-of-speech tag.

    Overrides three ``markovify.Text`` methods:

    * ``sentence_split`` cuts the corpus on the module-level ``SEPARATOR``
      pattern.
    * ``word_split`` tokenizes with the module-level spaCy pipeline ``nlp``
      and encodes every kept token as ``"<text>::<POS>"``.
    * ``word_join`` strips the tags again and joins the token texts with
      single spaces.
    """

    def word_split(self, sentence):
        """Split *sentence* into ``"<text>::<POS>"`` strings.

        Tokens whose spaCy ``pos_`` is ``'PUNCT'`` are dropped; all other
        tokens are kept in their original order.

        Args:
            sentence: The text of one unit, as produced by ``sentence_split``.

        Returns:
            A list of strings, one per kept token.
        """
        return [
            "::".join((token.orth_, token.pos_))
            for token in nlp(sentence)
            if token.pos_ != 'PUNCT'
        ]

    def word_join(self, words):
        """Rebuild plain text from tagged words produced by ``word_split``.

        Args:
            words: An iterable of ``"<text>::<POS>"`` strings.

        Returns:
            The token texts joined with single spaces.
        """
        # Split on the LAST "::" only: the tag is always the final field, so
        # a token whose own text contains "::" is returned intact instead of
        # being truncated at its first "::".
        return " ".join(word.rsplit("::", 1)[0] for word in words)

    def sentence_split(self, text):
        """Split *text* into units on the ``SEPARATOR`` pattern.

        Args:
            text: The raw corpus string.

        Returns:
            The list returned by ``re.split``; it can contain empty strings
            when *text* starts or ends with the separator.
        """
        return re.split(SEPARATOR, text)
|