initial
This commit is contained in:
commit
2618b264f3
6 changed files with 122 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
*.json
|
||||
*.py[oc]
|
||||
/secrets__.py
|
||||
/venv
|
||||
/cron.log
|
26
chain.py
Normal file
26
chain.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
import markovify
|
||||
import re
|
||||
import spacy
|
||||
|
||||
# Regex matching the marker that separates notes in the corpus text, together
# with any whitespace around it.
SEPARATOR = r"\s*@@note@@\s*"

# spaCy pipeline used to tokenize sentences and tag each token's part of speech.
nlp = spacy.load("en_core_web_sm")


class Text(markovify.Text):
    """Markov text model whose words are tagged with their part of speech.

    Every word is stored as ``"<text>::<POS>"`` so that identical spellings
    with different parts of speech are distinct words in the chain.
    """

    def word_split(self, sentence):
        """Return the tokens of *sentence* as ``"<text>::<POS>"`` strings.

        Tokens that spaCy tags as punctuation are left out.
        """
        return [
            "::".join((word.orth_, word.pos_))
            for word in nlp(sentence)
            if word.pos_ != 'PUNCT'
        ]

    def word_join(self, words):
        """Join tagged *words* into a plain sentence, dropping the POS tags."""
        # Split from the right, once: the tag is always the final "::" field,
        # so a token whose own text contains "::" keeps all of its text.
        return " ".join(word.rsplit("::", 1)[0] for word in words)

    def sentence_split(self, text):
        """Split *text* into sentences at every SEPARATOR match."""
        return re.split(SEPARATOR, text)
|
29
generate.py
Normal file
29
generate.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
import chain
|
||||
import sys
|
||||
import requests
|
||||
|
||||
import secrets__
|
||||
|
||||
# Load the previously exported model; the context manager closes the file as
# soon as its contents have been read.
with open("model.json") as model_f:
    model = chain.Text.from_json(model_f.read())

# The loop treats None as "no sentence produced" and simply asks again until
# the model returns one (at most 80 characters, at least 3 words).
text = None
while text is None:
    text = model.make_short_sentence(80, tries=900, min_words=3)

# NOTE(review): as written both replacements map a character to itself, so
# this line changes nothing. Presumably the replacement strings were meant to
# contain a look-alike or invisible character that defuses mentions and
# hashtags -- confirm against the original file before relying on it.
text = text.replace('@','@').replace('#','#')
print(text)

# Publish the note. The timeout keeps an unattended run from hanging on a
# stalled connection, and raise_for_status() turns an HTTP error into a
# visible failure instead of a silently lost post.
response = requests.post(
    "https://brain.d.on-t.work/api/notes/create",
    json={
        'i': secrets__.TOKEN,

        'visibility': 'home',
        'noExtractMentions': True,
        'noExtractHashtags': True,

        'text': text,
        'cw': 'markov chain generated post',
    },
    timeout=30,
)
response.raise_for_status()
|
54
import-misskey.py
Normal file
54
import-misskey.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
import re
|
||||
import json
|
||||
import sys
|
||||
|
||||
print('[+] loading nlp enhanced markov chain')
|
||||
import chain
|
||||
|
||||
# Patterns used to strip markup from note text before it is added to the corpus.

# "@user" with an optional "@host" part. The hyphen is placed last in each
# character class so it is a literal "-" rather than a range operator
# (".-_" would span every character from "." to "_", including ":", "/", "<",
# ">" and "@", while excluding "-" itself).
MENTION = re.compile(r'@[a-zA-Z0-9._-]+(@[a-zA-Z0-9._-]+)?')
# Opening of an MFM function, e.g. "$[x2" or "$[spin.speed=2s".
MFM_BEGIN = re.compile(r'\$\[[a-z0-9.,=]+')
# One or more closing brackets.
MFM_END = re.compile(r'\]+')
# Attribute-less opening or closing tags such as "<b>" or "</small>".
HTML = re.compile(r'</?[a-z]+>')
# Runs of spaces and newlines, to be collapsed into a single space.
SPACE = re.compile(r'[ \n]+')
# Word-internal apostrophe ("don't"); the two halves are captured so they can
# be glued back together without it.
CONTRACTION = re.compile(r"(\w+)'(\w+)")
|
||||
|
||||
print('[+] loading note json')
export_f = sys.argv[1]
# JSON exports are UTF-8; being explicit avoids depending on the platform's
# default encoding, and the context manager closes the file after parsing.
with open(export_f, encoding='utf-8') as export:
    export_json = json.load(export)

corpus = []

for note in export_json:
    # Only notes with these visibilities are used as training material.
    if note.get('visibility') not in ['public', 'unlisted']:
        continue

    if note.get('localOnly'):
        continue

    # Notes posted behind a content warning are left out as well.
    if note.get('cw'):
        continue

    text = note.get('text')
    if not text:
        continue

    # Normalize: lowercase, strip mentions / MFM / HTML tags, collapse
    # whitespace, and drop the apostrophe inside contractions.
    text = text.lower()
    text = MENTION.sub('', text)
    text = MFM_BEGIN.sub('', text)
    text = MFM_END.sub('', text)
    text = HTML.sub('', text)
    text = SPACE.sub(' ', text)
    text = CONTRACTION.sub(r'\1\2', text)
    text = text.strip()

    # A note made up only of mentions or markup is empty by now and would
    # add nothing but an empty sentence to the corpus.
    if not text:
        continue

    print(f" - {text}")
    corpus.append(text)

print('[+] building markov chain')
model = chain.Text("@@note@@".join(corpus), well_formed=False)
model_json = model.compile().to_json()

print('[+] exporting')
# Derive "<name>.model.json" from "<name>.json". Only a trailing ".json" is
# rewritten, and a path without that suffix gets ".model.json" appended, so
# the output path can never be the input export itself.
if export_f.endswith('.json'):
    model_path = export_f[:-len('.json')] + '.model.json'
else:
    model_path = export_f + '.model.json'

with open(model_path, 'w') as export:
    export.write(model_json)
|
5
readme.txt
Normal file
5
readme.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
pip install -r requirements.txt
|
||||
python -m spacy download en_core_web_sm
|
||||
python import-misskey.py notes-XXXX-XX-XX-XX-XX-XX.json
mv notes-XXXX-XX-XX-XX-XX-XX.model.json model.json
|
||||
echo "TOKEN=''" > secrets__.py
|
||||
python generate.py
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
markovify
|
||||
requests
|
||||
spacy
|
Loading…
Reference in a new issue