From 2618b264f3b32b67c479833f4ece9c6dcf6d0d92 Mon Sep 17 00:00:00 2001
From: ShittyKopper
Date: Fri, 23 Feb 2024 15:05:37 +0300
Subject: [PATCH] initial

---
 .gitignore        |  5 +++++
 chain.py          | 36 ++++++++++++++++++++++++++++++++++++
 generate.py       | 42 ++++++++++++++++++++++++++++++++++++++++++
 import-misskey.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 readme.txt        |  5 +++++
 requirements.txt  |  3 +++
 6 files changed, 165 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 chain.py
 create mode 100644 generate.py
 create mode 100644 import-misskey.py
 create mode 100644 readme.txt
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..21d30b6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.json
+*.py[oc]
+/secrets__.py
+/venv
+/cron.log
diff --git a/chain.py b/chain.py
new file mode 100644
--- /dev/null
+++ b/chain.py
@@ -0,0 +1,36 @@
+"""Markov chain text model whose states carry spaCy part-of-speech tags."""
+
+import markovify
+import re
+import spacy
+
+# Corpus entries are separated by this marker; the surrounding whitespace is
+# part of the pattern so it does not end up inside a sentence.
+SEPARATOR = r"\s*@@note@@\s*"
+
+# Joins a token's text and its POS tag inside a single chain item.
+TAG_DELIMITER = "::"
+
+nlp = spacy.load("en_core_web_sm")
+
+
+class Text(markovify.Text):
+    """markovify.Text that tags every word with its spaCy POS."""
+
+    def word_split(self, sentence):
+        """Split *sentence* into ``word::POS`` items, skipping punctuation."""
+        return [
+            TAG_DELIMITER.join((token.orth_, token.pos_))
+            for token in nlp(sentence)
+            if token.pos_ != 'PUNCT'
+        ]
+
+    def word_join(self, words):
+        """Strip the POS tag from every item and join the words with spaces."""
+        # Cut at the LAST delimiter only: the tag is always the final part, while
+        # the token itself may contain "::" (e.g. ":a::b:") and must stay whole.
+        return " ".join(word.rsplit(TAG_DELIMITER, 1)[0] for word in words)
+
+    def sentence_split(self, text):
+        """Split *text* into notes on the ``@@note@@`` marker."""
+        return re.split(SEPARATOR, text)
diff --git a/generate.py b/generate.py
new file mode 100644
--- /dev/null
+++ b/generate.py
@@ -0,0 +1,42 @@
+"""Generate one short sentence from a compiled model and post it as a note."""
+
+import chain
+import sys
+import requests
+
+import secrets__
+
+API_URL = "https://brain.d.on-t.work/api/notes/create"
+
+# The model path may be given as the first argument; without one the fixed
+# name "model.json" is used, exactly as before.
+model_path = sys.argv[1] if len(sys.argv) > 1 else "model.json"
+
+# Context manager so the file handle is closed once the model is parsed.
+with open(model_path, encoding="utf-8") as model_f:
+    model = chain.Text.from_json(model_f.read())
+
+# make_short_sentence() gives None when all its tries fail, so keep asking
+# until a sentence comes back.
+text = None
+while text is None:
+    text = model.make_short_sentence(80, tries=900, min_words=3)
+
+# An invisible character after '@' and '#' stops the generated text from
+# turning into real mentions or hashtags. NOTE(review): written as an explicit
+# escape here; the original literal is taken to be U+200B - confirm.
+text = text.replace('@', '@\u200b').replace('#', '#\u200b')
+print(text)
+
+response = requests.post(API_URL, timeout=30, json={
+    'i': secrets__.TOKEN,
+
+    'visibility': 'home',
+    'noExtractMentions': True,
+    'noExtractHashtags': True,
+
+    'text': text,
+    'cw': 'markov chain generated post'
+})
+# Surface a rejected request as an error instead of exiting silently.
+response.raise_for_status()
diff --git a/import-misskey.py b/import-misskey.py
new file mode 100644
--- /dev/null
+++ b/import-misskey.py
@@ -0,0 +1,74 @@
+"""Build a Markov model from a Misskey notes export (a JSON list of notes)."""
+
+import re
+import json
+import sys
+from pathlib import Path
+
+print('[+] loading nlp enhanced markov chain')
+import chain
+
+# '-' is placed last in each class so it is a literal hyphen; written as '.-_'
+# it formed a range that excluded '-'. The optional "@host" part uses the same
+# characters as the username.
+MENTION = re.compile(r'@[a-zA-Z0-9._-]+(@[a-zA-Z0-9._-]+)?')
+MFM_BEGIN = re.compile(r'\$\[[a-z0-9.,=]+')
+MFM_END = re.compile(r'\]+')
+# An empty pattern made this substitution a no-op; match whole tags instead.
+HTML = re.compile(r'<[^>]+>')
+SPACE = re.compile(r'[ \n]+')
+CONTRACTION = re.compile(r"(\w+)'(\w+)")
+
+if len(sys.argv) < 2:
+    sys.exit(f'usage: {sys.argv[0]} NOTES_EXPORT.json')
+
+print('[+] loading note json')
+export_path = Path(sys.argv[1])
+with export_path.open(encoding='utf-8') as export_f:
+    export_json = json.load(export_f)
+
+corpus = []
+
+for note in export_json:
+    # NOTE(review): generate.py posts with visibility 'home'; confirm whether
+    # 'home' notes should be accepted here as well.
+    if note.get('visibility') not in ['public', 'unlisted']:
+        continue
+
+    if note.get('localOnly'):
+        continue
+
+    if note.get('cw'):
+        continue
+
+    text = note.get('text')
+    if not text:
+        continue
+
+    text = text.lower()
+    text = MENTION.sub('', text)
+    text = MFM_BEGIN.sub('', text)
+    text = MFM_END.sub('', text)
+    text = HTML.sub('', text)
+    text = SPACE.sub(' ', text)
+    text = CONTRACTION.sub(r'\1\2', text)
+    text = text.strip()
+
+    # Cleaning can leave nothing behind (e.g. a note that was only a mention);
+    # skip it so the corpus holds no empty entries.
+    if not text:
+        continue
+
+    print(f" - {text}")
+    corpus.append(text)
+
+print('[+] building markov chain')
+model = chain.Text("@@note@@".join(corpus), well_formed=False)
+model_json = model.compile().to_json()
+
+print('[+] exporting')
+# Swap only the final extension. str.replace() left the name unchanged when it
+# had no '.json' in it, so the model was written over the export itself.
+model_path = export_path.with_suffix('.model.json')
+with model_path.open('w', encoding='utf-8') as model_f:
+    model_f.write(model_json)
diff --git a/readme.txt b/readme.txt
new file mode 100644
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,5 @@
+    pip install -r requirements.txt
+    python -m spacy download en_core_web_sm
+    python import-misskey.py notes-XXXX-XX-XX-XX-XX-XX.json
+    echo "TOKEN=''" > secrets__.py
+    python generate.py notes-XXXX-XX-XX-XX-XX-XX.model.json
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..af83f66
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+markovify
+requests
+spacy