From 2618b264f3b32b67c479833f4ece9c6dcf6d0d92 Mon Sep 17 00:00:00 2001
From: ShittyKopper <shittykopper@w.on-t.work>
Date: Fri, 23 Feb 2024 15:05:37 +0300
Subject: [PATCH] initial

---
 .gitignore        |  5 +++++
 chain.py          | 26 +++++++++++++++++++++++
 generate.py       | 29 +++++++++++++++++++++++++
 import-misskey.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 readme.txt        |  5 +++++
 requirements.txt  |  3 +++
 6 files changed, 122 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 chain.py
 create mode 100644 generate.py
 create mode 100644 import-misskey.py
 create mode 100644 readme.txt
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..21d30b6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.json
+*.py[oc]
+/secrets__.py
+/venv
+/cron.log
diff --git a/chain.py b/chain.py
new file mode 100644
index 0000000..895ce1c
--- /dev/null
+++ b/chain.py
@@ -0,0 +1,40 @@
+"""Markov chain text model that tags each word with its spaCy part of speech."""
+
+import markovify
+import re
+import spacy
+
+# Notes are joined with this marker in the corpus; the surrounding whitespace
+# is consumed by the split so it does not end up inside a sentence.
+SEPARATOR = r"\s*@@note@@\s*"
+
+# Joins a word to its part-of-speech tag inside a chain state ("word::POS").
+TAG_DELIMITER = "::"
+
+# Loaded once at import time and shared by every call to word_split.
+nlp = spacy.load("en_core_web_sm")
+
+
+class Text(markovify.Text):
+    """markovify.Text variant whose states are "word::POS" tokens."""
+
+    def word_split(self, sentence):
+        """Tokenize *sentence* with spaCy and return "word::POS" strings.
+
+        Tokens tagged as punctuation are dropped.
+        """
+        return [
+            TAG_DELIMITER.join((word.orth_, word.pos_))
+            for word in nlp(sentence)
+            if word.pos_ != 'PUNCT'
+        ]
+
+    def word_join(self, words):
+        """Strip the POS tag from each token and join the words with spaces."""
+        # Split from the right: the tag is appended last, so the final "::" is
+        # always the real boundary even when the word itself contains "::".
+        return " ".join(word.rsplit(TAG_DELIMITER, 1)[0] for word in words)
+
+    def sentence_split(self, text):
+        """Split the corpus at SEPARATOR, yielding one "sentence" per note."""
+        return re.split(SEPARATOR, text)
diff --git a/generate.py b/generate.py
new file mode 100644
index 0000000..a90c43e
--- /dev/null
+++ b/generate.py
@@ -0,0 +1,42 @@
+"""Generate one sentence from the saved markov model and post it as a note."""
+
+import chain
+import sys
+import requests
+
+import secrets__
+
+# Each attempt already makes up to 900 internal tries; give up after a few
+# attempts instead of spinning forever when the model cannot produce a
+# sentence that fits.
+MAX_ATTEMPTS = 10
+
+with open("model.json", encoding="utf-8") as model_f:
+    model = chain.Text.from_json(model_f.read())
+
+text = None
+for _ in range(MAX_ATTEMPTS):
+    text = model.make_short_sentence(80, tries=900, min_words=3)
+    if text is not None:
+        break
+
+if text is None:
+    sys.exit("could not generate a sentence from model.json")
+
+# The replacement literals carry an invisible character after '@' and '#',
+# presumably so the posted text is not parsed as a mention or a hashtag.
+text = text.replace('@','@​').replace('#','#​')
+print(text)
+
+response = requests.post("https://brain.d.on-t.work/api/notes/create", json={
+    'i': secrets__.TOKEN,
+
+    'visibility': 'home',
+    'noExtractMentions': True,
+    'noExtractHashtags': True,
+
+    'text': text,
+    'cw': 'markov chain generated post'
+}, timeout=30)
+# Surface API failures (bad token, server error) instead of exiting quietly.
+response.raise_for_status()
diff --git a/import-misskey.py b/import-misskey.py
new file mode 100644
index 0000000..7e199c3
--- /dev/null
+++ b/import-misskey.py
@@ -0,0 +1,82 @@
+"""Build a markov model from a Misskey notes export (JSON)."""
+
+import re
+import json
+import sys
+
+# Printed before the import: importing chain loads the spaCy pipeline at
+# module level, so the message appears before that work starts.
+print('[+] loading nlp enhanced markov chain')
+import chain
+
+# '-' goes last in each class so it is a literal hyphen, not a range.
+MENTION = re.compile(r'@[a-zA-Z0-9._-]+(@[a-zA-Z0-9._-]+)?')
+MFM_BEGIN = re.compile(r'\$\[[a-z0-9.,=]+')
+MFM_END = re.compile(r'\]+')
+HTML = re.compile(r'</?[a-z]+>')
+SPACE = re.compile(r'[ \n]+')
+CONTRACTION = re.compile(r"(\w+)'(\w+)")
+
+
+def clean(text):
+    """Lowercase *text* and strip mentions, MFM/HTML markup and apostrophes."""
+    text = text.lower()
+    text = MENTION.sub('', text)
+    text = MFM_BEGIN.sub('', text)
+    text = MFM_END.sub('', text)
+    text = HTML.sub('', text)
+    text = SPACE.sub(' ', text)
+    text = CONTRACTION.sub(r'\1\2', text)
+    return text.strip()
+
+
+def model_path(export_path):
+    """Return the output path: the export path with a '.model.json' suffix.
+
+    Only a trailing '.json' is swapped, so the result can never be the
+    export file itself.
+    """
+    if export_path.endswith('.json'):
+        export_path = export_path[:-len('.json')]
+    return export_path + '.model.json'
+
+
+if len(sys.argv) < 2:
+    sys.exit('usage: python import-misskey.py notes-export.json')
+
+print('[+] loading note json')
+export_f = sys.argv[1]
+with open(export_f, encoding='utf-8') as export:
+    export_json = json.load(export)
+
+corpus = []
+
+for note in export_json:
+    if note.get('visibility') not in ['public', 'unlisted']:
+        continue
+
+    if note.get('localOnly'):
+        continue
+
+    if note.get('cw'):
+        continue
+
+    text = note.get('text')
+    if not text:
+        continue
+
+    text = clean(text)
+    if not text:
+        # Nothing left after cleaning (e.g. the note was only a mention).
+        continue
+
+    print(f"     - {text}")
+    corpus.append(text)
+
+print('[+] building markov chain')
+model = chain.Text("@@note@@".join(corpus), well_formed=False)
+model_json = model.compile().to_json()
+
+print('[+] exporting')
+with open(model_path(export_f), 'w', encoding='utf-8') as export:
+    export.write(model_json)
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..34112f6
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,6 @@
+	pip install -r requirements.txt
+	python -m spacy download en_core_web_sm
+	python import-misskey.py notes-XXXX-XX-XX-XX-XX-XX.json
+	mv notes-XXXX-XX-XX-XX-XX-XX.model.json model.json
+	echo "TOKEN=''" > secrets__.py
+	python generate.py
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..af83f66
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+markovify
+requests
+spacy