1
0
Fork 0
isopod.cool/stuff/rssfilter.php

287 lines
10 KiB
PHP

<?php
// Here's my embarrassing ass source code. It is public domain, do with it what you will
/*ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);*/
// Source download: serve this very script as plain text, then stop.
// The isset() guard avoids an "undefined array key" warning on ordinary page views.
if (isset($_GET['downloadsource']) && $_GET['downloadsource'] === 'yes') {
    header('Content-Type: text/plain');
    // __FILE__ rather than a hard-coded relative name: this keeps working if the
    // script is renamed or the working directory is not the script's directory.
    readfile(__FILE__);
    die();
}

// Request parameters. Anything missing or non-string (e.g. ?feed[]=x) becomes ''
// so the string functions further down never receive null or an array.
$feed = (isset($_GET['feed']) && is_string($_GET['feed'])) ? $_GET['feed'] : '';
$filter = (isset($_GET['filter']) && is_string($_GET['filter'])) ? $_GET['filter'] : '';
$filtertype = (isset($_GET['type']) && is_string($_GET['type'])) ? $_GET['type'] : '';

// Regex mode is opt-in via ?regex=yes (or the short form ?regex=y).
$useregex = isset($_GET['regex']) && ($_GET['regex'] === 'yes' || $_GET['regex'] === 'y');
// Decides whether one RSS <item> matches the active filter.
//
// The filter settings come from the script-level globals $filter (the needle, or a
// full PCRE pattern including delimiters) and $useregex (true = treat $filter as a
// pattern, false = plain substring search).
//
// $item is the SimpleXMLElement for a single <item>.
// Returns true as soon as any <category>, the <description> or the <title> matches,
// and false when none of them do.
function rss_testforfilter($item) {
    global $useregex, $filter;

    // Candidate texts, in the order they are examined: every <category> first,
    // then the description, then the title.
    $candidates = [];
    foreach ($item->category as $tag) {
        $candidates[] = (string) $tag;
    }
    $candidates[] = (string) $item->description;
    $candidates[] = (string) $item->title;

    foreach ($candidates as $text) {
        if ($useregex) {
            if (preg_match($filter, $text)) {
                return true;
            }
        } elseif (strpos($text, $filter) !== false) {
            return true;
        }
    }

    return false;
}
// Decides whether one Atom <entry> matches the active filter.
//
// The filter settings come from the script-level globals $filter (the needle, or a
// full PCRE pattern including delimiters) and $useregex (true = treat $filter as a
// pattern, false = plain substring search).
//
// $entry is the SimpleXMLElement for a single <entry>.
// Returns true if the filter matches any category's "term" attribute, the <title>,
// the <summary> or the <content>; false otherwise.
function atom_testforfilter($entry) {
    global $useregex, $filter;

    $candidates = [];

    // Atom keeps the category name in the "term" attribute, not in the element text.
    // The string cast turns a missing attribute into '' instead of passing null on.
    foreach ($entry->category as $category) {
        $candidates[] = (string) $category['term'];
    }

    // Atom entries carry their text in <summary> and <content>; there is no
    // <description> element in the Atom format. 'description' is still checked so
    // feeds that happen to include one keep behaving as before.
    // Note: the string cast yields only the text directly inside the element, so
    // text nested in child elements (e.g. an XHTML <div> in <content>) is not seen.
    foreach (['title', 'summary', 'content', 'description'] as $field) {
        $candidates[] = (string) $entry->{$field};
    }

    foreach ($candidates as $text) {
        if ($useregex) {
            if (preg_match($filter, $text)) {
                return true;
            }
        } elseif (strpos($text, $filter) !== false) {
            return true;
        }
    }

    return false;
}
// Feed filtering endpoint. Runs only when the query string carries a feed URL, a
// filter and a valid filter type; otherwise execution falls through to the HTML
// generator page below. Explicit string checks (instead of truthiness) mean a
// filter of "0" is accepted and array-valued parameters are rejected.
if (is_string($feed) && $feed !== ''
    && is_string($filter) && $filter !== ''
    && ($filtertype === 'white' || $filtertype === 'black')
) {
    $error = null;   // message for the client when the request cannot be served
    $status = 400;   // HTTP status sent together with $error
    $content = false;

    // Swallow PHP and libxml warnings while probing the input so they cannot leak
    // into the response body; failures are detected through return values instead.
    set_error_handler(function () { return true; });
    $previousLibxmlSetting = libxml_use_internal_errors(true);

    // Only fetch over HTTP(S). file_get_contents() would otherwise also open local
    // paths and php:// style wrappers named in the query string.
    // NOTE(review): this does not stop requests to internal hosts; add an allow-list
    // if this script is reachable from an untrusted network.
    $scheme = strtolower((string) parse_url($feed, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        $error = 'The "feed" parameter must be an http:// or https:// URL.';
    } elseif ($useregex && preg_match($filter, '') === false) {
        // preg_match() returns false (not 0) when the pattern itself is broken.
        $error = 'The "filter" parameter is not a valid regular expression. Remember the delimiters, e.g. /foobar/';
    } else {
        $raw = file_get_contents($feed);
        $content = ($raw === false) ? false : simplexml_load_string($raw);
        if ($content === false) {
            $status = 502;
            $error = 'The feed could not be fetched or is not well-formed XML.';
        }
    }

    libxml_use_internal_errors($previousLibxmlSetting);
    restore_error_handler();

    if ($error !== null) {
        http_response_code($status);
        header('Content-Type: text/plain; charset=utf-8');
        echo $error;
        die();
    }

    header('Content-Type: text/xml');

    // A whitelist keeps matching entries, a blacklist keeps the non-matching ones.
    $keepMatches = ($filtertype === 'white');

    // The output is a fresh document: the known feed-level fields are copied over,
    // then only the entries that pass the filter. asXML() on a missing element
    // yields false, which echo prints as nothing, so absent fields are skipped.
    if ($content->getName() === 'rss') {
        $channel = $content->channel;
        echo '<rss version="2.0"><channel>';
        foreach (['title', 'link', 'description', 'language', 'pubDate', 'lastBuildDate', 'docs'] as $name) {
            echo $channel->{$name}->asXML();
        }
        echo "<generator>will's RSS filter script</generator>";
        foreach (['managingEditor', 'webMaster'] as $name) {
            echo $channel->{$name}->asXML();
        }
        foreach ($channel->item as $item) {
            if (rss_testforfilter($item) === $keepMatches) {
                echo $item->asXML();
            }
        }
        echo '</channel></rss>';
        die();
    } else {
        // Everything that is not <rss> is treated as Atom.
        echo '<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom">';
        echo $content->title->asXML();
        // Atom feeds commonly carry several <link> (self/alternate) and <author>
        // elements, so all of them are copied rather than just the first.
        foreach ($content->link as $link) {
            echo $link->asXML();
        }
        echo $content->updated->asXML();
        foreach ($content->author as $author) {
            echo $author->asXML();
        }
        echo $content->id->asXML();
        foreach ($content->category as $category) {
            echo $category->asXML();
        }
        foreach ($content->contributor as $contributor) {
            echo $contributor->asXML();
        }
        echo "<generator>will's Atom filter script</generator>";
        foreach (['icon', 'logo', 'rights', 'subtitle'] as $name) {
            echo $content->{$name}->asXML();
        }
        foreach ($content->entry as $entry) {
            if (atom_testforfilter($entry) === $keepMatches) {
                echo $entry->asXML();
            }
        }
        echo '</feed>';
        die();
    }
}
?>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>RSS Filter</title>
<style type="text/css">
/* Remove default spacing and let both elements span the full height. */
html, body {
margin: 0;
padding: 0;
height: 100%;
}
/* Page body: a flex column whose children are centred on both axes,
   capped at 32rem wide, light monospace text on a dark background. */
body {
margin: auto;
width: fit-content;
max-width: 32rem;
background-color: #222222;
font-family: monospace;
color: white;
display: flex;
align-items: center;
font-size: 1rem;
flex-direction: column;
justify-content: center;
text-shadow: 1px 1px 3px black;
}
/* First cell of each table row (the field name): bold, with a gap before the control. */
td:first-child {
padding-right: 1em;
font-weight: bold;
}
/* Text inputs stretch to the width of their table cell. */
input[type="text"] {
width: 100%;
}
/* The generated link is shown inside a rounded, darker, centred box. */
code {
font-size: initial;
background-color: #101010;
padding: 1em;
border-radius: 1em;
display: block;
text-align: center;
margin: auto;
box-shadow: 2px 2px 4px black;
}
/* Dimmer colour for any <span> placed inside the code box. */
code span {
color: #ccc;
}
/* The form table uses slightly smaller text than the body. */
table {
font-size: 0.9rem;
}
/* Link colours: normal and hover. */
a {
color: #df1955;
}
a:hover {
color: #00ffd5;
}
/* Portrait orientation: table cells become blocks so each label sits above
   its control, and the page column is narrowed to 20rem. */
@media only screen and (orientation: portrait) {
td {
display: block;
padding-top: 0.5rem;
}
body {
max-width: 20rem;
}
}
</style>
</head>
<body>
<h1 style="margin: 0;">RSS Filter</h1>
<hr style="width: 30%; margin: 1.25rem;">
<!-- Shown only when scripting is off. Placed before the table because <noscript>
     is not a permitted child of <table>. -->
<noscript>
    <p>You need to have Javascript enabled for this generator to work, sorry.</p>
</noscript>
<!-- Link generator form. The ids (feed, filter, whitelist, blacklist, regex) are
     looked up by the script at the bottom of the page. -->
<table>
    <tr>
        <td><label for="feed">Feed URL</label></td>
        <!-- <input> is a void element, so it takes no closing tag. -->
        <td><input type="text" placeholder="https://example.com/rss" id="feed" value=""></td>
    </tr>
    <tr>
        <td><label for="filter">Filter String</label></td>
        <td><input type="text" placeholder="foobar" id="filter" value=""></td>
    </tr>
    <tr>
        <td>Filter Type</td>
        <td>
            <input type="radio" id="whitelist" name="filtertype" value="white" checked>
            <label for="whitelist">Whitelist</label>
            <input type="radio" id="blacklist" name="filtertype" value="black">
            <label for="blacklist">Blacklist</label>
        </td>
    </tr>
    <tr>
        <td><label for="regex">Use Regex</label></td>
        <td><input type="checkbox" id="regex"></td>
    </tr>
</table>
<p>
<code><a id="generatedlink" href=""></a></code>
</p>
<p style="position: absolute; bottom: 1em; margin: 0; text-align: center;">
Made by <a href="https://isopod.cool/">an isopod</a><br>
<a href="?downloadsource=yes">Download source code</a>
</p>
<script>
// Current form state. feedURL and filter hold the RAW text from the inputs;
// updateLink() percent-encodes them when it builds the URL.
let feedURL = "https://example.com/rss";
let filter = "foobar";
let type = "white";
let regex = ""; // either "" or "&regex=yes"

// Rebuilds the filtered-feed URL from the current form state and shows it as
// both the target and the visible text of the #generatedlink anchor.
function updateLink() {
    // origin + pathname instead of location.href, so a query string or #fragment
    // already present on this page's URL does not end up inside the generated link.
    const base = window.location.origin + window.location.pathname;
    // encodeURIComponent (unlike encodeURI) also escapes & ? = # and +, which is
    // required for feed URLs that carry their own query string and for regex
    // filters containing "+" (a bare "+" would be decoded as a space).
    const link = `${base}?feed=${encodeURIComponent(feedURL)}&filter=${encodeURIComponent(filter)}&type=${type}${regex}`;
    const anchor = document.getElementById('generatedlink');
    anchor.href = link;
    // textContent shows the URL verbatim without it being parsed as HTML.
    anchor.textContent = link;
}
updateLink();

document.getElementById('feed').addEventListener('input', function(evt) {
    feedURL = this.value;
    updateLink();
});
document.getElementById('filter').addEventListener('input', function(evt) {
    filter = this.value;
    updateLink();
});
document.getElementById('whitelist').addEventListener('input', function(evt) {
    type = "white";
    updateLink();
});
document.getElementById('blacklist').addEventListener('input', function(evt) {
    type = "black";
    updateLink();
});
document.getElementById('regex').addEventListener('change', function(evt) {
    // Regex mode adds the flag to the link and changes the placeholder to show
    // that the pattern needs delimiters.
    if (this.checked) {
        regex = "&regex=yes";
        document.getElementById('filter').placeholder = "/foobar/";
    } else {
        regex = "";
        document.getElementById('filter').placeholder = "foobar";
    }
    updateLink();
});
</script>
</body>
</html>