commit 47b61a63888eef81194961adb509340414fcea9e
Author: raynor
Date:   Sat Dec 17 00:20:35 2022 -0500

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e43b0f9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/domains b/domains
new file mode 100644
index 0000000..dafd7a3
--- /dev/null
+++ b/domains
@@ -0,0 +1,50 @@
+mastodon.social
+universeodon.com
+aus.social
+fedibird.com
+mstdn.social
+mindly.social
+mastodonapp.uk
+vivaldi.net
+mas.to
+mastodon.world
+mastodon.au
+mastodon.online
+hachyderm.io
+mastodon.nu
+infosec.exchange
+mastodon.lol
+fosstodon.org
+wxw.moe
+misskey.io
+theblower.au
+mstdn.party
+mastodon.art
+chaos.social
+mastodon.sdf.org
+sfba.social
+mastodon.scot
+misskey.cf
+kolektiva.social
+c.im
+masto.ai
+sushi.ski
+mstdn.ca
+respublicae.eu
+toot.community
+ohai.social
+troet.cafe
+home.social
+equestria.social
+mastodon.ie
+a.gup.pe
+newsie.social
+phpc.social
+techhub.social
+m.cmx.im
+tech.lgbt
+wandering.shop
+mastodon.nz
+zirk.us
+mastodon.cloud
+botsin.space
diff --git a/get-stream.sh b/get-stream.sh
new file mode 100755
index 0000000..f327588
--- /dev/null
+++ b/get-stream.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+## Follow the public stream of one instance and queue the status URIs it carries.
+##
+## Usage: ./get-stream.sh <host>
+##
+## Each matching stream line prints "<host> <url>" and appends its URI to
+## <YYYYMMDD>.uris.txt in the current directory.
+
+host=$1
+if [[ -z "$host" ]]
+then
+    echo "usage: $0 <host>" >&2
+    exit 1
+fi
+
+while true
+do
+    ## NOTE(review): the date is taken once per connection, so a stream that
+    ## stays up past midnight keeps appending to the previous day's file, while
+    ## run-firehose.sh works out the file name again for every batch.
+    today=$(date +"%Y%m%d")
+
+    ## Keep only the lines that mention "url" and strip the leading "data:"
+    ## so that what is left can be handed to jq.
+    curl -X "GET" "https://$host/api/v1/streaming/public" \
+        --no-progress-meter | \
+    grep url | \
+    sed 's/^data://' | \
+    while read -r line
+    do
+        if [[ $line == *"uri"* ]]
+        then
+            ## Quoting "$line" stops the shell from word-splitting or globbing
+            ## the JSON, and jq -r prints the strings without their quotes.
+            url=$(jq -r .url <<< "$line")
+            uri=$(jq -r .uri <<< "$line")
+
+            ## Nothing to queue if jq found no URI (or could not parse the line).
+            if [[ -z "$uri" || "$uri" == "null" ]]
+            then
+                continue
+            fi
+
+            echo "$host $url"
+            echo "$uri" >> "$today.uris.txt"
+        fi
+    done
+
+    ## curl has returned: the stream closed or the connection failed. Pause
+    ## before reconnecting so a failing host is not retried in a tight loop.
+    sleep 5
+done
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..72059ed
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,79 @@
+# Fake Firehose
+This project generates the mythical "firehose" relay that small Mastodon instances look for,
+at least to get content.
+
+It's a little crazy.
+
+Find a better way to do it and issue a pull request, or just tell me where your new repo is :)
+
+## How to run it
+
+Make sure you have `jq` installed.
+
+Linux:
+`apt install jq`
+
+macOS:
+`brew install jq`
+
+### 1. Fake Relay is set up
+You need to have [fakerelay](https://github.com/g3rv4/FakeRelay) running and hooked up with your Mastodon instance.
+
+[Gervasio](https://mastodonte.tech/@g3rv4) is _the man_ for fakerelay.
+
+### 2. Environment variables
+You need to have two environment variables set:
+
+- fakeRelayHost
+- fakeRelayKey
+
+I recommend you put them in your `.bashrc` file. I use:
+
+```shell
+export fakeRelayKey="MrNtYH+GjwDtJtR6YCx2O4+TuldQ_SOMEKEY_aVni0rsbDryETCx9lHSZmzcOAv3Y8+4LiD8bFUZbnyl4w=="
+export fakeRelayHost="https://my-relay.raynor.haus/index"
+```
+
+### 3. Instances you want to follow
+Create a file `domains` and put one instance on each line that you want to follow. The top 50 instances by total accounts, as seen by my instance, are included.
+
+### 4. Start it up
+Open a terminal and run `./start-firehose.sh`
+
+This starts reading the public federated statuses stream of every instance in the `domains` file.
+
+Open a different terminal and run `./run-firehose.sh`
+
+This starts feeding the statuses to fakerelay.
+
+Profit.
+
+### 5. How to stop it
+Log out.
+
+No for real, I didn't get to that part yet.
+
+# Super important things to watch out for
+**AFTER YOU RUN `start-firehose.sh` IT WILL KICK OFF A SHELL SCRIPT IN THE BACKGROUND FOR EVERY DOMAIN YOU HAVE LISTED. THERE IS NO EASY WAY TO KILL THESE.**
+
+100% for real, run this in a VM or a container or somewhere you can log out if you overdid it to start.
+
+`run-firehose.sh` has a couple of important lines to look at.
+
+First:
+
+`until [ $curls -lt 100 ]`
+
+determines the _total_ number of `curl` executables that can be run at once, system-wide. This includes one for each `domain` you have listed.
+
+If your curl limit is less than your domains number, then nothing will flow.
+
+If your curl limit is too high, your machine will run out of resources and lock up.
+
+Second:
+
+`until [ $linesLeft -gt 500 ]`
+
+sets how many statuses (aka posts, toots) have to be in a batch. **YOU NEED TO BATCH THESE**
+
+_Most_ of the URIs will be duplicated; the beginning of `run-firehose.sh` de-duplicates the URIs. 500-1000 has been a good batch size in my experience.
\ No newline at end of file
diff --git a/run-firehose.sh b/run-firehose.sh
new file mode 100755
index 0000000..6b75b3f
--- /dev/null
+++ b/run-firehose.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+## Drain the URI queue written by the get-stream.sh jobs and post every
+## status to the fake relay.
+##
+## Needs fakeRelayHost and fakeRelayKey in the environment (see readme.md).
+
+: "${fakeRelayHost:?fakeRelayHost must be set}"
+: "${fakeRelayKey:?fakeRelayKey must be set}"
+
+while true
+do
+
+    ## This assumes that we have other scripts that are writing to the file called
+    ## $source, which here is today's date appended with .uris.txt
+    today=$(date +"%Y%m%d")
+    source="$today.uris.txt"
+
+    ## Until a stream has written today's first URI the file does not exist;
+    ## create it so the commands below always have something to read.
+    touch "$source"
+
+    ## Here we take the top 500 lines of the file -- so we are in FIFO
+    ## and de-duplicate them so every URI is passed to the fake relay once.
+    ## This step easily cuts the total number of URIs in half and is the only way we can keep up
+    ##
+    ## "sort -u" keeps one copy of each URI. "sort | uniq -u" would drop every
+    ## URI that appears more than once instead of collapsing the copies.
+
+    ## Make sure that you have the same number in the following two lines. In this repo, it is currently at 500
+    head -n 500 "$source" | sed 's/\"//g' | sort -u > backfilluris.txt
+    ## -i.bak is accepted by both GNU and BSD sed; the backup copy is not needed.
+    sed -i.bak '1,500d' "$source" && rm -f "$source.bak"
+
+    ## Start looping through the unique URIs
+    while read -r uri
+    do
+        ## A blank line is not a status; skip it.
+        if [[ -z "$uri" ]]
+        then
+            continue
+        fi
+
+        ## Send it to the fake relay as a background job
+        curl -X "POST" "$fakeRelayHost" \
+            -H "Authorization: Bearer $fakeRelayKey" \
+            -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
+            --data-urlencode "statusUrl=$uri" &
+
+        ## Don't overload the system on open curls. Wait until they are below a certain amount to move on
+        ## Or have some fun, set this as high as you like and turn your computer into a space heater!
+        ## pgrep -x counts only processes named exactly "curl", so the counter itself is not included.
+        ## readme.md quotes the "until" line below; keep it as it is so readers can find it.
+        curls=$(pgrep -x curl | wc -l)
+        until [ $curls -lt 100 ]
+        do
+            curls=$(pgrep -x curl | wc -l)
+            echo "Waiting for existing curls to finish, at $curls"
+            linesLeft=$(wc -l < "$source")
+            echo "$linesLeft Total URIs left"
+            sleep 1
+        done
+
+    done < backfilluris.txt
+
+    linesLeft=$(wc -l < "$source")
+
+    ## Wait until the queue is at least 500 lines long, less than that
+    ## and there are not enough lines to see if there are duplicates.
+    until [ $linesLeft -gt 500 ]
+    do
+        linesLeft=$(wc -l < "$source")
+        sleep 1
+        echo "Waiting for more URIs to batch, currently at $linesLeft"
+
+    done
+done
diff --git a/start-firehose.sh b/start-firehose.sh
new file mode 100755
index 0000000..90d2088
--- /dev/null
+++ b/start-firehose.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+## Start one background ./get-stream.sh job for every instance listed in the
+## "domains" file (one host per line).
+
+while read -r host || [[ -n "$host" ]]
+do
+    ## Skip blank lines so no job is started without a host.
+    if [[ -z "$host" ]]
+    then
+        continue
+    fi
+
+    ./get-stream.sh "$host" &
+done < domains