Merge pull request #1 from raynormast/dev

Dev
This commit is contained in:
raynormast 2022-12-17 20:47:19 -05:00 committed by GitHub
commit 1478d505f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 111 additions and 70 deletions

12
.env.production.sample Normal file
View file

@ -0,0 +1,12 @@
fakeRelayKey="YOUR--FAKE---RELAY---KEY"
fakeRelayHost="https://your-fake-relay-url.YourPetMastodon.com"
## Do you want to send URIs to the fake relay, or just log them?
runFirehose=true
# Maximum number of curl processes to run at once
maxCurls=50
# Minimum number of URIs to have before you process a batch.
# Don't set this too low, or you will send over lots of duplicates and burn up your machine
minURIs=100

4
.gitignore vendored
View file

@ -1,3 +1,7 @@
.DS_Store
data/20221217.json
20221217.uris.txt
.env.production
config/domains-federated
config/domains-local
config/hashtags

View file

@ -1,50 +0,0 @@
mastodon.social
universeodon.com
aus.social
fedibird.com
mstdn.social
mindly.social
mastodonapp.uk
#vivaldi.net
mas.to
mastodon.world
mastodon.au
mastodon.online
hachyderm.io
mastodon.nu
infosec.exchange
mastodon.lol
fosstodon.org
wxw.moe
misskey.io
theblower.au
mstdn.party
mastodon.art
chaos.social
mastodon.sdf.org
sfba.social
mastodon.scot
misskey.cf
kolektiva.social
c.im
masto.ai
sushi.ski
mstdn.ca
respublicae.eu
toot.community
ohai.social
troet.cafe
home.social
equestria.social
mastodon.ie
a.gup.pe
newsie.social
phpc.social
techhub.social
m.cmx.im
tech.lgbt
wandering.shop
mastodon.nz
zirk.us
mastodon.cloud
botsin.space

View file

@ -0,0 +1,8 @@
## Fake Firehose will only take all public posts from these domains
## This is the true firehose, use it carefully or blow up your server
### International English (if you aren't from the US) ###
mastodon.scot
aus.social
mastodon.nz
respublicae.eu

View file

@ -0,0 +1,11 @@
## Fake Firehose will only take local posts from these domains
### Tech ###
infosec.exchange
ioc.exchange
tech.lgbt
techhub.social
fosstodon.org
### News & Politics ###
journa.host

0
config/hashtags.sample Normal file
View file

View file

@ -7,4 +7,5 @@ services:
volumes:
- ./data:/data
- ./config:/config
restart: always
restart: always
env_file: .env.production

View file

@ -8,6 +8,18 @@ Find a better way to do it and issue a pull request, or just tell me where your
## How to run it
In the config folder there are three files
- domains-federated
- domains-local
- hashtags
If you want the full-on public feed from an instance, put it in the domains-federated file, one domain per line.
If you only want the local feed from an instance, put it in the domains-local file, one domain per line.
If you want to follow a hashtag you're out of luck because I didn't get that far. But it will go into the hashtags file.
Build docker
Run docker

33
scripts/get-stream.sh Executable file → Normal file
View file

@ -1,8 +1,33 @@
host=$1
type=$2
if [[ "$host" == "" ]]
then
echo "Empty host: $host"
exit 2
fi
while true
do
today=`date +"%Y%m%d"`
curl -X "GET" "https://$host/api/v1/streaming/public?&local=true" \
case "$type" in
"federated")
fetch="https://$host/api/v1/streaming/public";;
"local")
fetch="https://$host/api/v1/streaming/public?local=true";;
"hashtags")
fetch="https://$host/api/v1/streaming/hashtag?tag=linux"
echo "Sorry, hash tags aren't implemented yet :("
exit 1
;;
esac
echo "Starting to stream $fetch in 5 seconds"
sleep 5s;
curl -X "GET" "$fetch" \
--no-progress-meter | \
tee -a "/data/$today.json" | \
grep url | \
@ -16,9 +41,9 @@ do
url=`echo $line | jq .url| sed 's/\"//g'`
uri=`echo $line | jq .uri| sed 's/\"//g'`
echo "$host $url"
echo $uri >> "$today.uris.txt"
echo "STREAMING: $host $url"
echo $uri >> "/data/$today.uris.txt"
fi
done
done
done

27
scripts/run-firehose.sh Executable file → Normal file
View file

@ -4,38 +4,40 @@ while true
## This assumes that we have other scripts that are writing to the file called
## $source, which here is today's date appended with .uris.txt
today=`date +"%Y%m%d"`
source="$today.uris.txt"
source="/data/$today.uris.txt"
## Here we take the top 500 lines of the file -- so we are in FIFO
## and pipe them thru uniq so we only pass unique URIs through to the fake relay
## This step easily cuts the total number of URIs in half and is the only way we can keep up
## Make sure that you have the same number in the following two lines. In this repo, it is currently at 500
head "$source" -n 500 | sed 's/\"//g' | sort | uniq -u > backfilluris.txt
sed -i '1,500d' "$source"
head "$source" -n $minURIs | sed 's/\"//g' | sort | uniq -u > backfilluris.txt
sedExpression="1,${minURIs}d"
sed -i $sedExpression "$source"
## Start looping through the unique URIs
cat backfilluris.txt| \
while read -r uri
do
# echo BACKFILL $url;
echo "FIREHOSE: Posting $uri"
## Send it to the fake relay as a background job
curl -X "POST" "$fakeRelayHost" \
-H "Authorization: Bearer $fakeRelayKey" \
-H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
--data-urlencode "statusUrl=$uri" &
--data-urlencode "statusUrl=$uri" \
--no-progress-meter &
## Don't overload the system on open curls. Wait until they are below a certain amount to move on
## Or have some fun, set this as high as you like and turn your computer into a space heater!
curls=`ps -ef|grep curl|wc -l`
until [ $curls -lt 100 ]
until [ $curls -lt $maxCurls ]
do
curls=`ps -ef|grep curl|wc -l`
echo "Waiting for existing curls to finish, at $curls"
echo "FIREHOSE: Waiting for existing curls to finish, at $curls"
linesLeft=`cat "$source"|wc -l`
echo "$linesLeft Total URIs left"
sleep 1s
echo "FIREHOSE: $linesLeft Total URIs left"
sleep 5s
done
done
@ -44,11 +46,10 @@ while true
## Wait until the queue is at least 500 lines long; with fewer than that,
## there are not enough lines to see if there are duplicates.
until [ $linesLeft -gt 500 ]
until [ $linesLeft -gt $minURIs ]
do
linesLeft=`cat "$source"|wc -l`
sleep 1s
echo "Waiting for more URIs to batch, currently at $linesLeft"
echo "FIREHOSE: Waiting for more URIs to batch, currently at $linesLeft"
sleep 5s
done
done

21
scripts/start-firehose.sh Executable file → Normal file
View file

@ -1,8 +1,25 @@
#!/bin/bash
cat /config/domains|grep -v "#"|while read -r host
cat /config/domains-federated|grep -v "#"|while read -r host
do
/scripts/get-stream.sh $host &
if [[ "$host" != "" ]]
then
/scripts/get-stream.sh $host "federated" &
fi
done
cat /config/domains-local|grep -v "#"|while read -r host
do
if [[ "$host" != "" ]]
then
/scripts/get-stream.sh $host "local" &
fi
done
if [[ $runFirehose == true ]]
then
/scripts/run-firehose.sh &
fi
## Don't let the container exit
while true; do sleep 1; done