forked from mirror/fake-firehose
commit
1478d505f6
11 changed files with 111 additions and 70 deletions
12
.env.production.sample
Normal file
12
.env.production.sample
Normal file
|
@ -0,0 +1,12 @@
|
|||
fakeRelayKey="YOUR--FAKE---RELAY---KEY"
|
||||
fakeRelayHost="https://your-fake-relay-url.YourPetMastodon.com"
|
||||
|
||||
## Do you want to send URIs to fake relay or just log them?
|
||||
runFirehose=true
|
||||
|
||||
# Maximum number of curl processes to run at once
|
||||
maxCurls=50
|
||||
|
||||
# Minimum number of URIs to have before you process a batch.
|
||||
# Don't set this too low, or you will send over lots of duplicates and burn up your machine
|
||||
minURIs=100
|
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -1,3 +1,7 @@
|
|||
.DS_Store
|
||||
data/20221217.json
|
||||
20221217.uris.txt
|
||||
.env.production
|
||||
config/domains-federated
|
||||
config/domains-local
|
||||
config/hashtags
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
mastodon.social
|
||||
universeodon.com
|
||||
aus.social
|
||||
fedibird.com
|
||||
mstdn.social
|
||||
mindly.social
|
||||
mastodonapp.uk
|
||||
#vivaldi.net
|
||||
mas.to
|
||||
mastodon.world
|
||||
mastodon.au
|
||||
mastodon.online
|
||||
hachyderm.io
|
||||
mastodon.nu
|
||||
infosec.exchange
|
||||
mastodon.lol
|
||||
fosstodon.org
|
||||
wxw.moe
|
||||
misskey.io
|
||||
theblower.au
|
||||
mstdn.party
|
||||
mastodon.art
|
||||
chaos.social
|
||||
mastodon.sdf.org
|
||||
sfba.social
|
||||
mastodon.scot
|
||||
misskey.cf
|
||||
kolektiva.social
|
||||
c.im
|
||||
masto.ai
|
||||
sushi.ski
|
||||
mstdn.ca
|
||||
respublicae.eu
|
||||
toot.community
|
||||
ohai.social
|
||||
troet.cafe
|
||||
home.social
|
||||
equestria.social
|
||||
mastodon.ie
|
||||
a.gup.pe
|
||||
newsie.social
|
||||
phpc.social
|
||||
techhub.social
|
||||
m.cmx.im
|
||||
tech.lgbt
|
||||
wandering.shop
|
||||
mastodon.nz
|
||||
zirk.us
|
||||
mastodon.cloud
|
||||
botsin.space
|
8
config/domains-federated.sample
Normal file
8
config/domains-federated.sample
Normal file
|
@ -0,0 +1,8 @@
|
|||
## Fake Firehose will take all public posts from these domains
|
||||
## This is the true firehose; use it carefully or you will blow up your server
|
||||
|
||||
### International English (if you aren't from the US) ###
|
||||
mastodon.scot
|
||||
aus.social
|
||||
mastodon.nz
|
||||
respublicae.eu
|
11
config/domains-local.sample
Normal file
11
config/domains-local.sample
Normal file
|
@ -0,0 +1,11 @@
|
|||
## Fake Firehose will only take local posts from these domains
|
||||
|
||||
### Tech ###
|
||||
infosec.exchange
|
||||
ioc.exchange
|
||||
tech.lgbt
|
||||
techhub.social
|
||||
fosstodon.org
|
||||
|
||||
### News & Politics ###
|
||||
journa.host
|
0
config/hashtags.sample
Normal file
0
config/hashtags.sample
Normal file
|
@ -7,4 +7,5 @@ services:
|
|||
volumes:
|
||||
- ./data:/data
|
||||
- ./config:/config
|
||||
restart: always
|
||||
restart: always
|
||||
env_file: .env.production
|
12
readme.md
12
readme.md
|
@ -8,6 +8,18 @@ Find a better way to do it and issue a pull request, or just tell me where your
|
|||
|
||||
## How to run it
|
||||
|
||||
In the config folder there are three files:
|
||||
|
||||
- domains-federated
|
||||
- domains-local
|
||||
- hashtags
|
||||
|
||||
If you want the full-on public feed from an instance, put it in the domains-federated file, one domain per line.
|
||||
|
||||
If you only want the local feed from an instance, put it in the domains-local file, one domain per line.
|
||||
|
||||
If you want to follow a hashtag, you're out of luck because I didn't get that far. Once it is supported, hashtags will go into the hashtags file.
|
||||
|
||||
Build docker
|
||||
|
||||
Run docker
|
33
scripts/get-stream.sh
Executable file → Normal file
33
scripts/get-stream.sh
Executable file → Normal file
|
@ -1,8 +1,33 @@
|
|||
host=$1
|
||||
type=$2
|
||||
|
||||
if [[ "$host" == "" ]]
|
||||
then
|
||||
echo "Empty host: $host"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
while true
|
||||
do
|
||||
today=`date +"%Y%m%d"`
|
||||
curl -X "GET" "https://$host/api/v1/streaming/public?&local=true" \
|
||||
|
||||
case "$type" in
|
||||
"federated")
|
||||
fetch="https://$host/api/v1/streaming/public";;
|
||||
"local")
|
||||
fetch="https://$host/api/v1/streaming/public?local=true";;
|
||||
"hashtags")
|
||||
fetch="https://$host/api/v1/streaming/hashtag?tag=linux"
|
||||
echo "Sorry, hash tags aren't implemented yet :("
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Starting to stream $fetch in 5 seconds"
|
||||
|
||||
sleep 5s;
|
||||
|
||||
curl -X "GET" "$fetch" \
|
||||
--no-progress-meter | \
|
||||
tee -a "/data/$today.json" | \
|
||||
grep url | \
|
||||
|
@ -16,9 +41,9 @@ do
|
|||
url=`echo $line | jq .url| sed 's/\"//g'`
|
||||
uri=`echo $line | jq .uri| sed 's/\"//g'`
|
||||
|
||||
echo "$host $url"
|
||||
echo $uri >> "$today.uris.txt"
|
||||
echo "STREAMING: $host $url"
|
||||
echo $uri >> "/data/$today.uris.txt"
|
||||
|
||||
fi
|
||||
done
|
||||
done
|
||||
done
|
27
scripts/run-firehose.sh
Executable file → Normal file
27
scripts/run-firehose.sh
Executable file → Normal file
|
@ -4,38 +4,40 @@ while true
|
|||
## This assumes that we have other scripts that are writing to the file called
|
||||
## $source, which here is today's date appended with .uris.txt
|
||||
today=`date +"%Y%m%d"`
|
||||
source="$today.uris.txt"
|
||||
source="/data/$today.uris.txt"
|
||||
|
||||
## Here we take the top $minURIs lines of the file -- so we process URIs in FIFO order
|
||||
## and pipe them thru uniq so we only pass unique URIs through to the fake relay
|
||||
## This step easily cuts the total number of URIs in half and is the only way we can keep up
|
||||
|
||||
## Make sure that you have the same number in the following two lines. In this repo, it is currently at 500
|
||||
head "$source" -n 500 | sed 's/\"//g' | sort | uniq -u > backfilluris.txt
|
||||
sed -i '1,500d' "$source"
|
||||
head "$source" -n $minURIs | sed 's/\"//g' | sort | uniq -u > backfilluris.txt
|
||||
sedExpression="1,${minURIs}d"
|
||||
sed -i $sedExpression "$source"
|
||||
|
||||
## Start looping through the unique URIs
|
||||
cat backfilluris.txt| \
|
||||
while read -r uri
|
||||
do
|
||||
# echo BACKFILL $url;
|
||||
echo "FIREHOSE: Posting $uri"
|
||||
|
||||
## Send it to the fake relay as a background job
|
||||
curl -X "POST" "$fakeRelayHost" \
|
||||
-H "Authorization: Bearer $fakeRelayKey" \
|
||||
-H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
|
||||
--data-urlencode "statusUrl=$uri" &
|
||||
--data-urlencode "statusUrl=$uri" \
|
||||
--no-progress-meter &
|
||||
|
||||
## Don't overload the system on open curls. Wait until they are below a certain amount to move on
|
||||
## Or have some fun, set this as high as you like and turn your computer into a space heater!
|
||||
curls=`ps -ef|grep curl|wc -l`
|
||||
until [ $curls -lt 100 ]
|
||||
until [ $curls -lt $maxCurls ]
|
||||
do
|
||||
curls=`ps -ef|grep curl|wc -l`
|
||||
echo "Waiting for existing curls to finish, at $curls"
|
||||
echo "FIREHOSE: Waiting for existing curls to finish, at $curls"
|
||||
linesLeft=`cat "$source"|wc -l`
|
||||
echo "$linesLeft Total URIs left"
|
||||
sleep 1s
|
||||
echo "FIREHOSE: $linesLeft Total URIs left"
|
||||
sleep 5s
|
||||
done
|
||||
|
||||
done
|
||||
|
@ -44,11 +46,10 @@ while true
|
|||
|
||||
## Wait until the queue is at least 500 lines long, less than that
|
||||
## and there are not enough lines to see if there are duplicates.
|
||||
until [ $linesLeft -gt 500 ]
|
||||
until [ $linesLeft -gt $minURIs ]
|
||||
do
|
||||
linesLeft=`cat "$source"|wc -l`
|
||||
sleep 1s
|
||||
echo "Waiting for more URIs to batch, currently at $linesLeft"
|
||||
|
||||
echo "FIREHOSE: Waiting for more URIs to batch, currently at $linesLeft"
|
||||
sleep 5s
|
||||
done
|
||||
done
|
||||
|
|
21
scripts/start-firehose.sh
Executable file → Normal file
21
scripts/start-firehose.sh
Executable file → Normal file
|
@ -1,8 +1,25 @@
|
|||
#!/bin/bash
|
||||
|
||||
cat /config/domains|grep -v "#"|while read -r host
|
||||
cat /config/domains-federated|grep -v "#"|while read -r host
|
||||
do
|
||||
/scripts/get-stream.sh $host &
|
||||
if [[ "$host" != "" ]]
|
||||
then
|
||||
/scripts/get-stream.sh $host "federated" &
|
||||
fi
|
||||
done
|
||||
|
||||
cat /config/domains-local|grep -v "#"|while read -r host
|
||||
do
|
||||
if [[ "$host" != "" ]]
|
||||
then
|
||||
/scripts/get-stream.sh $host "local" &
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ $runFirehose == true ]]
|
||||
then
|
||||
/scripts/run-firehose.sh &
|
||||
fi
|
||||
|
||||
## Don't let the container exit
|
||||
while true; do sleep 1; done
|
Loading…
Reference in a new issue