forked from mirror/fake-firehose

Compare commits: json-strea...master

4 Commits

Author | SHA1 | Date |
---|---|---|
hnrd | 0d43e6f505 | |
hnrd | 5be901062b | |
hnrd | 2901b87d87 | |
raynor | d466f59781 | |
@@ -41,7 +41,7 @@ The entire thing should look something like:
 cd ~
 mkdir MastodonFireHose
 cd MastodonFirehose
-git pull https://github.com/raynormast/fake-firehose.git
+git clone https://github.com/raynormast/fake-firehose.git
 cd fake-firehose
 docker build -t fakefirehose .
 # Edit your docker-compose and .env.production here
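
Since the last step above says to edit the compose file, here is a minimal hypothetical docker-compose.yml for orientation only. The repo's actual compose file is not shown in this view; the service name, image tag, and volume mappings are assumptions (the scripts below read from /config and write to /data):

version: "3"
services:
  fakefirehose:
    image: fakefirehose          # tag from the docker build step above
    env_file: .env.production    # assumed to supply fakeRelayHost, fakeRelayKey, etc.
    volumes:
      - ./config:/config         # assumed mapping for domains-federated, hashtags
      - ./data:/data             # assumed mapping for saved streams and URI queues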
@@ -317,4 +317,4 @@ The work of [Gervasio Marchand](https://mastodonte.tech/@g3rv4) is fantastic but
 I wanted the simplest setup and config I could create, without setting up an entirely new web UI.

 There are a lot of things to do better, I'll work on the ones I have time and capability for. Otherwise, this project
-is practically begging to be re-written in python or something else.
+is practically begging to be re-written in python or something else.
@@ -0,0 +1,19 @@
+############################################################################
+##
+## This script exports URIs from a saved JSON stream. It uses the same logic
+## as stream-url.sh, except that it reads the JSON from a file.
+## It takes one argument, the input file name.
+##
+############################################################################
+
+source=$1
+
+cat "$source" | grep -A 1 "event: update" | grep "data:" | \
+while read -r line
+do
+  if [[ $line == *"uri"* ]]
+  then
+    uri=`echo $line | sed 's/data: //g' | jq .uri | sed 's/\"//g'`
+    echo "$uri"
+  fi
+done
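
A note on the extraction pipeline: since `jq` can emit raw strings, the same result can be had without the quote-stripping `sed`. A minimal sketch under the same assumptions (saved SSE stream as `$1`, `data:` lines carrying JSON statuses); this is illustrative, not part of the commit:

#!/bin/bash
# Sketch only: print .uri from each "data:" payload of a saved SSE stream.
grep -A 1 "event: update" "$1" | grep "^data:" | sed 's/^data: //' | jq -r '.uri // empty'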
@@ -0,0 +1,83 @@
+############################################################################
+##
+## This script sends URIs to fakerelay based on a saved JSON stream.
+## It takes one argument, the input file name.
+##
+############################################################################
+
+## Look for environmental variables. Because this script may be run outside of docker
+## there is a good chance that they are not set; if they are not, attempt to set them
+## via the .env.production file. If that fails, warn and keep going.
+if [[ ! $loadEnv && -f ../../.env.production ]]
+then
+  echo "[INFO] Did not detect that environmental variables are set, attempting to set via ../../.env.production"
+  source ../../.env.production
+fi
+
+if [[ ! $loadEnv ]]
+then
+  echo "[WARN] Cannot find environmental variables, expect things to break ahead"
+  sleep 5s
+fi
+
+today=`date +"%Y%m%d"`
+
+## The source file we are reading from
+source=$1
+
+while true
+do
+
+  if [[ -f "./maxcurls" ]]
+  then
+    maxCurls=`cat ./maxcurls`
+  fi
+
+
+  ## Here we take the top 500 lines of the file -- so we are in FIFO
+  ## and pipe them thru uniq so we only pass unique URIs through to the fake relay
+  ## This step easily cuts the total number of URIs in half and is the only way we can keep up
+
+  ## Make sure that you have the same number in the following two lines. In this repo, it is currently at 500
+  seed=`date +%Y%m%d%H%M%S%N`
+  backfillFile="backfilluris.$seed.txt"
+  sedExpression="1,${minURIs}d"
+  head "$source" -n $minURIs | sort | uniq -u > "$backfillFile"
+  sed -i $sedExpression "$source"
+
+  ## Start looping through the unique URIs
+  cat "$backfillFile" | \
+  while read -r line
+  do
+    if [[ "$line" != "" ]]
+    then
+
+      uri=`echo $line | sed 's/data: //g' | jq .uri | sed 's/\"//g'`
+      echo "[INFO] RUN-FIREHOSE: Posting $uri"
+
+      ## Send it to the fake relay as a background job
+      curl -X "POST" "$fakeRelayHost" \
+      -H "Authorization: Bearer $fakeRelayKey" \
+      -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
+      --data-urlencode "statusUrl=$uri" \
+      --no-progress-meter &
+
+      ## Don't overload the system on open curls. Wait until they are below a certain amount to move on
+      ## Or have some fun, set this as high as you like and turn your computer into a space heater!
+      curls=`ps -ef|grep curl|wc -l`
+      until [ $curls -lt $maxCurls ]
+      do
+        curls=`ps -ef|grep curl|wc -l`
+        echo "[INFO] RUN-FIREHOSE: Waiting for existing curls to finish, at $curls"
+        linesLeft=`cat "$source"|wc -l`
+        echo "[INFO] RUN-FIREHOSE: $linesLeft Total URIs left"
+        sleep 5s
+      done
+    fi
+
+  done
+
+  linesLeft=`cat "$source"|wc -l`
+  echo "\n \n LINES LEFT: $linesLeft \n\n"
+  rm "$backfillFile"
+done
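
One caveat in the throttle above: `ps -ef | grep curl` also counts the `grep curl` process itself, plus any unrelated curls on the host. A narrower sketch of the same back-pressure idea, counting only this shell's own background jobs — `waitForCurls` is a hypothetical helper name, not from the commit:

# Sketch: block until our own running background jobs drop below the cap.
waitForCurls() {
  until [ "$(jobs -rp | wc -l)" -lt "${maxCurls:-50}" ]   # 50 is an assumed default
  do
    sleep 1s
  done
}
# In the posting loop:  curl ... &  then  waitForCurls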
@@ -0,0 +1,73 @@
+############################################################################
+##
+## This script sends URIs to fakerelay based on a saved file of URIs, one
+## URI per line. It takes one argument, the filename with the URIs.
+##
+## The significant difference is that the JSON stream has already been processed,
+## so this script can post the URIs much faster, as it doesn't have to run the
+## JSON stream through jq.
+##
+############################################################################
+
+## Look for environmental variables. Because this script may be run outside of docker
+## there is a good chance that they are not set; if they are not, attempt to set them
+## via the .env.production file. If that fails, warn and keep going.
+if [[ ! $loadEnv && -f ../../.env.production ]]
+then
+  echo "[INFO] Did not detect that environmental variables are set, attempting to set via ../../.env.production"
+  source ../../.env.production
+fi
+
+if [[ ! $loadEnv ]]
+then
+  echo "[WARN] Cannot find environmental variables, expect things to break ahead"
+  sleep 5s
+fi
+
+today=`date +"%Y%m%d"`
+
+## The source file we are reading from
+source=$1
+
+## Here we take the top $minURIs lines of the file -- so we are in FIFO
+## and pipe them thru uniq so we only pass unique URIs through to the fake relay
+## This step easily cuts the total number of URIs in half and is the only way we can keep up
+
+seed=`date +%Y%m%d%H%M%S%N`
+backfillFile="backfilluris.$seed.txt"
+cat "$source" | sort | uniq -u > "$backfillFile"
+
+## Start looping through the unique URIs
+cat "$backfillFile" | \
+while read -r line
+do
+  if [[ "$line" != "" ]]
+  then
+
+    uri=$line
+    echo "[INFO] RUN-FIREHOSE: Posting $uri"
+    sleep 1s
+
+    ## Send it to the fake relay as a background job
+    curl -X "POST" "$fakeRelayHost" \
+    -H "Authorization: Bearer $fakeRelayKey" \
+    -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
+    --data-urlencode "statusUrl=$uri" \
+    --no-progress-meter &
+
+    ## Don't overload the system on open curls. Wait until they are below a certain amount to move on
+    ## Or have some fun, set this as high as you like and turn your computer into a space heater!
+    curls=`ps -ef|grep curl|wc -l`
+    until [ $curls -lt $maxCurls ]
+    do
+      curls=`ps -ef|grep curl|wc -l`
+      echo "[INFO] RUN-FIREHOSE: Waiting for existing curls to finish, at $curls"
+      linesLeft=`cat "$source"|wc -l`
+      echo "[INFO] RUN-FIREHOSE: $linesLeft Total URIs left"
+      sleep 5s
+    done
+  fi
+
+done
+
+rm "$backfillFile"
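
A hypothetical invocation of this script; its file name is not visible in this view, and the exported values below are placeholders rather than real endpoints or keys. Note that `maxCurls` must be set by the caller, since unlike the stream-based variant this script never reads `./maxcurls`:

# All names and values below are assumptions for illustration only.
export loadEnv=true
export fakeRelayHost="https://fakerelay.example.com/index"
export fakeRelayKey="REDACTED"
export maxCurls=50
./send-uris-from-file.sh 20221127.uris.txt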
@@ -4,9 +4,9 @@ while true
 ## This assumes that we have other scripts that are writing to the file called
 ## $source, which here is today's date appended with .uris.txt
 today=`date +"%Y%m%d"`
-source="/data/$today.uris.txt"
+source="/data/$today.uris.txt"

-## Here we take the top 500 lines of the file -- so we are in FIFO
+## Here we take the top 500 lines of the file -- so we are in FIFO
 ## and pipe them thru uniq so we only pass unique URIs through to the fake relay
 ## This step easily cuts the total number of URIs in half and is the only way we can keep up
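
On the `uniq` step referenced in these comments: `sort | uniq -u` keeps only lines that occur exactly once, so a URI that appears twice in the batch is dropped entirely rather than posted once, while `sort -u` would keep one copy. A quick illustration, not from the commit:

printf 'a\nb\nb\nc\n' | sort | uniq -u   # prints a and c; b is dropped entirely
printf 'a\nb\nb\nc\n' | sort -u          # prints a, b, c; duplicates collapsed to one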
@@ -18,11 +18,11 @@ while true
 ## Start looping through the unique URIs
 cat backfilluris.txt | \
 while read -r uri
-do
+do
   echo "[INFO] RUN-FIREHOSE: Posting $uri"

   ## Send it to the fake relay as a background job
-  curl -X "POST" "$fakeRelayHost" \
+  curl -L -X "POST" "$fakeRelayHost" \
   -H "Authorization: Bearer $fakeRelayKey" \
   -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
   --data-urlencode "statusUrl=$uri" \
@@ -35,20 +35,20 @@ while true
   do
     curls=`ps -ef|grep curl|wc -l`
     echo "[INFO] RUN-FIREHOSE: Waiting for existing curls to finish, at $curls"
-    linesLeft=`cat "$source"|wc -l`
+    linesLeft=`cat "$source"|wc -l`
     echo "[INFO] RUN-FIREHOSE: $linesLeft Total URIs left"
     sleep 5s
   done

 done

-linesLeft=`cat "$source"|wc -l`
+linesLeft=`cat "$source"|wc -l`

 ## Wait until the queue is at least 500 lines long; below that
 ## there are not enough lines to check for duplicates.
 until [ $linesLeft -gt $minURIs ]
 do
-  linesLeft=`cat "$source"|wc -l`
+  linesLeft=`cat "$source"|wc -l`
   echo "[INFO] RUN-FIREHOSE: Waiting for more URIs to batch, currently at $linesLeft"
   sleep 5s
 done
@@ -1,7 +1,7 @@
 #!/bin/bash

-echo > /config/urls.txt
-echo > /config/hosts
+echo > /data/urls.txt
+echo > /data/hosts

 # Get federated hosts and begin to stream them
 cat /config/domains-federated | grep -v "##" | while read -r line
@@ -23,15 +23,15 @@ do
       if [[ $tag != "" ]]; then
         echo "[INFO] Found tag $tag"
         # Create a url to fetch for each tag
-        echo "https://$host/api/v1/streaming/hashtag?tag=$tag $host" >> /config/urls.txt
+        echo "https://$host/api/v1/streaming/hashtag?tag=$tag $host" >> /data/urls.txt
       fi
     done
   elif [[ "$line" != *" #"* ]]; then
     echo "[INFO] $line didn't have hashtags"
     host=$line
-    echo "https://$line/api/v1/streaming/public $line" >> /config/urls.txt
-  fi
-  echo $host >> /config/hosts
+    echo "https://$line/api/v1/streaming/public $line" >> /data/urls.txt
+  fi
+  echo $host >> /data/hosts
   fi
 done
@@ -56,30 +56,30 @@ do
       if [[ $tag != "" ]]; then
         echo "[INFO] Found tag $tag"
         # Create a url to fetch for each tag
-        echo "https://$host/api/v1/streaming/hashtag/local?tag=$tag $host" >> /config/urls.txt
+        echo "https://$host/api/v1/streaming/hashtag/local?tag=$tag $host" >> /data/urls.txt
       fi
     done
   elif [[ "$line" != *" #"* ]]; then
     echo "[INFO] $line didn't have hashtags"
     host=$line
-    echo "https://$line/api/v1/streaming/public/local $line" >> /config/urls.txt
+    echo "https://$line/api/v1/streaming/public/local $line" >> /data/urls.txt
   fi
-  echo $host >> /config/hosts
+  echo $host >> /data/hosts
   fi
 done

 cat /config/hashtags | grep -v "##" | while read -r hashtag; do
   hashtag=`echo $hashtag | cut -d "#" -f 2`
-  sort /config/hosts | uniq -u | while read -r host; do
+  sort /data/hosts | uniq -u | while read -r host; do
     if [[ $hashtag != "" && "$host" != "" ]]; then
-      echo "https://$host/api/v1/streaming/hashtag?tag=$hashtag $host" >> /config/hashtag-urls.txt
+      echo "https://$host/api/v1/streaming/hashtag?tag=$hashtag $host" >> /data/hashtag-urls.txt
     fi
   done
 done

-cat /config/hashtag-urls.txt >> /config/urls.txt
+cat /data/hashtag-urls.txt >> /data/urls.txt

-cat /config/urls.txt | while read -r url
+cat /data/urls.txt | while read -r url
 do
   echo "[INFO] Opening $url to stream"
   sleep $streamDelay
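
Each generated line pairs a Mastodon streaming-API URL with its host name. For a quick manual check of one such endpoint (a sketch, not part of the commit; mastodon.social is just an assumed example instance):

# Sketch: tail one public streaming endpoint as server-sent events.
curl -L --no-progress-meter "https://mastodon.social/api/v1/streaming/public"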
@@ -96,4 +96,4 @@ fi
 # then restart
 echo "[INFO] Container restart time is $restartTimeout"
 sleep $restartTimeout
-exit 0
+exit 0
@@ -31,7 +31,7 @@ do
   then
     # Not in archive mode

-    curl -X "GET" "$url" \
+    curl -L -X "GET" "$url" \
       --no-progress-meter | \
       tee -a "/data/$today.json" | \
       grep url | \
@@ -56,7 +56,7 @@ do
     mkdir -p "/data/$today/"
   fi

-  curl -X "GET" "$url" --no-progress-meter >> "/data/$today/$today.$host.json"
+  curl -L -X "GET" "$url" --no-progress-meter >> "/data/$today/$today.$host.json"
   fi

   # Basic exponential backoff
@@ -79,4 +79,4 @@ do
 done

 ## Exit 0 by default
-exit 0
+exit 0
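
The `# Basic exponential backoff` logic itself falls outside the hunks shown in this view. A minimal sketch of the usual pattern with assumed variable names, doubling the wait after each consecutive stream failure:

# Sketch only: retry a dropped stream with a doubling delay.
failures=0
until curl -L --no-progress-meter "$url" >> "/data/$today.json"
do
  failures=$((failures + 1))
  echo "[WARN] Stream for $url exited, retry #$failures in $((2 ** failures))s"
  sleep "$((2 ** failures))s"
done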