fake-firehose/scripts/run-firehose.sh

56 lines
2.2 KiB
Bash
Raw Normal View History

2022-12-17 05:20:35 +00:00
## run-firehose.sh -- drain the daily URI queue and post each URI to the fake relay.
##
## Required environment:
##   fakeRelayHost  URL that receives the POST requests
##   fakeRelayKey   bearer token sent in the Authorization header
##   minURIs        batch size; also the backlog that must be exceeded before
##                  the next batch is taken (positive integer)
##   maxCurls       upper bound on curl processes seen in `ps -ef` before more
##                  are started (positive integer)
##
## Files:
##   /data/<YYYYMMDD>.uris.txt  queue; this assumes other scripts append to it
##   backfilluris.txt           current de-duplicated batch (in the working dir)
##
## NOTE(review): the bearer token is passed on curl's command line, so it is
## visible in `ps` output while a request is in flight.

## Fail fast on missing settings instead of looping forever on test/sed errors.
: "${fakeRelayHost:?fakeRelayHost must be set}"
: "${fakeRelayKey:?fakeRelayKey must be set}"
: "${minURIs:?minURIs must be set}"
: "${maxCurls:?maxCurls must be set}"

for setting in minURIs maxCurls; do
  value=${!setting}
  case "$value" in
    *[!0-9]*)
      echo "FIREHOSE: $setting must be a positive integer, got '$value'" >&2
      exit 1
      ;;
  esac
  if [ "$value" -lt 1 ]; then
    echo "FIREHOSE: $setting must be a positive integer, got '$value'" >&2
    exit 1
  fi
done

## Print the number of lines in file $1; a missing or unreadable file counts as 0.
count_lines() {
  if [[ -r "$1" ]]; then
    wc -l < "$1"
  else
    echo 0
  fi
}

## Print how many processes mention curl in `ps -ef`.
## The [c]url pattern keeps the grep process itself out of the count.
count_curls() {
  ps -ef | grep -c '[c]url'
}

## Take the first $minURIs lines of queue file $1, de-duplicate them, remove
## them from the queue and POST each one to the fake relay as a background job.
## Does nothing when the file is missing or empty.
post_batch() {
  local queue=$1
  local uri curls

  [[ -s "$queue" ]] || return 0

  ## Here we take the top $minURIs lines of the file -- so we are in FIFO --
  ## strip double quotes and collapse duplicates so each URI is sent once.
  ## (The previous `sort | uniq -u` dropped every URI that occurred more than
  ## once instead of keeping one copy of it.)
  ## Make sure the same number is used in the following two lines.
  head -n "$minURIs" "$queue" | tr -d '"' | sort -u > backfilluris.txt
  ## NOTE(review): lines appended by a writer while sed rewrites the file may
  ## be lost -- confirm how the writers open the queue.
  sed -i "1,${minURIs}d" "$queue"

  ## Start looping through the unique URIs
  while read -r uri; do
    [[ -n "$uri" ]] || continue
    echo "FIREHOSE: Posting $uri"

    ## Send it to the fake relay as a background job.
    ## stdin is /dev/null so curl can never consume the batch file.
    curl -X "POST" "$fakeRelayHost" \
      -H "Authorization: Bearer $fakeRelayKey" \
      -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
      --data-urlencode "statusUrl=$uri" \
      --no-progress-meter < /dev/null &

    ## Don't overload the system on open curls. Wait until they are below a
    ## certain amount to move on. Or have some fun, set this as high as you
    ## like and turn your computer into a space heater!
    curls=$(count_curls)
    until [ "$curls" -lt "$maxCurls" ]; do
      echo "FIREHOSE: Waiting for existing curls to finish, at $curls"
      echo "FIREHOSE: $(count_lines "$queue") Total URIs left"
      sleep 5
      ## Re-sample after sleeping so the loop condition sees a fresh value.
      curls=$(count_curls)
    done
  done < backfilluris.txt
}

today=$(date +"%Y%m%d")
source="/data/$today.uris.txt"

while true; do
  post_batch "$source"

  ## Wait until the queue is more than $minURIs lines long; less than that
  ## and there are not enough lines to see if there are duplicates.
  linesLeft=$(count_lines "$source")
  until [ "$linesLeft" -gt "$minURIs" ]; do
    now=$(date +"%Y%m%d")
    if [[ "$now" != "$today" ]]; then
      ## The day rolled over and the old queue will not grow any more (by the
      ## naming scheme above): flush whatever is left in it, then follow the
      ## new day's file instead of polling the old one forever.
      post_batch "$source"
      today=$now
      source="/data/$today.uris.txt"
      linesLeft=$(count_lines "$source")
      continue
    fi

    echo "FIREHOSE: Waiting for more URIs to batch, currently at $linesLeft"
    sleep 5
    linesLeft=$(count_lines "$source")
  done
done