## Firehose backfill loop.
##
## This assumes that other scripts are appending status URIs, one per line, to
## the file called $source, which is today's date followed by .uris.txt under
## /data. Each pass takes a batch off the top of that file (so we stay FIFO),
## removes duplicates, and POSTs every remaining URI to the fake relay as a
## background curl job.
##
## Variables that must be set before this point (environment or earlier code):
##   minURIs        - number of lines taken per batch; the next batch starts
##                    only once the queue holds more than this many lines
##   maxCurls       - stop launching new curls while this many (or more)
##                    curl processes are running
##   fakeRelayHost  - URL the URIs are POSTed to
##   fakeRelayKey   - bearer token sent in the Authorization header
: "${minURIs:?minURIs must be set}"
: "${maxCurls:?maxCurls must be set}"
: "${fakeRelayHost:?fakeRelayHost must be set}"
: "${fakeRelayKey:?fakeRelayKey must be set}"

## Print the number of lines in file $1, or 0 when it cannot be read
## (for example, today's file has not been created yet).
queue_length() {
  if [ -r "$1" ]; then
    wc -l < "$1"
  else
    echo 0
  fi
}

## Print how many processes with "curl" in their command line are running.
## The [c]url pattern stops grep from counting its own command line, which a
## plain "grep curl" would do.
running_curls() {
  ps -ef | grep -c '[c]url'
}

## Take up to $minURIs lines off the top of file $1 and post each distinct URI.
post_batch() {
  ## Nothing to do when the file is missing or empty
  [ -s "$1" ] || return 0

  ## Strip double quotes and collapse duplicates. sort -u keeps one copy of
  ## every URI; "uniq -u" would drop any URI that appears more than once.
  ## This step easily cuts the total number of URIs in half and is the only
  ## way we can keep up.
  head -n "$minURIs" "$1" | tr -d '"' | sort -u > backfilluris.txt

  ## Remove the same number of lines from the queue that head just read.
  ## NOTE(review): sed -i rewrites the file; presumably the writers reopen it
  ## for every append -- confirm.
  sed -i "1,${minURIs}d" "$1"

  ## Start looping through the unique URIs
  while read -r uri; do
    ## Skip blank lines instead of posting an empty statusUrl
    [ -n "$uri" ] || continue

    echo "FIREHOSE: Posting $uri"

    ## Send it to the fake relay as a background job
    curl -X "POST" "$fakeRelayHost" \
      -H "Authorization: Bearer $fakeRelayKey" \
      -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
      --data-urlencode "statusUrl=$uri" \
      --no-progress-meter &

    ## Don't overload the system on open curls. Wait until they are below
    ## $maxCurls to move on. Or have some fun, set this as high as you like
    ## and turn your computer into a space heater!
    while curls=$(running_curls); [ "$curls" -ge "$maxCurls" ]; do
      echo "FIREHOSE: Waiting for existing curls to finish, at $curls"
      linesLeft=$(queue_length "$1")
      echo "FIREHOSE: $linesLeft Total URIs left"
      sleep 5
    done
  done < backfilluris.txt
}

while true; do
  today=$(date +"%Y%m%d")
  source="/data/$today.uris.txt"

  post_batch "$source"

  ## Wait until the queue is more than $minURIs lines long; with fewer lines
  ## there is not enough in a batch to find duplicates.
  while linesLeft=$(queue_length "$source"); [ "$linesLeft" -le "$minURIs" ]; do
    ## If the date has rolled over, the file name for "today" has changed, so
    ## stop waiting on the old file: flush whatever is left in it (at most
    ## one batch) and move on to the new day's file.
    if [ "$(date +"%Y%m%d")" != "$today" ]; then
      post_batch "$source"
      break
    fi

    echo "FIREHOSE: Waiting for more URIs to batch, currently at $linesLeft"
    sleep 5
  done
done