2022-12-19 00:30:19 +00:00
|
|
|
# Positional arguments:
#   $1 - URL of the stream to fetch (a proper URL is all that is required)
#   $2 - host name, used for the DNS check, log messages and archive file names
# The ":-" default keeps the assignments safe if the script is ever run under
# `set -u` with missing arguments; an empty url is rejected further down.
url="${1:-}"
host="${2:-}"

# Number of times the streaming loop has had to restart; drives the retry delay.
errors=0
|
2022-12-19 00:30:19 +00:00
|
|
|
|
|
|
|
# Refuse to run without a URL: there is nothing to stream.
# Exit status 2 signals "skipped" to the caller.
if [[ -z "$url" ]]; then
  echo "[WARN] Empty url, skipping" # Exit if an empty URL was sent
  exit 2
fi
|
|
|
|
|
2022-12-22 14:06:43 +00:00
|
|
|
# if [[ "$checkUrl" != *"200"* ]]
|
|
|
|
# then
|
|
|
|
# echo "[WARN] Server threw an error, skipping"
|
|
|
|
# fi
|
|
|
|
|
|
|
|
# Check to see if the domain name resolves. If not, exit.
# `dig +short` prints nothing when there is no answer, so an empty result is
# treated as a failed lookup. Exit status 2 matches the empty-URL guard so the
# caller sees the same "skipped" code for both conditions.
if [[ -z "$(dig "$host" +short)" ]]; then
  echo "[WARN] DNS Lookup failed for $host, skipping"
  exit 2
fi
|
|
|
|
|
|
|
|
# `archive` is not assigned anywhere in this script; it is read as-is and is
# presumably supplied through the environment (TODO confirm against the caller).
# Any value other than the literal string "true" selects non-archive mode.
echo "[INFO] Archive is $archive"

# Stream forever. Each pass runs one curl session; when curl returns (stream
# dropped, server error, ...) we wait with an increasing delay and reconnect.
while true; do
  # Output files are keyed by the date on which this pass started.
  today=$(date +"%Y%m%d")

  echo "[INFO] Starting to stream $url in 5 seconds"
  echo "[INFO] Archive status is $archive"
  sleep 5s

  # In archive mode we only fetch the JSON stream, which saves the resources
  # that grep, sed and jq would otherwise use.
  if [[ "$archive" != "true" ]]; then
    # Not in archive mode: keep the raw stream in /data/<date>.json and
    # additionally extract the URI of every post into /data/<date>.uris.txt.
    #
    # The payload lines appear to carry a leading "data:" marker (presumably a
    # server-sent-events stream -- confirm). Only that leading marker is
    # removed; stripping "data:" everywhere would corrupt JSON whose content
    # happens to contain that text.
    #
    # The while loop runs in a pipeline subshell, so its variables never leak
    # into the outer script; distinct names are still used so the stream URL in
    # $url is visibly never overwritten.
    curl -X "GET" "$url" --no-progress-meter \
      | tee -a "/data/$today.json" \
      | grep url \
      | sed 's/^data: *//' \
      | while read -r line; do
        if [[ "$line" == *"uri"* ]]; then
          # Quote the JSON so the shell neither word-splits it nor expands
          # glob characters inside post content; jq -r emits the raw string
          # without surrounding quotes.
          post_url=$(printf '%s\n' "$line" | jq -r '.url')
          post_uri=$(printf '%s\n' "$line" | jq -r '.uri')

          echo "[INFO] Posting $post_url from $host"
          printf '%s\n' "$post_uri" >>"/data/$today.uris.txt"
        fi
      done
  else
    # In archive mode: append the raw stream to a per-day, per-host file.
    # mkdir -p is a no-op when the directory already exists.
    mkdir -p "/data/$today/"
    curl -X "GET" "$url" --no-progress-meter >>"/data/$today/$today.$host.json"
  fi

  # Basic backoff: the delay grows with the square of the restart count.
  errors=$((errors + 1))
  sleepseconds=$((errors * errors))

  # Don't allow a back off for more than 5 minutes.
  # Because we expect this container to reset occasionally to kill hanging
  # curl processes, a graceful exit will wait for all scripts to stop. So, it
  # will take at least as long as $sleepseconds to stop.
  if ((sleepseconds > 299)); then
    sleepseconds=300
  fi

  # Announce the pause before sleeping so the log line appears when the pause
  # starts rather than after it has already elapsed.
  echo "[WARN] Streaming abrubtly stopped for $host, streaming will pause for $sleepseconds seconds before retrying."
  sleep "$sleepseconds"
done
|
|
|
|
|
|
|
|
## Exit 0 by default
|
|
|
|
# Only reached if the streaming loop above terminates; that loop is
# `while true` with no break, so in practice the script ends by being killed.
exit 0
|