From 9d3f7f48732e926bba49c056d9f3a5c6134842e1 Mon Sep 17 00:00:00 2001 From: Raynor Date: Thu, 22 Dec 2022 14:06:43 +0000 Subject: [PATCH] Added archive mode and some basic health checks --- scripts/stream-url.sh | 44 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/scripts/stream-url.sh b/scripts/stream-url.sh index 9a72801..d1968a8 100644 --- a/scripts/stream-url.sh +++ b/scripts/stream-url.sh @@ -1,23 +1,41 @@ url=$1 #A proper URL is all that should be sent to this script host=$2 +errors=0 if [[ "$url" == "" ]] then - echo "Empty url, skipping" # Exit if an empty URL was sent + echo "[WARN] Empty url, skipping" # Exit if an empty URL was sent exit 2 fi +# if [[ "$checkUrl" != *"200"* ]] +# then +# echo "[WARN] Server threw an error, skipping" +# fi + +# Check to see if domain name resolves. If not, log a warning +if [[ ! `dig $host +short` ]] +then + echo "[WARN] DNS Lookup failed for $host, skipping" +fi + +echo "[INFO] Archive is $archive" + while true # Loop endlessly do + today=`date +"%Y%m%d"` - echo "Starting to stream $url in 5 seconds" - echo "Archive status is $archive" + echo "[INFO] Starting to stream $url in 5 seconds" + echo "[INFO] Archive status is $archive" sleep 5s; + # In archive mode we'll only fetch the json stream to save resources from jq and sed if [[ $archive != "true" ]] then + #Not in archive mode + curl -X "GET" "$url" \ --no-progress-meter | \ tee -a "/data/$today.json" | \ @@ -31,10 +49,11 @@ do url=`echo $line | jq .url| sed 's/\"//g'` uri=`echo $line | jq .uri| sed 's/\"//g'` - echo "STREAMING from $host $url" + echo "[INFO] STREAMING from $host $url" echo $uri >> "/data/$today.uris.txt" fi done + # In archive mode else if [[ ! -d "/data/$today/" ]] @@ -45,12 +64,23 @@ do curl -X "GET" "$url" --no-progress-meter >> "/data/$today/$today.$host.json" fi - # If we are in archive mode only, then back off if there is a curl error. 
- if [[ $archive == "true" ]] + # Basic quadratic backoff + ((++errors)) + sleepseconds=$((errors*errors)) + + # Don't allow a back off for more than 5 minutes. + # Because we expect this container to reset occasionally to kill hanging curl processes + # a graceful exit will wait for all scripts to stop. So, it will take at least as long as $sleepseconds + # to stop. + if [[ $sleepseconds -gt 299 ]] then - sleep 5m; + sleepseconds=300 fi + sleep $sleepseconds; + + echo "[WARN] Streaming abrubtly stopped for $host, streaming will pause for $sleepseconds seconds before retrying." + done ## Exit 0 by default