diff --git a/scripts/run-firehose.sh b/scripts/run-firehose.sh index bae6b18..7759ecf 100644 --- a/scripts/run-firehose.sh +++ b/scripts/run-firehose.sh @@ -4,9 +4,9 @@ while true ## This assumes that we have other scripts that are writing to the file called ## $source, which here is today's date appended with .uris.txt today=`date +"%Y%m%d"` - source="/data/$today.uris.txt" + source="/data/$today.uris.txt" - ## Here we take the top 500 lines of the file -- so we are in FIFO + ## Here we take the top 500 lines of the file -- so we are in FIFO ## and pipe them thru uniq so we only pass unique URIs through to the fake relay ## This step easily cuts the total number of URIs in half and is the only way we can keep up @@ -18,11 +18,11 @@ while true ## Start looping through the unique URIs cat backfilluris.txt| \ while read -r uri - do + do echo "[INFO] RUN-FIREHOSE: Posting $uri" ## Send it to the fake relay as a background job - curl -X "POST" "$fakeRelayHost" \ + curl -L -X "POST" "$fakeRelayHost" \ -H "Authorization: Bearer $fakeRelayKey" \ -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \ --data-urlencode "statusUrl=$uri" \ @@ -35,20 +35,20 @@ while true do curls=`ps -ef|grep curl|wc -l` echo "[INFO] RUN-FIREHOSE: Waiting for existing curls to finish, at $curls" - linesLeft=`cat "$source"|wc -l` + linesLeft=`cat "$source"|wc -l` echo "[INFO] RUN-FIREHOSE:$linesLeft Total URIs left" sleep 5s done done - linesLeft=`cat "$source"|wc -l` + linesLeft=`cat "$source"|wc -l` ## Wait until the queue is at least 500 lines long, less than that ## and there are not enough lines to see if there are duplicates. until [ $linesLeft -gt $minURIs ] do - linesLeft=`cat "$source"|wc -l` + linesLeft=`cat "$source"|wc -l` echo "[INFO] RUN-FIREHOSE: Waiting for more URIs to batch, currently at $linesLeft" sleep 5s done diff --git a/scripts/stream-url.sh b/scripts/stream-url.sh index 12fe385..d6adee9 100644 --- a/scripts/stream-url.sh +++ b/scripts/stream-url.sh @@ -31,7 +31,7 @@ do then #Not in archive mode - curl -X "GET" "$url" \ + curl -L -X "GET" "$url" \ --no-progress-meter | \ tee -a "/data/$today.json" | \ grep url | \ @@ -56,7 +56,7 @@ do mkdir -p "/data/$today/" fi - curl -X "GET" "$url" --no-progress-meter >> "/data/$today/$today.$host.json" + curl -L -X "GET" "$url" --no-progress-meter >> "/data/$today/$today.$host.json" fi # Basic exponential backoff @@ -79,4 +79,4 @@ do done ## Exit 0 by default -exit 0 \ No newline at end of file +exit 0