From c2d101b946c753623c58495aa98c4049c754277b Mon Sep 17 00:00:00 2001 From: raynor Date: Sat, 17 Dec 2022 21:22:26 -0500 Subject: [PATCH 1/9] Auto restart in lieu of real health checks --- scripts/start-firehose.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/start-firehose.sh b/scripts/start-firehose.sh index ebc21cb..9a73ce6 100644 --- a/scripts/start-firehose.sh +++ b/scripts/start-firehose.sh @@ -21,5 +21,8 @@ then /scripts/run-firehose.sh & fi -## Don't let the container exit -while true; do sleep 1; done \ No newline at end of file +## We don't have a health check, so just exit after an hour +# If your docker file has restart: always on this should gracefully exit, and +# then restart +sleep 1h +exit 0 \ No newline at end of file From 3ecd85b5b3be8de860974dc6215789d46e0d4182 Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 14:15:59 -0500 Subject: [PATCH 2/9] Added script to make rebuilding docker faster --- rebuild.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 rebuild.sh diff --git a/rebuild.sh b/rebuild.sh new file mode 100755 index 0000000..1496773 --- /dev/null +++ b/rebuild.sh @@ -0,0 +1,3 @@ +docker compose -f docker-compose.yml down +docker build -t fakefirehose . +docker compose -f docker-compose.yml up -d \ No newline at end of file From 4d72a59e9f64a2b078ab94e1b2d7ca372e98dfeb Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:28:17 -0500 Subject: [PATCH 3/9] Changed rebuild to stay attached --- rebuild.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebuild.sh b/rebuild.sh index 1496773..d4149d4 100755 --- a/rebuild.sh +++ b/rebuild.sh @@ -1,3 +1,3 @@ docker compose -f docker-compose.yml down docker build -t fakefirehose . 
-docker compose -f docker-compose.yml up -d \ No newline at end of file +docker compose -f docker-compose.yml up \ No newline at end of file From 54a544703b5f3b6372fae6a2ba11b16d32d6c8a0 Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:29:51 -0500 Subject: [PATCH 4/9] Updated sample files --- config/domains-federated.sample | 7 ++++++- config/domains-local.sample | 5 ++++- config/hashtags.sample | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/config/domains-federated.sample b/config/domains-federated.sample index ad03e04..57337bb 100644 --- a/config/domains-federated.sample +++ b/config/domains-federated.sample @@ -1,7 +1,12 @@ +## Comments require two #'s, like ## + ## Fake Firehose will only take all public posts from these domains ## This is the true firehose, use it carefully or blow up your server -### International English (if you aren't from the US) ### +## Hashtags to follow by instance +mastodon.social #JohnMastodon #silentsunday #mastoadmin + +## International English (if you aren't from the US) ### mastodon.scot aus.social mastodon.nz diff --git a/config/domains-local.sample b/config/domains-local.sample index f71b5b8..f95e269 100644 --- a/config/domains-local.sample +++ b/config/domains-local.sample @@ -8,4 +8,7 @@ techhub.social fosstodon.org ### News & Politics ### -journa.host \ No newline at end of file +journa.host + +### Cool People ### +raynor.haus \ No newline at end of file diff --git a/config/hashtags.sample b/config/hashtags.sample index e69de29..12dfadc 100644 --- a/config/hashtags.sample +++ b/config/hashtags.sample @@ -0,0 +1,11 @@ +## If this file is not empty then EVERY host from an earlier file +## including duplicates, will stream to look for these hashtags + +## You can list hashtags without a leading "#" ... 
+ +cybersecurity +mastoadmin + +## Or you can list hashtags with the "#" + +#mastodon \ No newline at end of file From 8339a881d004f32f1749db6aafa2b9ec0a942b8e Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:30:19 -0500 Subject: [PATCH 5/9] Updated to work with tags --- scripts/start-firehose.sh | 86 +++++++++++++++++++++++++++++++++++---- scripts/stream-url.sh | 37 +++++++++++++++++ 2 files changed, 115 insertions(+), 8 deletions(-) create mode 100644 scripts/stream-url.sh diff --git a/scripts/start-firehose.sh b/scripts/start-firehose.sh index 9a73ce6..16009f4 100644 --- a/scripts/start-firehose.sh +++ b/scripts/start-firehose.sh @@ -1,21 +1,91 @@ #!/bin/bash -cat /config/domains-federated|grep -v "#"|while read -r host +echo > /config/urls.txt +echo > /config/hosts + +# Get federated hosts and begin to stream them +cat /config/domains-federated | grep -v "##" | while read -r line do - if [[ "$host" != "" ]] - then - /scripts/get-stream.sh $host "federated" & + #filter out empty lines + if [[ "$line" != "" ]]; then + echo "Opening federated line $line" + + #Check for hashtags + if [[ "$line" == *" #"* ]]; then + + echo "$line has hashtags!" 
+ + # Get just the first field of the line, which is the host + host=`echo $line | cut -d " " -f 1` + tags=`echo $line | cut -d " " -f 1 --complement|tr "#" "\n "` + for tag in $tags + do + if [[ $tag != "" ]]; then + echo "Found tag $tag" + # Create a url to fetch for each tag + echo "https://$host/api/v1/streaming/hashtag?tag=$tag $host" >> /config/urls.txt + fi + done + elif [[ "$line" != *" #"* ]]; then + echo "$line didn't have hashtags" + host=$line + echo "https://$line/api/v1/streaming/public $line" >> /config/urls.txt + fi + echo $host >> /config/hosts fi done -cat /config/domains-local|grep -v "#"|while read -r host + +# Get local hosts and begin to stream them +cat /config/domains-local | grep -v "##" | while read -r line do - if [[ "$host" != "" ]] - then - /scripts/get-stream.sh $host "local" & + #filter out empty lines + if [[ "$line" != "" ]]; then + echo "Opening federated line $line" + + #Check for hashtags + if [[ "$line" == *" #"* ]]; then + + echo "$line has hashtags!" 
+ + # Get just the first field of the line, which is the host + host=`echo $line | cut -d " " -f 1` + tags=`echo $line | cut -d " " -f 1 --complement|tr "#" "\n "` + for tag in $tags + do + if [[ $tag != "" ]]; then + echo "Found tag $tag" + # Create a url to fetch for each tag + echo "https://$host/api/v1/streaming/hashtag/local?tag=$tag $host" >> /config/urls.txt + fi + done + elif [[ "$line" != *" #"* ]]; then + echo "$line didn't have hashtags" + host=$line + echo "https://$line/api/v1/streaming/local $line" >> /config/urls.txt + fi + echo $host >> /config/hosts fi done +cat /config/hashtags | grep -v "##" | while read -r hashtag; do + hashtag=`echo $hashtag | cut -d "#" -f 2` + sort /config/hosts | uniq -u |while read -r host; do + if [[ $hashtag != "" && "$host" != "" ]]; then + echo "https://$host/api/v1/streaming/hashtag?tag=$hashtag $host" >> /config/hashtag-urls.txt + fi + done +done + +cat /config/hashtag-urls.txt >> /config/urls.txt + +cat /config/urls.txt | while read -r url +do + echo "Opening $url to stream" + sleep 1s + ./stream-url.sh $url & +done + if [[ $runFirehose == true ]] then /scripts/run-firehose.sh & diff --git a/scripts/stream-url.sh b/scripts/stream-url.sh new file mode 100644 index 0000000..ba34c5a --- /dev/null +++ b/scripts/stream-url.sh @@ -0,0 +1,37 @@ +url=$1 #A proper URL is all that should be sent to this script +host=$2 + +if [[ "$url" == "" ]] +then + echo "Empty url, skipping" # Exit if an empty URL was sent + exit 2 +fi + +while true # Loop endlessly +do + today=`date +"%Y%m%d"` + + echo "Starting to stream $url in 5 seconds" + + sleep 5s; + + curl -X "GET" "$url" \ + --no-progress-meter | \ + tee -a "/data/$today.json" | \ + grep url | \ + sed 's/data://g' | \ + + while read -r line + do + + if [[ $line == *"uri"* ]] + then + url=`echo $line | jq .url| sed 's/\"//g'` + uri=`echo $line | jq .uri| sed 's/\"//g'` + + echo "STREAMING from $host $url" + echo $uri >> "/data/$today.uris.txt" + + fi + done +done \ No newline at end 
of file From 079dc3b7f32405949adb8a7a0cd505232c98cbd2 Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:30:30 -0500 Subject: [PATCH 6/9] Started a todo list I may or may not get to --- TODO.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..2cf1f39 --- /dev/null +++ b/TODO.md @@ -0,0 +1,3 @@ +1. Create language filter +2. Create bot filter +3. \ No newline at end of file From 316e5e6efaffc5f7cae700a3935132cf0cd9fa71 Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:31:03 -0500 Subject: [PATCH 7/9] Updated to not use hashtags at all, can delete in future --- scripts/get-stream.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/get-stream.sh b/scripts/get-stream.sh index 7cc2aaf..5415c35 100644 --- a/scripts/get-stream.sh +++ b/scripts/get-stream.sh @@ -1,5 +1,6 @@ host=$1 type=$2 +hashtag=$1 if [[ "$host" == "" ]] then @@ -16,11 +17,7 @@ do fetch="https://$host/api/v1/streaming/public";; "local") fetch="https://$host/api/v1/streaming/public?local=true";; - "hashtags") - fetch="https://$host/api/v1/streaming/hashtag?tag=linux" - echo "Sorry, hash tags aren't implemented yet :(" - exit 1 - ;; + esac echo "Starting to stream $fetch in 5 seconds" From 243c64e2b28eacbffc00c665487e84850f08410a Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:31:07 -0500 Subject: [PATCH 8/9] Added some files --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 2b9454b..61a99c3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,10 @@ data/20221217.json config/domains-federated config/domains-local config/hashtags +config/hosts +.gitignore +data/20221219.json +.gitignore +config/hashtag-urls.txt +config/urls.txt +data/20221219.uris.txt From 8d17d14736855699c4f9ba5d5bcb4436e45da481 Mon Sep 17 00:00:00 2001 From: raynor Date: Sun, 18 Dec 2022 19:41:52 -0500 Subject: [PATCH 9/9] Updated 
readme. Didn't proofread. Might meed more. --- readme.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 1b581a8..87faf08 100644 --- a/readme.md +++ b/readme.md @@ -18,8 +18,55 @@ If you want the full on public feed from an instance, put it in the domains-fede If you only want the local feed from an instance, put it on the domains-local file, one domain per line -If you want to follow a hashtag you're out of luck because I didn't get that far. But it will go into the hashtags file. +If you want to follow a hashtag you can add it after an instance in `domains-federated` or `domains-local` +For example: if in `domains-federated` you put `mastodon.social #JohnMastodon` a stream will open to watch for the hashtag #JohnMastodon on the public +stream from mastodon.social + +Another example: if in `domains-local` you put `infosec.exchange #hacker` a stream will open to watch for the hashtag #hacker on the _local_ stream from infosec.exchange + +## Docker Build docker -Run docker \ No newline at end of file +Run docker + +### The hashtags file +If you put ANY hashtags in here a stream will be opened for _every_ host in the `domains-federated` and `domains-local` files. 
+ +Example: +`domains-federated` content: + +``` +mastodon.social +mas.to +``` + +`domains-local` content: + +``` +aus.social +mastodon.nz +``` + +`hashtags` content: +``` +JohnMastodon +Mastodon +``` + +will result in the following streams all opening: +```shell +https://mastodon.social/api/v1/streaming/hashtag?tag=JohnMastodon +https://mas.to/api/v1/streaming/hashtag?tag=JohnMastodon +https://aus.social/api/v1/streaming/hashtag?tag=JohnMastodon +https://mastodon.nz/api/v1/streaming/hashtag?tag=JohnMastodon +https://mastodon.social/api/v1/streaming/hashtag?tag=Mastodon +https://mas.to/api/v1/streaming/hashtag?tag=Mastodon +https://aus.social/api/v1/streaming/hashtag?tag=Mastodon +https://mastodon.nz/api/v1/streaming/hashtag?tag=Mastodon +``` + +If you had a total of 5 lines in `domains-federated` and `domains-local` plus 3 entries in `hashtags` +there would be 5 x 3 = 15 new streams. + +I mean, you can do it, but you won't need your central heating system any more. \ No newline at end of file