Merge pull request #2 from raynormast/dev

Dev
This commit is contained in:
raynormast 2022-12-18 19:43:10 -05:00 committed by GitHub
commit 962cc9455c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 205 additions and 19 deletions

7
.gitignore vendored
View file

@ -5,3 +5,10 @@ data/20221217.json
config/domains-federated
config/domains-local
config/hashtags
config/hosts
.gitignore
data/20221219.json
.gitignore
config/hashtag-urls.txt
config/urls.txt
data/20221219.uris.txt

3
TODO.md Normal file
View file

@ -0,0 +1,3 @@
1. Create language filter
2. Create bot filter
3.

View file

@ -1,7 +1,12 @@
## Comments require two #'s, like ##
## Fake Firehose will only take all public posts from these domains
## This is the true firehose, use it carefully or blow up your server
### International English (if you aren't from the US) ###
## Hashtags to follow by instance
mastodon.social #JohnMastodon #silentsunday #mastoadmin
## International English (if you aren't from the US) ###
mastodon.scot
aus.social
mastodon.nz

View file

@ -9,3 +9,6 @@ fosstodon.org
### News & Politics ###
journa.host
### Cool People ###
raynor.haus

View file

@ -0,0 +1,11 @@
## If this file is not empty then EVERY host from an earlier file
## including duplicates, will stream to look for these hashtags
## You can list hashtags without a leading "#" ...
cybersecurity
mastoadmin
## Or you can list hashtags with the "#"
#mastodon

View file

@ -18,8 +18,55 @@ If you want the full on public feed from an instance, put it in the domains-fede
If you only want the local feed from an instance, put it on the domains-local file, one domain per line
If you want to follow a hashtag you're out of luck because I didn't get that far. But it will go into the hashtags file.
If you want to follow a hashtag you can either add it after an instance in `domains-federated` or `domains-local`, or list it in the `hashtags` file.
For example: if in `domains-federated` you put `mastodon.social #JohnMastodon`, a stream will open to watch for the hashtag #JohnMastodon on the public
stream from mastodon.social
Another example: if in `domains-local` you put `infosec.exchange #hacker` a stream will open to watch for the hashtag #hacker on the _local_ stream from infosec.exchange
## Docker
Build docker
Run docker
### The hashtags file
If you put ANY hashtags in here a stream will be opened for _every_ host in the `domains-federated` and `domains-local` files.
Example:
`domains-federated` content:
```
mastodon.social
mas.to
```
`domains-local` content:
```
aus.social
mastodon.nz
```
`hashtags` content:
```
JohnMastodon
Mastodon
```
will result in the following streams all opening:
```shell
https://mastodon.social/api/v1/streaming/hashtag?tag=JohnMastodon
https://mas.to/api/v1/streaming/hashtag?tag=JohnMastodon
https://aus.social/api/v1/streaming/hashtag?tag=JohnMastodon
https://mastodon.nz/api/v1/streaming/hashtag?tag=JohnMastodon
https://mastodon.social/api/v1/streaming/hashtag?tag=Mastodon
https://mas.to/api/v1/streaming/hashtag?tag=Mastodon
https://aus.social/api/v1/streaming/hashtag?tag=Mastodon
https://mastodon.nz/api/v1/streaming/hashtag?tag=Mastodon
```
If you had a total of 5 lines in `domains-federated` and `domains-local` plus 3 entries in `hashtags`
there would be 5 x 3 = 15 new streams.
I mean, you can do it, but you won't need your central heating system any more.

3
rebuild.sh Executable file
View file

@ -0,0 +1,3 @@
#!/bin/bash
#
# Tear down the compose stack, rebuild the `fakefirehose` image from the
# current directory, and bring the stack back up in the foreground.
#
# Usage: ./rebuild.sh   (run from the directory holding docker-compose.yml)

# Stop at the first failing step so `up` is never run after a failed build.
set -euo pipefail

docker compose -f docker-compose.yml down
docker build -t fakefirehose .
docker compose -f docker-compose.yml up

View file

@ -1,5 +1,6 @@
host=$1
type=$2
hashtag=$1
if [[ "$host" == "" ]]
then
@ -16,11 +17,7 @@ do
fetch="https://$host/api/v1/streaming/public";;
"local")
fetch="https://$host/api/v1/streaming/public?local=true";;
"hashtags")
fetch="https://$host/api/v1/streaming/hashtag?tag=linux"
echo "Sorry, hash tags aren't implemented yet :("
exit 1
;;
esac
echo "Starting to stream $fetch in 5 seconds"

View file

@ -1,25 +1,98 @@
#!/bin/bash
cat /config/domains-federated|grep -v "#"|while read -r host
echo > /config/urls.txt
echo > /config/hosts
# Get federated hosts and begin to stream them
cat /config/domains-federated | grep -v "##" | while read -r line
do
if [[ "$host" != "" ]]
then
/scripts/get-stream.sh $host "federated" &
#filter out empty lines
if [[ "$line" != "" ]]; then
echo "Opening federated line $line"
#Check for hashtags
if [[ "$line" == *" #"* ]]; then
echo "$line has hashtags!"
# Get just the first field of the line, which is the host
host=`echo $line | cut -d " " -f 1`
tags=`echo $line | cut -d " " -f 1 --complement|tr "#" "\n "`
for tag in $tags
do
if [[ $tag != "" ]]; then
echo "Found tag $tag"
# Create a url to fetch for each tag
echo "https://$host/api/v1/streaming/hashtag?tag=$tag $host" >> /config/urls.txt
fi
done
elif [[ "$line" != *" #"* ]]; then
echo "$line didn't have hashtags"
host=$line
echo "https://$line/api/v1/streaming/public $line" >> /config/urls.txt
fi
echo $host >> /config/hosts
fi
done
cat /config/domains-local|grep -v "#"|while read -r host
# Get local hosts and begin to stream them
cat /config/domains-local | grep -v "##" | while read -r line
do
if [[ "$host" != "" ]]
then
/scripts/get-stream.sh $host "local" &
#filter out empty lines
if [[ "$line" != "" ]]; then
echo "Opening federated line $line"
#Check for hashtags
if [[ "$line" == *" #"* ]]; then
echo "$line has hashtags!"
# Get just the first field of the line, which is the host
host=`echo $line | cut -d " " -f 1`
tags=`echo $line | cut -d " " -f 1 --complement|tr "#" "\n "`
for tag in $tags
do
if [[ $tag != "" ]]; then
echo "Found tag $tag"
# Create a url to fetch for each tag
echo "https://$host/api/v1/streaming/hashtag/local?tag=$tag $host" >> /config/urls.txt
fi
done
elif [[ "$line" != *" #"* ]]; then
echo "$line didn't have hashtags"
host=$line
echo "https://$line/api/v1/streaming/local $line" >> /config/urls.txt
fi
echo $host >> /config/hosts
fi
done
#######################################
# Write one hashtag stream URL per (hashtag, host) pair.
# Arguments: $1 - directory holding the `hashtags` and `hosts` input files
#                 and the `hashtag-urls.txt` output file (default: /config)
# Outputs:   lines of the form "<url> <host>" in <dir>/hashtag-urls.txt
#######################################
build_hashtag_urls() {
  local config_dir=${1:-/config}
  local hashtag host
  local -a hosts=()

  # Start from an empty file: it is only ever appended to below, so without
  # this every run would add the same URLs again.
  : > "$config_dir/hashtag-urls.txt"

  # `sort -u` keeps one copy of every host (`uniq -u` would instead drop any
  # host that is listed more than once). Done once, not once per hashtag.
  mapfile -t hosts < <(sort -u "$config_dir/hosts")

  grep -v "##" "$config_dir/hashtags" | while read -r hashtag; do
    # Accept both "tag" and "#tag": take the second "#"-separated field;
    # cut passes a line without any "#" through unchanged.
    hashtag=$(printf '%s\n' "$hashtag" | cut -d "#" -f 2)
    for host in "${hosts[@]}"; do
      if [[ -n "$hashtag" && -n "$host" ]]; then
        echo "https://$host/api/v1/streaming/hashtag?tag=$hashtag $host" >> "$config_dir/hashtag-urls.txt"
      fi
    done
  done
}

build_hashtag_urls /config
cat /config/hashtag-urls.txt >> /config/urls.txt
# Start one background streamer per line of /config/urls.txt.
# Each line is read as "<url> <host>"; both fields are passed on as separate,
# quoted arguments so the "?" in the URL is never treated as a glob pattern.
while read -r url host; do
  # Skip blank lines so no streamer is started without a URL.
  [[ -n "$url" ]] || continue
  echo "Opening $url $host to stream"
  # Stagger the start-ups by one second each.
  sleep 1s
  ./stream-url.sh "$url" "$host" &
done < /config/urls.txt
if [[ $runFirehose == true ]]
then
/scripts/run-firehose.sh &
fi
## Don't let the container exit
while true; do sleep 1; done
## We don't have a health check, so just exit after an hour
# If your docker file has restart: always on this should gracefully exit, and
# then restart
sleep 1h
exit 0

37
scripts/stream-url.sh Normal file
View file

@ -0,0 +1,37 @@
#!/bin/bash
#
# Stream a single URL forever and record what comes through.
#
# Usage: stream-url.sh <url> [host]
#   url  - full URL to stream; required
#   host - label printed in the log line for every post seen
#
# Everything curl receives is appended to /data/<YYYYMMDD>.json. For every
# received line that mentions both "url" and "uri", the post's uri is
# appended to /data/<YYYYMMDD>.uris.txt. Whenever curl exits, the stream is
# reopened after a 5 second pause.
#
# Exit status: 2 when no URL is given; otherwise the script never returns.

url=$1 # A proper URL is all that should be sent to this script
host=$2

if [[ -z "$url" ]]; then
  echo "Empty url, skipping" # Exit if an empty URL was sent
  exit 2
fi

while true; do # Loop endlessly
  # Sampled once per connection: both output files keep the date on which
  # this connection was opened.
  today=$(date +"%Y%m%d")

  echo "Starting to stream $url in 5 seconds"
  sleep 5s

  curl -X "GET" "$url" --no-progress-meter \
    | tee -a "/data/$today.json" \
    | while read -r line; do
      # Only lines that mention both "url" and "uri" are treated as posts.
      [[ $line == *url* && $line == *uri* ]] || continue

      # Strip only the leading "data:" field name; any "data:" inside the
      # JSON payload itself must be left alone.
      payload=${line#data:}

      # One jq call yields both fields as raw strings, one per line. The
      # here-string hands jq the payload verbatim (no word splitting or glob
      # expansion) and keeps jq from reading the stream feeding this loop.
      if ! fields=$(jq -r '.url, .uri' <<<"$payload"); then
        continue # Not a JSON object with these fields; nothing to record
      fi
      {
        read -r post_url
        read -r post_uri
      } <<<"$fields"

      echo "STREAMING from $host $post_url"
      echo "$post_uri" >> "/data/$today.uris.txt"
    done
done