From 909a8a5a9be9b36c88eed81db6407167afd4d1e8 Mon Sep 17 00:00:00 2001 From: Daniel Supernault Date: Tue, 27 Dec 2022 05:23:54 -0700 Subject: [PATCH] Update lexer/extractor to handle banned hashtags --- app/Jobs/StatusPipeline/StatusTagsPipeline.php | 9 +++++++++ app/Services/TrendingHashtagService.php | 11 +++++++++-- app/Util/Lexer/Extractor.php | 10 +++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/app/Jobs/StatusPipeline/StatusTagsPipeline.php b/app/Jobs/StatusPipeline/StatusTagsPipeline.php index 44fcaa353..65761cf22 100644 --- a/app/Jobs/StatusPipeline/StatusTagsPipeline.php +++ b/app/Jobs/StatusPipeline/StatusTagsPipeline.php @@ -15,6 +15,7 @@ use App\Mention; use App\Services\AccountService; use App\Hashtag; use App\StatusHashtag; +use App\Services\TrendingHashtagService; class StatusTagsPipeline implements ShouldQueue { @@ -61,6 +62,14 @@ class StatusTagsPipeline implements ShouldQueue $name = substr($tag['name'], 0, 1) == '#' ? substr($tag['name'], 1) : $tag['name']; + $banned = TrendingHashtagService::getBannedHashtagNames(); + + if(count($banned)) { + if(in_array(strtolower($name), array_map('strtolower', $banned))) { + continue; + } + } + $hashtag = Hashtag::firstOrCreate([ 'slug' => str_slug($name) ], [ diff --git a/app/Services/TrendingHashtagService.php b/app/Services/TrendingHashtagService.php index c27b2f3c3..6695f2729 100644 --- a/app/Services/TrendingHashtagService.php +++ b/app/Services/TrendingHashtagService.php @@ -16,13 +16,20 @@ class TrendingHashtagService return self::CACHE_KEY . $k; } - public static function getBlockedHashtags() + public static function getBannedHashtags() { return Cache::remember(self::key(':is_banned'), 1209600, function() { return Hashtag::whereIsBanned(true)->pluck('id')->toArray(); }); } + public static function getBannedHashtagNames() + { + return Cache::remember(self::key(':is_banned:names'), 1209600, function() { + return Hashtag::find(self::getBannedHashtags())->pluck('name')->toArray(); + }); + } + public static function getNonTrendingHashtags() { return Cache::remember(self::key(':can_trend'), 1209600, function() { @@ -52,7 +59,7 @@ class TrendingHashtagService { $minId = self::getMinRecentId(); - $skipIds = array_merge(self::getBlockedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags()); + $skipIds = array_merge(self::getBannedHashtags(), self::getNonTrendingHashtags(), self::getNsfwHashtags()); return Cache::remember(self::CACHE_KEY, config('trending.hashtags.ttl'), function() use($minId, $skipIds) { return StatusHashtag::select('hashtag_id', \DB::raw('count(*) as total')) diff --git a/app/Util/Lexer/Extractor.php b/app/Util/Lexer/Extractor.php index 364a1c573..c71d3f2e6 100755 --- a/app/Util/Lexer/Extractor.php +++ b/app/Util/Lexer/Extractor.php @@ -12,6 +12,7 @@ namespace App\Util\Lexer; use Illuminate\Support\Str; use App\Status; use App\Services\AutolinkService; +use App\Services\TrendingHashtagService; /** * Twitter Extractor Class. @@ -267,6 +268,8 @@ class Extractor extends Regex return []; } + $bannedTags = TrendingHashtagService::getBannedHashtagNames(); + preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); $tags = []; @@ -278,7 +281,12 @@ class Extractor extends Regex if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) { continue; } - if(mb_strlen($hashtag[0]) > 124) { + if (count($bannedTags)) { + if(in_array(strtolower($hashtag[0]), array_map('strtolower', $bannedTags))) { + continue; + } + } + if (mb_strlen($hashtag[0]) > 124) { continue; } $tags[] = [