From f911c96d16d039e764e9ba716bc979e62e643503 Mon Sep 17 00:00:00 2001 From: Daniel Supernault Date: Sun, 2 Feb 2020 23:20:19 -0700 Subject: [PATCH] Update lexer autolinker and extractor, add support for mentioned usernames containing dashes, periods and underscore characters --- app/Util/Lexer/Regex.php | 4 +- tests/Unit/Lexer/UsernameTest.php | 179 ++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 tests/Unit/Lexer/UsernameTest.php diff --git a/app/Util/Lexer/Regex.php b/app/Util/Lexer/Regex.php index c24e0d4b0..ecc468d05 100755 --- a/app/Util/Lexer/Regex.php +++ b/app/Util/Lexer/Regex.php @@ -162,9 +162,9 @@ abstract class Regex // look-ahead capture here and don't append $after when we return. $tmp['valid_mention_preceding_chars'] = '([^a-zA-Z0-9_!#\$%&*@@\/]|^|(?:^|[^a-z0-9_+~.-])RT:?)'; - $re['valid_mentions_or_lists'] = '/'.$tmp['valid_mention_preceding_chars'].'(['.$tmp['at_signs'].'])([a-z0-9_]{1,20})((\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))(?:@[a-z0-9\.\-]+[a-z0-9]+)?)/i'; + $re['valid_mentions_or_lists'] = '/'.$tmp['valid_mention_preceding_chars'].'(['.$tmp['at_signs'].'])([a-z0-9_\-.]{1,20})((\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))(?:@[a-z0-9\.\-]+[a-z0-9]+)?)/i'; - $re['valid_reply'] = '/^(?:['.$tmp['spaces'].'])*['.$tmp['at_signs'].']([a-z0-9_]{1,20})(?=(.*|$))/iu'; + $re['valid_reply'] = '/^(?:['.$tmp['spaces'].'])*['.$tmp['at_signs'].']([a-z0-9_\-.]{1,20})(?=(.*|$))/iu'; $re['end_mention_match'] = '/\A(?:['.$tmp['at_signs'].']|['.$tmp['latin_accents'].']|:\/\/)/iu'; // URL related hash regex collection diff --git a/tests/Unit/Lexer/UsernameTest.php b/tests/Unit/Lexer/UsernameTest.php new file mode 100644 index 000000000..e5c310db7 --- /dev/null +++ b/tests/Unit/Lexer/UsernameTest.php @@ -0,0 +1,179 @@ +extract($username); + $autolink = Autolink::create()->autolink($username); + $expectedAutolink = '@dansup'; + $expectedEntity = [ + "hashtags" => [], + "urls" => [], + "mentions" => [ + "dansup", + ], + "replyto" => "dansup", + "hashtags_with_indices" => [], + "urls_with_indices" => [], + "mentions_with_indices" => [ + [ + "screen_name" => "dansup", + "indices" => [ + 0, + 7, + ], + ], + ], + ]; + $this->assertEquals($expectedAutolink, $autolink); + $this->assertEquals($expectedEntity, $entities); + } + + /** @test **/ + public function usernameWithPeriod() + { + $username = '@dansup.two'; + $autolink = Autolink::create()->autolink($username); + $entities = Extractor::create()->extract($username); + $expectedAutolink = '@dansup.two'; + $expectedEntity = [ + "hashtags" => [], + "urls" => [], + "mentions" => [ + "dansup.two", + ], + "replyto" => "dansup.two", + "hashtags_with_indices" => [], + "urls_with_indices" => [], + "mentions_with_indices" => [ + [ + "screen_name" => "dansup.two", + "indices" => [ + 0, + 11, + ], + ], + ], + ]; + $this->assertEquals($expectedAutolink, $autolink); + $this->assertEquals($expectedEntity, $entities); + } + + /** @test **/ + public function usernameWithDash() + { + $username = '@dansup-too'; + $autolink = Autolink::create()->autolink($username); + $entities = Extractor::create()->extract($username); + $expectedAutolink = '@dansup-too'; + $expectedEntity = [ + "hashtags" => [], + "urls" => [], + "mentions" => [ + "dansup-too", + ], + "replyto" => "dansup-too", + "hashtags_with_indices" => [], + "urls_with_indices" => [], + "mentions_with_indices" => [ + [ + "screen_name" => "dansup-too", + "indices" => [ + 0, + 11, + ], + ], + ], + ]; + $this->assertEquals($expectedAutolink, $autolink); + $this->assertEquals($expectedEntity, $entities); + } + + /** @test **/ + public function usernameWithUnderscore() + { + $username = '@dansup_too'; + $autolink = Autolink::create()->autolink($username); + $entities = Extractor::create()->extract($username); + $expectedAutolink = '@dansup_too'; + $expectedEntity = [ + "hashtags" => [], + "urls" => [], + "mentions" => [ + "dansup_too", + ], + "replyto" => "dansup_too", + "hashtags_with_indices" => [], + "urls_with_indices" => [], + "mentions_with_indices" => [ + [ + "screen_name" => "dansup_too", + "indices" => [ + 0, + 11, + ], + ], + ], + ]; + $this->assertEquals($expectedAutolink, $autolink); + $this->assertEquals($expectedEntity, $entities); + } + + /** @test **/ + public function multipleMentions() + { + $text = 'hello @dansup and @pixelfed.team from @username_underscore'; + $autolink = Autolink::create()->autolink($text); + $entities = Extractor::create()->extract($text); + $expectedAutolink = 'hello @dansup and @pixelfed.team from @username_underscore'; + $expectedEntity = [ + "hashtags" => [], + "urls" => [], + "mentions" => [ + "dansup", + "pixelfed.team", + "username_underscore", + ], + "replyto" => null, + "hashtags_with_indices" => [], + "urls_with_indices" => [], + "mentions_with_indices" => [ + [ + "screen_name" => "dansup", + "indices" => [ + 6, + 13, + ], + ], + [ + "screen_name" => "pixelfed.team", + "indices" => [ + 18, + 32, + ], + ], + [ + "screen_name" => "username_underscore", + "indices" => [ + 38, + 58, + ], + ], + ], + ]; + + $this->assertEquals($expectedAutolink, $autolink); + $this->assertEquals($expectedEntity, $entities); + } + +} \ No newline at end of file