1
0
Fork 0
mirror of https://github.com/morpheus65535/bazarr synced 2024-12-25 09:12:38 +00:00

Merge remote-tracking branch 'origin/development' into development

This commit is contained in:
morpheus65535 2022-10-13 08:33:34 -04:00
commit 560cbc0bd4
4 changed files with 1681 additions and 4 deletions

View file

@ -34,7 +34,7 @@ class HearingImpaired(SubtitleTextModification):
# uppercase text before colon (at least 3 uppercase chars); at start or after a sentence, # uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
# possibly with a dash in front; ignore anything ending with a quote # possibly with a dash in front; ignore anything ending with a quote
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])' NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=[.\-!?\"\'])\s)([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
r'[A-zÀ-ž-_0-9\s\"\'&+()\[\],:]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "", r'[A-zÀ-ž-_0-9\s\"\'&+()\[\],:]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
name="HI_before_colon_caps"), name="HI_before_colon_caps"),

View file

@ -39,9 +39,9 @@ class FixOCR(SubtitleTextModification):
return [ return [
# remove broken HI tag colons (ANNOUNCER'., ". instead of :) after at least 3 uppercase chars # remove broken HI tag colons (ANNOUNCER'., ". instead of :) after at least 3 uppercase chars
# don't modify stuff inside quotes # don't modify stuff inside quotes
NReProcessor(re.compile(r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)' #NReProcessor(re.compile(r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)'
r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'), # r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'),
r"\1:\3", name="OCR_fix_HI_colons", supported=lambda p: not p.only_uppercase), # r"\1:\3", name="OCR_fix_HI_colons", supported=lambda p: not p.only_uppercase),
# fix F'bla # fix F'bla
NReProcessor(re.compile(r'(?u)(\bF)(\')([A-zÀ-ž]*\b)'), r"\1\3", name="OCR_fix_F"), NReProcessor(re.compile(r'(?u)(\bF)(\')([A-zÀ-ž]*\b)'), r"\1\3", name="OCR_fix_F"),
WholeLineProcessor(self.data_dict["WholeLines"], name="OCR_replace_line"), WholeLineProcessor(self.data_dict["WholeLines"], name="OCR_replace_line"),

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,16 @@
import pytest
import os
from subliminal_patch import Subtitle
@pytest.fixture
def test_file(data):
    """Return the path of the ``subs_for_mods.srt`` sample inside ``data``.

    NOTE(review): ``data`` is presumably a fixture (defined outside this
    file) that yields the test-data directory -- confirm in conftest.
    """
    sample_name = "subs_for_mods.srt"
    return os.path.join(data, sample_name)
def test_apply_mods_remove_hi(languages, test_file):
    """Smoke-test the ``remove_HI`` and ``OCR_fixes`` mods on the sample file.

    Builds a ``Subtitle`` for ``languages["en"]`` with both mods enabled,
    feeds it the raw bytes of ``test_file`` and checks that
    ``get_modified_content(debug=True)`` returns something truthy.
    """
    enabled_mods = ["remove_HI", "OCR_fixes"]
    sub = Subtitle(languages["en"], mods=enabled_mods)

    # The content is assigned as bytes, exactly as read from disk.
    with open(test_file, "rb") as handle:
        raw_bytes = handle.read()
    sub.content = raw_bytes

    modified = sub.get_modified_content(debug=True)
    assert modified