Update subzero mods

This commit is contained in:
Vitiko 2022-10-12 19:04:42 -04:00
parent 1a612d12b8
commit daeb28baef
4 changed files with 1681 additions and 4 deletions

View File

@ -34,7 +34,7 @@ class HearingImpaired(SubtitleTextModification):
# uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
# possibly with a dash in front; ignore anything ending with a quote
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=[.\-!?\"\'])\s)([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
r'[A-zÀ-ž-_0-9\s\"\'&+()\[\],:]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
name="HI_before_colon_caps"),

View File

@ -39,9 +39,9 @@ class FixOCR(SubtitleTextModification):
return [
# remove broken HI tag colons (ANNOUNCER'., ". instead of :) after at least 3 uppercase chars
# don't modify stuff inside quotes
NReProcessor(re.compile(r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)'
r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'),
r"\1:\3", name="OCR_fix_HI_colons", supported=lambda p: not p.only_uppercase),
#NReProcessor(re.compile(r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)'
# r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'),
# r"\1:\3", name="OCR_fix_HI_colons", supported=lambda p: not p.only_uppercase),
# fix F'bla
NReProcessor(re.compile(r'(?u)(\bF)(\')([A-zÀ-ž]*\b)'), r"\1\3", name="OCR_fix_F"),
WholeLineProcessor(self.data_dict["WholeLines"], name="OCR_replace_line"),

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
import pytest
import os
from subliminal_patch import Subtitle
@pytest.fixture
def test_file(data):
    """Return the path of the ``subs_for_mods.srt`` sample.

    The file name is joined onto ``data``, which pytest injects by name
    (presumably the test-data directory -- confirm against the conftest).
    """
    sample_name = "subs_for_mods.srt"
    return os.path.join(data, sample_name)
def test_apply_mods_remove_hi(languages, test_file):
    """Smoke-test the ``remove_HI`` and ``OCR_fixes`` mods on the sample file.

    Builds a ``Subtitle`` for ``languages["en"]`` with both mods enabled,
    feeds it the raw bytes of ``test_file`` and checks that the modified
    content comes back truthy.
    """
    enabled_mods = ["remove_HI", "OCR_fixes"]
    subtitle = Subtitle(languages["en"], mods=enabled_mods)

    # Read in binary mode: the subtitle receives the undecoded bytes.
    with open(test_file, "rb") as handle:
        subtitle.content = handle.read()

    modified = subtitle.get_modified_content(debug=True)
    assert modified