mirror of
https://github.com/morpheus65535/bazarr
synced 2025-01-03 05:25:28 +00:00
Improve mods
1. Avoid uppercasing after possible abbreviations. 2. Avoid removing double punctuation for Spanish subtitles.
This commit is contained in:
parent
386ac22631
commit
b36b3782d7
1 changed files with 7 additions and 2 deletions
|
@ -13,6 +13,7 @@ from tld import get_tld
|
|||
|
||||
|
||||
ENGLISH = Language("eng")
|
||||
SPANISH = (Language("spa"), Language("spa", "MX"))
|
||||
|
||||
|
||||
class CommonFixes(SubtitleTextModification):
|
||||
|
@ -105,12 +106,16 @@ class CommonFixes(SubtitleTextModification):
|
|||
|
||||
# uppercase after dot
|
||||
NReProcessor(re.compile(r'(?u)((?<!(?=\s*[A-ZÀ-Ž-_0-9.]\s*))(?:[^.\s])+\.\s+)([a-zà-ž])'),
|
||||
lambda match: r'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"),
|
||||
lambda match: r'%s%s' % (match.group(1), match.group(2).upper()) if len(match.group(1)) > 4 else r"%s%s" % (match.group(1), match.group(2)),
|
||||
name="CM_uppercase_after_dot"),
|
||||
|
||||
# remove double interpunction
|
||||
NReProcessor(re.compile(r'(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)'),
|
||||
lambda match: match.group(1).strip() + (" " if match.group(2).endswith(" ") else ""),
|
||||
name="CM_double_interpunct"),
|
||||
name="CM_double_interpunct",
|
||||
# Double interpunction is valid in Spanish
|
||||
# https://www.rae.es/duda-linguistica/es-correcto-combinar-los-signos-de-interrogacion-y-exclamacion
|
||||
supported=lambda p: p.language not in SPANISH),
|
||||
|
||||
# remove spaces before punctuation; don't break spaced ellipses
|
||||
NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]| \.))'), r"\1", name="CM_punctuation_space"),
|
||||
|
|
Loading…
Reference in a new issue