mirror of
https://github.com/morpheus65535/bazarr
synced 2024-12-21 15:22:33 +00:00
When multiple audio streams exist, pick the correct one for whisper to process (#2688)
This commit is contained in:
parent
a1fac160fb
commit
e544e1fab1
1 changed files with 17 additions and 1 deletions
|
@ -16,6 +16,7 @@ from babelfish.exceptions import LanguageReverseError
|
|||
|
||||
import ffmpeg
|
||||
import functools
|
||||
from pycountry import languages
|
||||
|
||||
# These are all the languages Whisper supports.
|
||||
# from whisper.tokenizer import LANGUAGES
|
||||
|
@ -132,6 +133,18 @@ def set_log_level(newLevel="INFO"):
|
|||
# initialize to default above
|
||||
set_log_level()
|
||||
|
||||
# ffmpeg uses the older ISO 639-2/B (bibliographic) code when extracting audio streams based on language
|
||||
# if we give it the newer ISO 639-3 code it can't find that audio stream by name because it's different
|
||||
# for example it wants 'ger' instead of 'deu' for the German language
|
||||
# or 'fre' instead of 'fra' for the French language
|
||||
def get_ISO_639_2_code(iso639_3_code):
    """Translate an ISO 639-3 language code into its ISO 639-2 bibliographic form.

    The code is looked up in the ``languages`` database by its ``alpha_3``
    value. If the matching entry exposes a ``bibliographic`` attribute
    (e.g. 'ger' for 'deu', 'fre' for 'fra'), that value is returned;
    otherwise the input code is returned unchanged.

    :param iso639_3_code: three-letter language code to translate
    :return: the bibliographic code when one exists, else ``iso639_3_code``
    """
    entry = languages.get(alpha_3=iso639_3_code)

    # Fall back to the caller's code when the lookup found nothing or the
    # entry has no separate bibliographic code.
    if entry and hasattr(entry, 'bibliographic'):
        iso639_2_code = entry.bibliographic
    else:
        iso639_2_code = iso639_3_code

    logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')")
    return iso639_2_code
|
||||
|
||||
@functools.lru_cache(2)
|
||||
def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
|
||||
logger.debug("Encoding audio stream to WAV with ffmpeg")
|
||||
|
@ -140,7 +153,10 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
|
|||
# This launches a subprocess to decode audio while down-mixing and resampling as necessary.
|
||||
inp = ffmpeg.input(path, threads=0)
|
||||
if audio_stream_language:
|
||||
logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}")
|
||||
# There is more than one audio stream, so pick the requested one by name
|
||||
# Use the ISO 639-2 code if available
|
||||
audio_stream_language = get_ISO_639_2_code(audio_stream_language)
|
||||
logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}")
|
||||
inp = inp[f'a:m:language:{audio_stream_language}']
|
||||
|
||||
out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \
|
||||
|
|
Loading…
Reference in a new issue