mirror of https://github.com/morpheus65535/bazarr
Fixed some Whisper issues and added logging level selector for this specific provider
This commit is contained in:
parent
549bebcc43
commit
c0bbd4f150
|
@ -226,6 +226,8 @@ validators = [
|
||||||
# whisperai section
|
# whisperai section
|
||||||
Validator('whisperai.endpoint', must_exist=True, default='http://127.0.0.1:9000', is_type_of=str),
|
Validator('whisperai.endpoint', must_exist=True, default='http://127.0.0.1:9000', is_type_of=str),
|
||||||
Validator('whisperai.timeout', must_exist=True, default=3600, is_type_of=int, gte=1),
|
Validator('whisperai.timeout', must_exist=True, default=3600, is_type_of=int, gte=1),
|
||||||
|
Validator('whisperai.loglevel', must_exist=True, default='INFO', is_type_of=str,
|
||||||
|
is_in=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']),
|
||||||
|
|
||||||
# legendasdivx section
|
# legendasdivx section
|
||||||
Validator('legendasdivx.username', must_exist=True, default='', is_type_of=str, cast=str),
|
Validator('legendasdivx.username', must_exist=True, default='', is_type_of=str, cast=str),
|
||||||
|
|
|
@ -310,6 +310,7 @@ def get_providers_auth():
|
||||||
'endpoint': settings.whisperai.endpoint,
|
'endpoint': settings.whisperai.endpoint,
|
||||||
'timeout': settings.whisperai.timeout,
|
'timeout': settings.whisperai.timeout,
|
||||||
'ffmpeg_path': _FFMPEG_BINARY,
|
'ffmpeg_path': _FFMPEG_BINARY,
|
||||||
|
'loglevel': settings.whisperai.loglevel,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,15 @@ import {
|
||||||
useRef,
|
useRef,
|
||||||
useState,
|
useState,
|
||||||
} from "react";
|
} from "react";
|
||||||
import { Card, Check, Chips, Message, Password, Text } from "../components";
|
import {
|
||||||
|
Card,
|
||||||
|
Check,
|
||||||
|
Chips,
|
||||||
|
Selector as GlobalSelector,
|
||||||
|
Message,
|
||||||
|
Password,
|
||||||
|
Text,
|
||||||
|
} from "../components";
|
||||||
import {
|
import {
|
||||||
FormContext,
|
FormContext,
|
||||||
FormValues,
|
FormValues,
|
||||||
|
@ -206,6 +214,7 @@ const ProviderTool: FunctionComponent<ProviderToolProps> = ({
|
||||||
info.inputs?.forEach((value) => {
|
info.inputs?.forEach((value) => {
|
||||||
const key = value.key;
|
const key = value.key;
|
||||||
const label = value.name ?? capitalize(value.key);
|
const label = value.name ?? capitalize(value.key);
|
||||||
|
const options = value.options ?? [];
|
||||||
|
|
||||||
switch (value.type) {
|
switch (value.type) {
|
||||||
case "text":
|
case "text":
|
||||||
|
@ -236,6 +245,16 @@ const ProviderTool: FunctionComponent<ProviderToolProps> = ({
|
||||||
></Check>
|
></Check>
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
|
case "select":
|
||||||
|
elements.push(
|
||||||
|
<GlobalSelector
|
||||||
|
key={key}
|
||||||
|
label={label}
|
||||||
|
settingKey={`settings-${itemKey}-${key}`}
|
||||||
|
options={options}
|
||||||
|
></GlobalSelector>
|
||||||
|
);
|
||||||
|
return;
|
||||||
case "chips":
|
case "chips":
|
||||||
elements.push(
|
elements.push(
|
||||||
<Chips
|
<Chips
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import { SelectorOption } from "@/components";
|
||||||
import { ReactText } from "react";
|
import { ReactText } from "react";
|
||||||
|
|
||||||
type Input<T, N> = {
|
type Input<T, N> = {
|
||||||
|
@ -6,12 +7,14 @@ type Input<T, N> = {
|
||||||
defaultValue?: T;
|
defaultValue?: T;
|
||||||
name?: string;
|
name?: string;
|
||||||
description?: string;
|
description?: string;
|
||||||
|
options?: SelectorOption<string>[];
|
||||||
};
|
};
|
||||||
|
|
||||||
type AvailableInput =
|
type AvailableInput =
|
||||||
| Input<ReactText, "text">
|
| Input<ReactText, "text">
|
||||||
| Input<string, "password">
|
| Input<string, "password">
|
||||||
| Input<boolean, "switch">
|
| Input<boolean, "switch">
|
||||||
|
| Input<string, "select">
|
||||||
| Input<ReactText[], "chips">;
|
| Input<ReactText[], "chips">;
|
||||||
|
|
||||||
export interface ProviderInfo {
|
export interface ProviderInfo {
|
||||||
|
@ -22,6 +25,14 @@ export interface ProviderInfo {
|
||||||
inputs?: AvailableInput[];
|
inputs?: AvailableInput[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const logLevelOptions: SelectorOption<string>[] = [
|
||||||
|
{ label: "DEBUG", value: "DEBUG" },
|
||||||
|
{ label: "INFO", value: "INFO" },
|
||||||
|
{ label: "WARNING", value: "WARNING" },
|
||||||
|
{ label: "ERROR", value: "ERROR" },
|
||||||
|
{ label: "CRITICAL", value: "CRITICAL" },
|
||||||
|
];
|
||||||
|
|
||||||
export const ProviderList: Readonly<ProviderInfo[]> = [
|
export const ProviderList: Readonly<ProviderInfo[]> = [
|
||||||
{
|
{
|
||||||
key: "addic7ed",
|
key: "addic7ed",
|
||||||
|
@ -221,6 +232,12 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
|
||||||
defaultValue: 3600,
|
defaultValue: 3600,
|
||||||
name: "Transcription/translation timeout in seconds",
|
name: "Transcription/translation timeout in seconds",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
type: "select",
|
||||||
|
key: "loglevel",
|
||||||
|
name: "Logging level",
|
||||||
|
options: logLevelOptions,
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
|
||||||
|
@ -122,6 +124,13 @@ whisper_languages = {
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def set_log_level(newLevel="INFO"):
|
||||||
|
newLevel = newLevel.upper()
|
||||||
|
# print(f'WhisperAI log level changing from {logging._levelToName[logger.getEffectiveLevel()]} to {newLevel}')
|
||||||
|
logger.setLevel(getattr(logging, newLevel))
|
||||||
|
|
||||||
|
# initialize to default above
|
||||||
|
set_log_level()
|
||||||
|
|
||||||
@functools.lru_cache(2)
|
@functools.lru_cache(2)
|
||||||
def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
|
def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
|
||||||
|
@ -138,7 +147,8 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
|
||||||
.run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)
|
.run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||||
|
|
||||||
except ffmpeg.Error as e:
|
except ffmpeg.Error as e:
|
||||||
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
|
logger.warning(f"ffmpeg failed to load audio: {e.stderr.decode()}")
|
||||||
|
return None
|
||||||
|
|
||||||
logger.debug(f"Finished encoding audio stream in {path} with no errors")
|
logger.debug(f"Finished encoding audio stream in {path} with no errors")
|
||||||
|
|
||||||
|
@ -161,6 +171,9 @@ def whisper_get_language_reverse(alpha3):
|
||||||
return wl
|
return wl
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
|
def language_from_alpha3(lang):
|
||||||
|
name = Language(lang).name
|
||||||
|
return name
|
||||||
|
|
||||||
class WhisperAISubtitle(Subtitle):
|
class WhisperAISubtitle(Subtitle):
|
||||||
'''Whisper AI Subtitle.'''
|
'''Whisper AI Subtitle.'''
|
||||||
|
@ -198,12 +211,10 @@ class WhisperAIProvider(Provider):
|
||||||
for lan in whisper_languages:
|
for lan in whisper_languages:
|
||||||
languages.update({whisper_get_language(lan, whisper_languages[lan])})
|
languages.update({whisper_get_language(lan, whisper_languages[lan])})
|
||||||
|
|
||||||
languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))
|
|
||||||
languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
|
|
||||||
|
|
||||||
video_types = (Episode, Movie)
|
video_types = (Episode, Movie)
|
||||||
|
|
||||||
def __init__(self, endpoint=None, timeout=None, ffmpeg_path=None):
|
def __init__(self, endpoint=None, timeout=None, ffmpeg_path=None, loglevel=None):
|
||||||
|
set_log_level(loglevel)
|
||||||
if not endpoint:
|
if not endpoint:
|
||||||
raise ConfigurationError('Whisper Web Service Endpoint must be provided')
|
raise ConfigurationError('Whisper Web Service Endpoint must be provided')
|
||||||
|
|
||||||
|
@ -230,12 +241,16 @@ class WhisperAIProvider(Provider):
|
||||||
def detect_language(self, path) -> Language:
|
def detect_language(self, path) -> Language:
|
||||||
out = encode_audio_stream(path, self.ffmpeg_path)
|
out = encode_audio_stream(path, self.ffmpeg_path)
|
||||||
|
|
||||||
|
if out == None:
|
||||||
|
logger.info(f"Whisper cannot detect language of {path} because of missing/bad audio track")
|
||||||
|
return None
|
||||||
|
|
||||||
r = self.session.post(f"{self.endpoint}/detect-language",
|
r = self.session.post(f"{self.endpoint}/detect-language",
|
||||||
params={'encode': 'false'},
|
params={'encode': 'false'},
|
||||||
files={'audio_file': out},
|
files={'audio_file': out},
|
||||||
timeout=(5, self.timeout))
|
timeout=(self.timeout, self.timeout))
|
||||||
|
|
||||||
logger.info(f"Whisper detected language of {path} as {r.json()['detected_language']}")
|
logger.debug(f"Whisper detected language of {path} as {r.json()['detected_language']}")
|
||||||
|
|
||||||
return whisper_get_language(r.json()["language_code"], r.json()["detected_language"])
|
return whisper_get_language(r.json()["language_code"], r.json()["detected_language"])
|
||||||
|
|
||||||
|
@ -262,6 +277,11 @@ class WhisperAIProvider(Provider):
|
||||||
else:
|
else:
|
||||||
# We must detect the language manually
|
# We must detect the language manually
|
||||||
detected_lang = self.detect_language(video.original_path)
|
detected_lang = self.detect_language(video.original_path)
|
||||||
|
if detected_lang == None:
|
||||||
|
sub.task = "error"
|
||||||
|
# tell the user what is wrong
|
||||||
|
sub.release_info = "bad/missing audio track - cannot transcribe"
|
||||||
|
return sub
|
||||||
|
|
||||||
if detected_lang != language:
|
if detected_lang != language:
|
||||||
sub.task = "translate"
|
sub.task = "translate"
|
||||||
|
@ -270,9 +290,11 @@ class WhisperAIProvider(Provider):
|
||||||
|
|
||||||
if sub.task == "translate":
|
if sub.task == "translate":
|
||||||
if language.alpha3 != "eng":
|
if language.alpha3 != "eng":
|
||||||
logger.info(f"Translation only possible from {language} to English")
|
logger.debug(f"Translation only possible from {language} to English")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# tell the user what we are about to do
|
||||||
|
sub.release_info = f"{sub.task} {language_from_alpha3(sub.audio_language)} audio -> {language_from_alpha3(language.alpha3)} SRT"
|
||||||
logger.debug(f"Whisper ({video.original_path}): {sub.audio_language} -> {language.alpha3} [TASK: {sub.task}]")
|
logger.debug(f"Whisper ({video.original_path}): {sub.audio_language} -> {language.alpha3} [TASK: {sub.task}]")
|
||||||
|
|
||||||
return sub
|
return sub
|
||||||
|
@ -285,11 +307,29 @@ class WhisperAIProvider(Provider):
|
||||||
# Invoke Whisper through the API. This may take a long time depending on the file.
|
# Invoke Whisper through the API. This may take a long time depending on the file.
|
||||||
# TODO: This loads the entire file into memory, find a good way to stream the file in chunks
|
# TODO: This loads the entire file into memory, find a good way to stream the file in chunks
|
||||||
|
|
||||||
out = encode_audio_stream(subtitle.video.original_path, self.ffmpeg_path, subtitle.force_audio_stream)
|
out = None
|
||||||
|
if subtitle.task != "error":
|
||||||
|
out = encode_audio_stream(subtitle.video.original_path, self.ffmpeg_path, subtitle.force_audio_stream)
|
||||||
|
if out == None:
|
||||||
|
logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track")
|
||||||
|
subtitle.content = None
|
||||||
|
return
|
||||||
|
|
||||||
|
if subtitle.task == "transcribe":
|
||||||
|
output_language = subtitle.audio_language
|
||||||
|
else:
|
||||||
|
output_language = "eng"
|
||||||
|
|
||||||
|
logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}')
|
||||||
|
startTime = time.time()
|
||||||
|
|
||||||
r = self.session.post(f"{self.endpoint}/asr",
|
r = self.session.post(f"{self.endpoint}/asr",
|
||||||
params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
|
params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
|
||||||
files={'audio_file': out},
|
files={'audio_file': out},
|
||||||
timeout=(5, self.timeout))
|
timeout=(self.timeout, self.timeout))
|
||||||
|
|
||||||
|
endTime = time.time()
|
||||||
|
elapsedTime = timedelta(seconds=round(endTime - startTime))
|
||||||
|
logger.info(f'Completed WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} in {elapsedTime} for {subtitle.video.original_path}')
|
||||||
|
|
||||||
subtitle.content = r.content
|
subtitle.content = r.content
|
||||||
|
|
Loading…
Reference in New Issue