Fixed some Whisper issues and added logging level selector for this specific provider

JayZed 2024-01-05 21:01:45 -05:00, committed by GitHub
parent 549bebcc43
commit c0bbd4f150
5 changed files with 91 additions and 12 deletions


@@ -226,6 +226,8 @@ validators = [
     # whisperai section
     Validator('whisperai.endpoint', must_exist=True, default='http://127.0.0.1:9000', is_type_of=str),
     Validator('whisperai.timeout', must_exist=True, default=3600, is_type_of=int, gte=1),
+    Validator('whisperai.loglevel', must_exist=True, default='INFO', is_type_of=str,
+              is_in=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']),

     # legendasdivx section
     Validator('legendasdivx.username', must_exist=True, default='', is_type_of=str, cast=str),
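For context, the is_in constraint means a typo in the config is rejected at validation time instead of surfacing later inside the provider. A minimal standalone sketch of that behavior (hypothetical settings object, not Bazarr's actual config wiring):

from dynaconf import Dynaconf, Validator
from dynaconf.validator import ValidationError

settings = Dynaconf()                          # empty settings, illustration only
settings.set('whisperai.loglevel', 'VERBOSE')  # not one of the allowed names

v = Validator('whisperai.loglevel', must_exist=True, default='INFO', is_type_of=str,
              is_in=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])
try:
    v.validate(settings)
except ValidationError as exc:
    print(exc)  # reports that 'VERBOSE' is not in the allowed set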


@@ -310,6 +310,7 @@ def get_providers_auth():
             'endpoint': settings.whisperai.endpoint,
             'timeout': settings.whisperai.timeout,
             'ffmpeg_path': _FFMPEG_BINARY,
+            'loglevel': settings.whisperai.loglevel,
         }
     }
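Provider auth dicts like this one are ultimately handed to the provider constructor as keyword arguments, which is why a matching loglevel parameter is added to WhisperAIProvider.__init__ further down. Roughly (a sketch, not the literal Bazarr call site):

provider_configs = get_providers_auth()
# each key/value pair becomes a keyword argument of the provider
provider = WhisperAIProvider(**provider_configs['whisperai'])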


@@ -20,7 +20,15 @@ import {
   useRef,
   useState,
 } from "react";
-import { Card, Check, Chips, Message, Password, Text } from "../components";
+import {
+  Card,
+  Check,
+  Chips,
+  Selector as GlobalSelector,
+  Message,
+  Password,
+  Text,
+} from "../components";
 import {
   FormContext,
   FormValues,
@@ -206,6 +214,7 @@ const ProviderTool: FunctionComponent<ProviderToolProps> = ({
     info.inputs?.forEach((value) => {
       const key = value.key;
       const label = value.name ?? capitalize(value.key);
+      const options = value.options ?? [];

       switch (value.type) {
         case "text":
@@ -236,6 +245,16 @@ const ProviderTool: FunctionComponent<ProviderToolProps> = ({
             ></Check>
           );
           return;
+        case "select":
+          elements.push(
+            <GlobalSelector
+              key={key}
+              label={label}
+              settingKey={`settings-${itemKey}-${key}`}
+              options={options}
+            ></GlobalSelector>
+          );
+          return;
         case "chips":
           elements.push(
             <Chips


@@ -1,3 +1,4 @@
+import { SelectorOption } from "@/components";
 import { ReactText } from "react";

 type Input<T, N> = {
@@ -6,12 +7,14 @@ type Input<T, N> = {
   defaultValue?: T;
   name?: string;
   description?: string;
+  options?: SelectorOption<string>[];
 };

 type AvailableInput =
   | Input<ReactText, "text">
   | Input<string, "password">
   | Input<boolean, "switch">
+  | Input<string, "select">
   | Input<ReactText[], "chips">;

 export interface ProviderInfo {
@@ -22,6 +25,14 @@ export interface ProviderInfo {
   inputs?: AvailableInput[];
 }

+export const logLevelOptions: SelectorOption<string>[] = [
+  { label: "DEBUG", value: "DEBUG" },
+  { label: "INFO", value: "INFO" },
+  { label: "WARNING", value: "WARNING" },
+  { label: "ERROR", value: "ERROR" },
+  { label: "CRITICAL", value: "CRITICAL" },
+];
+
 export const ProviderList: Readonly<ProviderInfo[]> = [
   {
     key: "addic7ed",
@@ -221,6 +232,12 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
       defaultValue: 3600,
       name: "Transcription/translation timeout in seconds",
     },
+    {
+      type: "select",
+      key: "loglevel",
+      name: "Logging level",
+      options: logLevelOptions,
+    },
   ],
 },
 {


@ -1,5 +1,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
import time
from datetime import timedelta
from requests import Session from requests import Session
@ -122,6 +124,13 @@ whisper_languages = {
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def set_log_level(newLevel="INFO"):
newLevel = newLevel.upper()
# print(f'WhisperAI log level changing from {logging._levelToName[logger.getEffectiveLevel()]} to {newLevel}')
logger.setLevel(getattr(logging, newLevel))
# initialize to default above
set_log_level()
@functools.lru_cache(2) @functools.lru_cache(2)
def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None): def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
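set_log_level resolves the level name to the module-level constant (logging.DEBUG, logging.INFO, ...) via getattr, so it accepts exactly the names the new validator allows, in any case. A self-contained illustration:

import logging

logger = logging.getLogger('whisperai-demo')  # stand-in for the module logger

def set_log_level(newLevel='INFO'):
    newLevel = newLevel.upper()
    logger.setLevel(getattr(logging, newLevel))

set_log_level('debug')               # case-insensitive thanks to .upper()
print(logger.getEffectiveLevel())    # 10 == logging.DEBUG
set_log_level('WARNING')
print(logger.getEffectiveLevel())    # 30 == logging.WARNING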
@@ -138,7 +147,8 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
             .run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)

     except ffmpeg.Error as e:
-        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+        logger.warning(f"ffmpeg failed to load audio: {e.stderr.decode()}")
+        return None

     logger.debug(f"Finished encoding audio stream in {path} with no errors")
@@ -161,6 +171,9 @@ def whisper_get_language_reverse(alpha3):
             return wl
     raise ValueError

+def language_from_alpha3(lang):
+    name = Language(lang).name
+    return name

 class WhisperAISubtitle(Subtitle):
     '''Whisper AI Subtitle.'''
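language_from_alpha3 simply maps an ISO 639-3 code to a human-readable name via babelfish; it is used below to build the release_info strings shown to the user. For example:

from babelfish import Language

def language_from_alpha3(lang):
    return Language(lang).name

print(language_from_alpha3('eng'))  # English
print(language_from_alpha3('fra'))  # French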
@@ -198,12 +211,10 @@ class WhisperAIProvider(Provider):
     for lan in whisper_languages:
         languages.update({whisper_get_language(lan, whisper_languages[lan])})

-    languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))
-    languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
-
     video_types = (Episode, Movie)

-    def __init__(self, endpoint=None, timeout=None, ffmpeg_path=None):
+    def __init__(self, endpoint=None, timeout=None, ffmpeg_path=None, loglevel=None):
+        set_log_level(loglevel)
         if not endpoint:
             raise ConfigurationError('Whisper Web Service Endpoint must be provided')
@@ -230,12 +241,16 @@ class WhisperAIProvider(Provider):
     def detect_language(self, path) -> Language:
         out = encode_audio_stream(path, self.ffmpeg_path)
+        if out == None:
+            logger.info(f"Whisper cannot detect language of {path} because of missing/bad audio track")
+            return None
+
         r = self.session.post(f"{self.endpoint}/detect-language",
                               params={'encode': 'false'},
                               files={'audio_file': out},
-                              timeout=(5, self.timeout))
+                              timeout=(self.timeout, self.timeout))

-        logger.info(f"Whisper detected language of {path} as {r.json()['detected_language']}")
+        logger.debug(f"Whisper detected language of {path} as {r.json()['detected_language']}")

         return whisper_get_language(r.json()["language_code"], r.json()["detected_language"])
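In requests, a two-element timeout is (connect timeout, read timeout). The old (5, self.timeout) gave the server only five seconds to accept the connection, presumably too little for a Whisper container that is busy transcribing, so both elements now use the configured value. An illustration of the tuple semantics (hypothetical endpoint):

import requests

endpoint = 'http://127.0.0.1:9000'  # hypothetical Whisper web service
timeout = 3600

# first element: seconds allowed to establish the TCP connection
# second element: seconds allowed between bytes of the response
r = requests.post(f'{endpoint}/detect-language',
                  params={'encode': 'false'},
                  timeout=(timeout, timeout))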
@@ -262,6 +277,11 @@ class WhisperAIProvider(Provider):
         else:
             # We must detect the language manually
             detected_lang = self.detect_language(video.original_path)
+            if detected_lang == None:
+                sub.task = "error"
+                # tell the user what is wrong
+                sub.release_info = "bad/missing audio track - cannot transcribe"
+                return sub

             if detected_lang != language:
                 sub.task = "translate"
@@ -270,9 +290,11 @@ class WhisperAIProvider(Provider):
         if sub.task == "translate":
             if language.alpha3 != "eng":
-                logger.info(f"Translation only possible from {language} to English")
+                logger.debug(f"Translation only possible from {language} to English")
                 return None

+        # tell the user what we are about to do
+        sub.release_info = f"{sub.task} {language_from_alpha3(sub.audio_language)} audio -> {language_from_alpha3(language.alpha3)} SRT"
         logger.debug(f"Whisper ({video.original_path}): {sub.audio_language} -> {language.alpha3} [TASK: {sub.task}]")

         return sub
@@ -285,11 +307,29 @@ class WhisperAIProvider(Provider):
         # Invoke Whisper through the API. This may take a long time depending on the file.
         # TODO: This loads the entire file into memory, find a good way to stream the file in chunks
-        out = encode_audio_stream(subtitle.video.original_path, self.ffmpeg_path, subtitle.force_audio_stream)
+        out = None
+        if subtitle.task != "error":
+            out = encode_audio_stream(subtitle.video.original_path, self.ffmpeg_path, subtitle.force_audio_stream)
+
+        if out == None:
+            logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track")
+            subtitle.content = None
+            return
+
+        if subtitle.task == "transcribe":
+            output_language = subtitle.audio_language
+        else:
+            output_language = "eng"
+
+        logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}')
+        startTime = time.time()
+
         r = self.session.post(f"{self.endpoint}/asr",
                               params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
                               files={'audio_file': out},
-                              timeout=(5, self.timeout))
+                              timeout=(self.timeout, self.timeout))

+        endTime = time.time()
+        elapsedTime = timedelta(seconds=round(endTime - startTime))
+        logger.info(f'Completed WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} in {elapsedTime} for {subtitle.video.original_path}')

         subtitle.content = r.content
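Rounding the elapsed seconds and wrapping them in timedelta yields a readable H:MM:SS string for the log line. For example:

import time
from datetime import timedelta

startTime = time.time()
time.sleep(1.2)                      # stands in for the long /asr request
endTime = time.time()

elapsedTime = timedelta(seconds=round(endTime - startTime))
print(elapsedTime)                   # 0:00:01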