Merge branch 'development'

# Conflicts:
#	frontend/package-lock.json
This commit is contained in:
morpheus65535 2023-05-01 20:39:33 -04:00
commit b485dd9c71
108 changed files with 7731 additions and 39543 deletions

View File

@ -3,3 +3,4 @@ frontend/build
libs
bazarr.py
requirements.txt
postgres-requirements.txt

View File

@ -13,6 +13,8 @@ from operator import itemgetter
from app.get_providers import get_enabled_providers
from app.database import TableAnnouncements
from .get_args import args
from sonarr.info import get_sonarr_info
from radarr.info import get_radarr_info
# Announcements as received by the browser must be in the form of a list of dicts converted to JSON
@ -84,6 +86,24 @@ def get_local_announcements():
'timestamp': 1676236978,
})
# deprecated Sonarr and Radarr versions
if get_sonarr_info.is_deprecated():
announcements.append({
'text': f'Sonarr {get_sonarr_info.version()} is deprecated and unsupported. You should consider upgrading '
f'as Bazarr will eventually drop support for deprecated Sonarr version.',
'link': 'https://forums.sonarr.tv/t/v3-is-now-officially-stable-v2-is-eol/27858',
'dismissible': False,
'timestamp': 1679606061,
})
if get_radarr_info.is_deprecated():
announcements.append({
'text': f'Radarr {get_radarr_info.version()} is deprecated and unsupported. You should consider upgrading '
f'as Bazarr will eventually drop support for deprecated Radarr version.',
'link': 'https://discord.com/channels/264387956343570434/264388019585286144/1051567458697363547',
'dismissible': False,
'timestamp': 1679606309,
})
for announcement in announcements:
if 'enabled' not in announcement:
announcement['enabled'] = True

View File

@ -82,7 +82,8 @@ defaults = {
'embedded_subtitles_parser': 'ffprobe',
'default_und_audio_lang': '',
'default_und_embedded_subtitles_lang': '',
'parse_embedded_audio_track': 'False'
'parse_embedded_audio_track': 'False',
'skip_hashing': 'False'
},
'auth': {
'type': 'None',
@ -169,6 +170,10 @@ defaults = {
'subf2m': {
'verify_ssl': 'True'
},
'whisperai': {
'endpoint': 'http://127.0.0.1:9000',
'timeout': '3600'
},
'legendasdivx': {
'username': '',
'password': '',
@ -275,7 +280,7 @@ defaults = {
'database': '',
'username': '',
'password': '',
}
},
}
settings = SimpleConfigParser(defaults=defaults, interpolation=None)

View File

@ -22,7 +22,7 @@ from .get_args import args
logger = logging.getLogger(__name__)
postgresql = settings.postgresql.getboolean('enabled')
postgresql = (os.getenv("POSTGRES_ENABLED", settings.postgresql.enabled).lower() == 'true')
region = make_region().configure('dogpile.cache.memory')
@ -32,14 +32,19 @@ if postgresql:
(OperationalError, 'server closed the connection unexpectedly'),
)
postgres_database = os.getenv("POSTGRES_DATABASE", settings.postgresql.database)
postgres_username = os.getenv("POSTGRES_USERNAME", settings.postgresql.username)
postgres_password = os.getenv("POSTGRES_PASSWORD", settings.postgresql.password)
postgres_host = os.getenv("POSTGRES_HOST", settings.postgresql.host)
postgres_port = os.getenv("POSTGRES_PORT", settings.postgresql.port)
logger.debug(
f"Connecting to PostgreSQL database: {settings.postgresql.host}:{settings.postgresql.port}/{settings.postgresql.database}")
database = ReconnectPostgresqlDatabase(settings.postgresql.database,
user=settings.postgresql.username,
password=settings.postgresql.password,
host=settings.postgresql.host,
port=settings.postgresql.port,
f"Connecting to PostgreSQL database: {postgres_host}:{postgres_port}/{postgres_database}")
database = ReconnectPostgresqlDatabase(postgres_database,
user=postgres_username,
password=postgres_password,
host=postgres_host,
port=postgres_port,
autocommit=True,
autorollback=True,
autoconnect=True,

View File

@ -252,6 +252,10 @@ def get_providers_auth():
'subf2m': {
'verify_ssl': settings.subf2m.getboolean('verify_ssl')
},
'whisperai': {
'endpoint': settings.whisperai.endpoint,
'timeout': settings.whisperai.timeout
}
}

View File

@ -125,6 +125,7 @@ def configure_logging(debug=False):
logging.getLogger("srt").setLevel(logging.ERROR)
logging.getLogger("SignalRCoreClient").setLevel(logging.CRITICAL)
logging.getLogger("websocket").setLevel(logging.CRITICAL)
logging.getLogger("ga4mp.ga4mp").setLevel(logging.ERROR)
logging.getLogger("waitress").setLevel(logging.ERROR)
logging.getLogger("knowit").setLevel(logging.CRITICAL)

View File

@ -10,7 +10,6 @@ import time
import rarfile
from dogpile.cache.region import register_backend as register_cache_backend
from subliminal_patch.extensions import provider_registry
from app.config import settings, configure_captcha_func, get_array_from
from app.get_args import args
@ -57,6 +56,9 @@ os.environ["SZ_HI_EXTENSION"] = settings.general.hi_extension
# set anti-captcha provider and key
configure_captcha_func()
# import Google Analytics module to make sure logging is properly configured afterwards
from ga4mp import GtagMP # noqa E402
# configure logging
configure_logging(settings.general.getboolean('debug') or args.debug)
import logging # noqa E402
@ -73,7 +75,7 @@ def is_virtualenv():
# deploy requirements.txt
if not args.no_update:
try:
import lxml, numpy, webrtcvad, setuptools # noqa E401
import lxml, numpy, webrtcvad, setuptools, PIL # noqa E401
except ImportError:
try:
import pip # noqa W0611
@ -200,6 +202,7 @@ with open(os.path.normpath(os.path.join(args.config_dir, 'config', 'config.ini')
# Remove deprecated providers from enabled providers in config.ini
from subliminal_patch.extensions import provider_registry # noqa E401
existing_providers = provider_registry.names()
enabled_providers = get_array_from(settings.general.enabled_providers)
settings.general.enabled_providers = str([x for x in enabled_providers if x in existing_providers])

View File

@ -59,6 +59,17 @@ class GetRadarrInfo:
else:
return False
def is_deprecated(self):
    """
    Call self.version() and parse the result to determine if it's a deprecated version of Radarr.

    @return: bool — True when the running Radarr is a 0.x or 3.x release
    """
    # Radarr 0.x and 3.x are the deprecated release lines; return the
    # startswith() result directly instead of an if/else returning True/False.
    return self.version().startswith(('0.', '3.'))
get_radarr_info = GetRadarrInfo()

View File

@ -59,6 +59,17 @@ class GetSonarrInfo:
else:
return False
def is_deprecated(self):
    """
    Call self.version() and parse the result to determine if it's a deprecated version of Sonarr.

    @return: bool — True when the running Sonarr is a 0.x or 2.x release
    """
    # Sonarr 0.x and 2.x are the deprecated release lines; return the
    # startswith() result directly instead of an if/else returning True/False.
    return self.version().startswith(('0.', '2.'))
get_sonarr_info = GetSonarrInfo()

View File

@ -83,8 +83,12 @@ def manual_search(path, profile_id, providers, sceneName, title, media_type):
can_verify_series = False
if can_verify_series and not {"series", "season", "episode"}.issubset(matches):
logging.debug(f"BAZARR Skipping {s}, because it doesn't match our series/episode")
continue
try:
logging.debug(f"BAZARR Skipping {s}, because it doesn't match our series/episode")
except TypeError:
logging.debug("BAZARR Ignoring invalid subtitles")
finally:
continue
initial_hi = None
initial_hi_match = False

View File

@ -8,7 +8,7 @@ from utilities.path_mappings import path_mappings
from utilities.post_processing import pp_replace, set_chmod
from languages.get_languages import alpha2_from_alpha3, alpha2_from_language, alpha3_from_language, language_from_alpha3
from app.database import TableEpisodes, TableMovies
from utilities.analytics import track_event
from utilities.analytics import event_tracker
from radarr.notify import notify_radarr
from sonarr.notify import notify_sonarr
from app.event_handler import event_stream
@ -135,7 +135,7 @@ def process_subtitle(subtitle, media_type, audio_language, path, max_score, is_u
notify_radarr(movie_metadata['radarrId'])
event_stream(type='movie-wanted', action='delete', payload=movie_metadata['radarrId'])
track_event(category=downloaded_provider, action=action, label=downloaded_language)
event_tracker.track(provider=downloaded_provider, action=action, language=downloaded_language)
return ProcessSubtitlesResult(message=message,
reversed_path=reversed_path,

View File

@ -138,6 +138,9 @@ def parse_upgradable_list(upgradable_list, perfect_score, media_type):
for item in upgradable_list:
logging.debug(f"Trying to validate eligibility to upgrade for this subtitles: "
f"{item['subtitles_path']}")
if not os.path.exists(path_replace_method(item['subtitles_path'])):
logging.debug("Subtitles file doesn't exists anymore, we skip this one.")
continue
if (item['video_path'], item['language']) in \
[(x['video_path'], x['language']) for x in items_to_upgrade]:
logging.debug("Newer video path and subtitles language combination already in list of subtitles to "

View File

@ -7,6 +7,7 @@ import os
from subzero.language import Language
from subzero.video import parse_video
from app.config import settings
from languages.custom_lang import CustomLanguage
from app.database import get_profiles_list
from subtitles.tools.score import movie_score, series_score
@ -36,7 +37,8 @@ def get_video(path, title, sceneName, providers=None, media_type="movie"):
hash_from = original_path
try:
video = parse_video(path, hints=hints, providers=providers, dry_run=used_scene_name,
skip_hashing = settings.general.getboolean('skip_hashing')
video = parse_video(path, hints=hints, skip_hashing=skip_hashing, dry_run=used_scene_name, providers=providers,
hash_from=hash_from)
video.used_scene_name = used_scene_name
video.original_name = original_name

View File

@ -1,70 +1,64 @@
# coding=utf-8
import pickle
import random
import platform
import os
import logging
import codecs
from pyga.requests import Event, Tracker, Session, Visitor, Config
from pyga.entities import CustomVariable
from ga4mp import GtagMP
from app.get_args import args
from app.config import settings
from radarr.info import get_radarr_info
from sonarr.info import get_sonarr_info
sonarr_version = get_sonarr_info.version()
radarr_version = get_radarr_info.version()
class EventTracker:
def __init__(self):
self.bazarr_version = os.environ["BAZARR_VERSION"].lstrip('v')
self.os_version = platform.python_version()
self.sonarr_version = get_sonarr_info.version()
self.radarr_version = get_radarr_info.version()
self.python_version = platform.platform()
def track_event(category=None, action=None, label=None):
if not settings.analytics.getboolean('enabled'):
return
self.tracker = None
anonymousConfig = Config()
anonymousConfig.anonimize_ip_address = True
self.start_tracker()
tracker = Tracker('UA-138214134-3', 'none', conf=anonymousConfig)
def start_tracker(self):
self.tracker = GtagMP(api_secret="qHRaseheRsic6-h2I_rIAA", measurement_id="G-3820T18GE3", client_id="temp")
try:
if os.path.isfile(os.path.normpath(os.path.join(args.config_dir, 'config', 'analytics.dat'))):
with open(os.path.normpath(os.path.join(args.config_dir, 'config', 'analytics.dat')), 'r') as handle:
visitor_text = handle.read()
visitor = pickle.loads(codecs.decode(visitor_text.encode(), "base64"))
if visitor.user_agent is None:
visitor.user_agent = os.environ.get("SZ_USER_AGENT")
if visitor.unique_id > int(0x7fffffff):
visitor.unique_id = random.randint(0, 0x7fffffff)
if not os.path.isfile(os.path.normpath(os.path.join(args.config_dir, 'config', 'analytics_visitor_id.txt'))):
visitor_id = self.tracker.random_client_id()
with open(os.path.normpath(os.path.join(args.config_dir, 'config', 'analytics_visitor_id.txt')), 'w+') \
as handle:
handle.write(str(visitor_id))
else:
visitor = Visitor()
visitor.unique_id = random.randint(0, 0x7fffffff)
except Exception:
visitor = Visitor()
visitor.unique_id = random.randint(0, 0x7fffffff)
with open(os.path.normpath(os.path.join(args.config_dir, 'config', 'analytics_visitor_id.txt')), 'r') as \
handle:
visitor_id = handle.read()
session = Session()
event = Event(category=category, action=action, label=label, value=1)
self.tracker.client_id = visitor_id
tracker.add_custom_variable(CustomVariable(index=1, name='BazarrVersion',
value=os.environ["BAZARR_VERSION"].lstrip('v'), scope=1))
tracker.add_custom_variable(CustomVariable(index=2, name='PythonVersion', value=platform.python_version(), scope=1))
if settings.general.getboolean('use_sonarr'):
tracker.add_custom_variable(CustomVariable(index=3, name='SonarrVersion', value=sonarr_version, scope=1))
else:
tracker.add_custom_variable(CustomVariable(index=3, name='SonarrVersion', value='unused', scope=1))
if settings.general.getboolean('use_radarr'):
tracker.add_custom_variable(CustomVariable(index=4, name='RadarrVersion', value=radarr_version, scope=1))
else:
tracker.add_custom_variable(CustomVariable(index=4, name='RadarrVersion', value='unused', scope=1))
tracker.add_custom_variable(CustomVariable(index=5, name='OSVersion', value=platform.platform(), scope=1))
self.tracker.store.set_user_property(name="BazarrVersion", value=self.bazarr_version)
self.tracker.store.set_user_property(name="PythonVersion", value=self.os_version)
self.tracker.store.set_user_property(name="SonarrVersion", value=self.sonarr_version)
self.tracker.store.set_user_property(name="RadarrVersion", value=self.radarr_version)
self.tracker.store.set_user_property(name="OSVersion", value=self.python_version)
try:
tracker.track_event(event, session, visitor)
except Exception:
logging.debug("BAZARR unable to track event.")
pass
else:
with open(os.path.normpath(os.path.join(args.config_dir, 'config', 'analytics.dat')), 'w+') as handle:
handle.write(codecs.encode(pickle.dumps(visitor), "base64").decode())
self.tracker.store.save()
def track(self, provider, action, language):
subtitles_event = self.tracker.create_new_event(name="subtitles")
subtitles_event.set_event_param(name="subtitles_provider", value=provider)
subtitles_event.set_event_param(name="subtitles_action", value=action)
subtitles_event.set_event_param(name="subtitles_language", value=language)
try:
self.tracker.send(events=[subtitles_event])
except Exception:
logging.debug("BAZARR unable to track event.")
else:
self.tracker.store.save()
event_tracker = EventTracker()

View File

@ -158,7 +158,9 @@ def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=No
# or if we have mediainfo available
elif mediainfo_path:
try:
data["mediainfo"] = know(video_path=file, context={"provider": "mediainfo", "mediainfo": mediainfo_path})
# disabling mediainfo path temporarily until issue with knowit is fixed.
# data["mediainfo"] = know(video_path=file, context={"provider": "mediainfo", "mediainfo": mediainfo_path})
data["mediainfo"] = know(video_path=file, context={"provider": "mediainfo"})
except KnowitException as e:
logging.error(f"BAZARR mediainfo cannot analyze this video file {file}. Could it be corrupted? {e}")
return None

11074
frontend/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -44,7 +44,7 @@
"@types/react-table": "^7.7.0",
"@vitejs/plugin-react": "^2.2.0",
"@vitest/coverage-c8": "^0.25.0",
"@vitest/ui": "^0.25.0",
"@vitest/ui": "^0.29.1",
"clsx": "^1.2.0",
"eslint": "^8.26.0",
"eslint-config-react-app": "^7.0.1",

View File

@ -77,7 +77,11 @@ export function useEpisodeAddBlacklist() {
},
{
onSuccess: (_, { seriesId, episodeId }) => {
client.invalidateQueries([QueryKeys.Series, QueryKeys.Blacklist]);
client.invalidateQueries([
QueryKeys.Series,
QueryKeys.Episodes,
QueryKeys.Blacklist,
]);
client.invalidateQueries([QueryKeys.Series, seriesId]);
},
}
@ -92,7 +96,11 @@ export function useEpisodeDeleteBlacklist() {
api.episodes.deleteBlacklist(param.all, param.form),
{
onSuccess: (_, param) => {
client.invalidateQueries([QueryKeys.Series, QueryKeys.Blacklist]);
client.invalidateQueries([
QueryKeys.Series,
QueryKeys.Episodes,
QueryKeys.Blacklist,
]);
},
}
);

View File

@ -11,4 +11,6 @@ class BadgesApi extends BaseApi {
}
}
export default new BadgesApi();
const badgesApi = new BadgesApi();
export default badgesApi;

View File

@ -94,4 +94,5 @@ class BazarrClient {
}
}
export default new BazarrClient();
const client = new BazarrClient();
export default client;

View File

@ -95,4 +95,5 @@ class EpisodeApi extends BaseApi {
}
}
export default new EpisodeApi();
const episodeApi = new EpisodeApi();
export default episodeApi;

View File

@ -23,4 +23,5 @@ class FilesApi extends BaseApi {
}
}
export default new FilesApi();
const filesApi = new FilesApi();
export default filesApi;

View File

@ -21,4 +21,5 @@ class HistoryApi extends BaseApi {
}
}
export default new HistoryApi();
const historyApi = new HistoryApi();
export default historyApi;

View File

@ -90,4 +90,5 @@ class MovieApi extends BaseApi {
}
}
export default new MovieApi();
const movieApi = new MovieApi();
export default movieApi;

View File

@ -47,4 +47,5 @@ class ProviderApi extends BaseApi {
}
}
export default new ProviderApi();
const providerApi = new ProviderApi();
export default providerApi;

View File

@ -29,4 +29,5 @@ class SeriesApi extends BaseApi {
}
}
export default new SeriesApi();
const seriesApi = new SeriesApi();
export default seriesApi;

View File

@ -17,4 +17,5 @@ class SubtitlesApi extends BaseApi {
}
}
export default new SubtitlesApi();
const subtitlesApi = new SubtitlesApi();
export default subtitlesApi;

View File

@ -119,4 +119,5 @@ class SystemApi extends BaseApi {
}
}
export default new SystemApi();
const systemApi = new SystemApi();
export default systemApi;

View File

@ -33,4 +33,5 @@ class RequestUtils {
}
}
export default new RequestUtils();
const requestUtils = new RequestUtils();
export default requestUtils;

View File

@ -168,4 +168,6 @@ class SocketIOClient {
}
}
export default new SocketIOClient();
const socketIOClient = new SocketIOClient();
export default socketIOClient;

View File

@ -206,6 +206,25 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
],
},
{ key: "napiprojekt", description: "Polish Subtitles Provider" },
{
key: "whisperai",
name: "Whisper",
description: "AI Generated Subtitles powered by Whisper",
inputs: [
{
type: "text",
key: "endpoint",
defaultValue: "http://127.0.0.1:9000",
name: "Whisper ASR Docker Endpoint",
},
{
type: "text",
key: "timeout",
defaultValue: 3600,
name: "Transcription/translation timeout in seconds",
},
],
},
{
key: "napisy24",
description: "Polish Subtitles Provider",
@ -429,5 +448,9 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
description: "Bulgarian Subtitles Provider",
},
{ key: "yifysubtitles", name: "YIFY Subtitles" },
{ key: "zimuku", description: "Chinese Subtitles Provider" },
{
key: "zimuku",
name: "Zimuku",
description: "Chinese Subtitles Provider. Anti-captcha required",
},
];

View File

@ -249,7 +249,7 @@ const SettingsSubtitlesView: FunctionComponent = () => {
<Message>
Use embedded subtitles in media files when determining missing ones.
</Message>
<CollapseBox settingKey="settings-general-use_embedded_subs">
<CollapseBox indent settingKey="settings-general-use_embedded_subs">
<Check
label="Ignore Embedded PGS Subtitles"
settingKey="settings-general-ignore_pgs_subs"
@ -288,6 +288,15 @@ const SettingsSubtitlesView: FunctionComponent = () => {
></Selector>
<Message>Embedded subtitles video parser</Message>
</CollapseBox>
<Check
label="Skip video file hash calculation"
settingKey="settings-general-skip_hashing"
></Check>
<Message>
Skip video file hashing during search process to prevent sleeping hard
disk drive from waking-up. On the other hand, this may decrease your
search results scores.
</Message>
</Section>
<Section header="Post-Processing">
<Check
@ -369,7 +378,7 @@ const SettingsSubtitlesView: FunctionComponent = () => {
label="Permission (chmod)"
settingKey="settings-general-chmod_enabled"
></Check>
<CollapseBox settingKey="settings-general-chmod_enabled">
<CollapseBox indent settingKey="settings-general-chmod_enabled">
<Text placeholder="0777" settingKey="settings-general-chmod"></Text>
<Message>Must be 4 digit octal</Message>
</CollapseBox>
@ -389,7 +398,7 @@ const SettingsSubtitlesView: FunctionComponent = () => {
Enable the automatic subtitles synchronization after downloading a
subtitles.
</Message>
<CollapseBox settingKey="settings-subsync-use_subsync">
<CollapseBox indent settingKey="settings-subsync-use_subsync">
<Check label="Debug" settingKey="settings-subsync-debug"></Check>
<Message>
Do not actually sync the subtitles but generate a .tar.gz file to be
@ -418,7 +427,7 @@ const SettingsSubtitlesView: FunctionComponent = () => {
<Message>
Enable the post-processing execution after downloading a subtitles.
</Message>
<CollapseBox settingKey="settings-general-use_postprocessing">
<CollapseBox indent settingKey="settings-general-use_postprocessing">
<Check
settingKey="settings-general-use_postprocessing_threshold"
label="Series Score Threshold"

View File

@ -0,0 +1 @@
__version__ = "0.5.0"

View File

@ -1,4 +1,5 @@
import codecs
from collections.abc import MutableMapping
import logging
import os
import pickle
@ -7,14 +8,6 @@ import tempfile
import appdirs
try:
from collections.abc import MutableMapping
unicode = str
except ImportError:
# Python 2 imports
from collections import MutableMapping
FileNotFoundError = IOError
from .posixemulation import rename
logger = logging.getLogger(__name__)
@ -33,14 +26,14 @@ class FileCache(MutableMapping):
.. NOTE::
Keys and values are always stored as :class:`bytes` objects. If data
serialization is enabled, keys are returned as :class:`str` or
:class:`unicode` objects.
serialization is enabled, keys are returned as :class:`str` objects.
If data serialization is disabled, keys are returned as a
:class:`bytes` object.
:param str appname: The app/script the cache should be associated with.
:param str flag: How the cache should be opened. See below for details.
:param mode: The Unix mode for the cache files.
:param mode: The Unix mode for the cache files or False to prevent changing
permissions.
:param str keyencoding: The encoding the keys use, defaults to 'utf-8'.
This is used if *serialize* is ``False``; the keys are treated as
:class:`bytes` objects.
@ -85,57 +78,66 @@ class FileCache(MutableMapping):
"""
def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
serialize=True, app_cache_dir=None):
def __init__(
self,
appname,
flag="c",
mode=0o666,
keyencoding="utf-8",
serialize=True,
app_cache_dir=None,
):
"""Initialize a :class:`FileCache` object."""
if not isinstance(flag, str):
raise TypeError("flag must be str not '{}'".format(type(flag)))
elif flag[0] not in 'rwcn':
raise ValueError("invalid flag: '{}', first flag must be one of "
"'r', 'w', 'c' or 'n'".format(flag))
elif len(flag) > 1 and flag[1] != 's':
raise ValueError("invalid flag: '{}', second flag must be "
"'s'".format(flag))
elif flag[0] not in "rwcn":
raise ValueError(
"invalid flag: '{}', first flag must be one of "
"'r', 'w', 'c' or 'n'".format(flag)
)
elif len(flag) > 1 and flag[1] != "s":
raise ValueError(
"invalid flag: '{}', second flag must be " "'s'".format(flag)
)
appname, subcache = self._parse_appname(appname)
if 'cache' in subcache:
if "cache" in subcache:
raise ValueError("invalid subcache name: 'cache'.")
self._is_subcache = bool(subcache)
if not app_cache_dir:
app_cache_dir = appdirs.user_cache_dir(appname, appname)
subcache_dir = os.path.join(app_cache_dir, *subcache)
self.cache_dir = os.path.join(subcache_dir, 'cache')
self.cache_dir = os.path.join(subcache_dir, "cache")
exists = os.path.exists(self.cache_dir)
if len(flag) > 1 and flag[1] == 's':
if len(flag) > 1 and flag[1] == "s":
self._sync = True
else:
self._sync = False
self._buffer = {}
if exists and 'n' in flag:
if exists and "n" in flag:
self.clear()
self.create()
elif not exists and ('c' in flag or 'n' in flag):
elif not exists and ("c" in flag or "n" in flag):
self.create()
elif not exists:
raise FileNotFoundError("no such directory: '{}'".format(
self.cache_dir))
raise FileNotFoundError("no such directory: '{}'".format(self.cache_dir))
self._flag = 'rb' if 'r' in flag else 'wb'
self._flag = "rb" if "r" in flag else "wb"
self._mode = mode
self._keyencoding = keyencoding
self._serialize = serialize
def _parse_appname(self, appname):
"""Splits an appname into the appname and subcache components."""
components = appname.split('.')
components = appname.split(".")
return components[0], components[1:]
def create(self):
"""Create the write buffer and cache directory."""
if not self._sync and not hasattr(self, '_buffer'):
if not self._sync and not hasattr(self, "_buffer"):
self._buffer = {}
if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
@ -195,11 +197,11 @@ class FileCache(MutableMapping):
:class:`str`.
"""
if isinstance(key, str) or isinstance(key, unicode):
if isinstance(key, str):
key = key.encode(self._keyencoding)
elif not isinstance(key, bytes):
raise TypeError("key must be bytes or str")
return codecs.encode(key, 'hex_codec').decode(self._keyencoding)
return codecs.encode(key, "hex_codec").decode(self._keyencoding)
def _decode_key(self, key):
"""Decode key using hex_codec to retrieve the original key.
@ -208,7 +210,7 @@ class FileCache(MutableMapping):
Keys are returned as :class:`bytes` if serialization is disabled.
"""
bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
bkey = codecs.decode(key.encode(self._keyencoding), "hex_codec")
return bkey.decode(self._keyencoding) if self._serialize else bkey
def _dumps(self, value):
@ -228,8 +230,10 @@ class FileCache(MutableMapping):
def _all_filenames(self):
"""Return a list of absolute cache filenames"""
try:
return [os.path.join(self.cache_dir, filename) for filename in
os.listdir(self.cache_dir)]
return [
os.path.join(self.cache_dir, filename)
for filename in os.listdir(self.cache_dir)
]
except (FileNotFoundError, OSError):
return []
@ -247,16 +251,13 @@ class FileCache(MutableMapping):
with os.fdopen(fh, self._flag) as f:
f.write(self._dumps(bytesvalue))
rename(tmp, filename)
os.chmod(filename, self._mode)
if self._mode:
os.chmod(filename, self._mode)
def _read_from_file(self, filename):
"""Read data from filename."""
try:
with open(filename, 'rb') as f:
return self._loads(f.read())
except (IOError, OSError):
logger.warning('Error opening file: {}'.format(filename))
raise
with open(filename, "rb") as f:
return self._loads(f.read())
def __setitem__(self, key, value):
ekey = self._encode_key(key)
@ -280,17 +281,17 @@ class FileCache(MutableMapping):
def __delitem__(self, key):
ekey = self._encode_key(key)
filename = self._key_to_filename(ekey)
found_in_buffer = hasattr(self, "_buffer") and ekey in self._buffer
if not self._sync:
try:
del self._buffer[ekey]
except KeyError:
if filename not in self._all_filenames():
raise KeyError(key)
try:
pass
filename = self._key_to_filename(ekey)
if filename in self._all_filenames():
os.remove(filename)
except (IOError, OSError):
pass
elif not found_in_buffer:
raise KeyError(key)
def __iter__(self):
for key in self._all_keys():
@ -302,3 +303,9 @@ class FileCache(MutableMapping):
def __contains__(self, key):
ekey = self._encode_key(key)
return ekey in self._all_keys()
def __enter__(self):
return self
def __exit__(self, type_, value, traceback):
self.close()

View File

@ -46,7 +46,15 @@ if os.name == 'nt': # pragma: no cover
dst = unicode(dst, sys.getfilesystemencoding())
if _rename_atomic(src, dst):
return True
return _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING | _MOVEFILE_WRITE_THROUGH)
retry = 0
rv = False
while not rv and retry < 100:
rv = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING |
_MOVEFILE_WRITE_THROUGH)
if not rv:
time.sleep(0.001)
retry += 1
return rv
# new in Vista and Windows Server 2008
_CreateTransaction = ctypes.windll.ktmw32.CreateTransaction
@ -60,11 +68,18 @@ if os.name == 'nt': # pragma: no cover
if ta == -1:
return False
try:
rv = _MoveFileTransacted(src, dst, None, None,
_MOVEFILE_REPLACE_EXISTING |
_MOVEFILE_WRITE_THROUGH, ta)
if rv:
rv = _CommitTransaction(ta)
retry = 0
rv = False
while not rv and retry < 100:
rv = _MoveFileTransacted(src, dst, None, None,
_MOVEFILE_REPLACE_EXISTING |
_MOVEFILE_WRITE_THROUGH, ta)
if rv:
rv = _CommitTransaction(ta)
break
else:
time.sleep(0.001)
retry += 1
return rv
finally:
_CloseHandle(ta)
@ -82,8 +97,8 @@ if os.name == 'nt': # pragma: no cover
if e.errno != errno.EEXIST:
raise
old = "%s-%08x" % (dst, random.randint(0, sys.maxint))
os.rename(dst, old)
os.rename(src, dst)
shutil.move(dst, old)
shutil.move(src, dst)
try:
os.unlink(old)
except Exception:

3
libs/ga4mp/__init__.py Normal file
View File

@ -0,0 +1,3 @@
from ga4mp.ga4mp import GtagMP, FirebaseMP
__all__ = ['GtagMP','FirebaseMP']

44
libs/ga4mp/event.py Normal file
View File

@ -0,0 +1,44 @@
from ga4mp.item import Item
class Event(dict):
    """A single GA4 Measurement Protocol event.

    Stored as a plain dict of the form ``{"name": ..., "params": {...}}`` so
    the event can be serialized directly into the request payload.
    """

    def __init__(self, name):
        self.set_event_name(name)

    def set_event_name(self, name):
        # GA4 limits event names to 40 characters.
        if len(name) > 40:
            raise ValueError("Event name cannot exceed 40 characters.")
        self["name"] = name

    def get_event_name(self):
        return self.get("name")

    def set_event_param(self, name, value):
        # Series of checks to comply with GA4 event collection limits:
        # https://support.google.com/analytics/answer/9267744
        if len(name) > 40:
            raise ValueError("Event parameter name cannot exceed 40 characters.")
        if name in ["page_location", "page_referrer", "page_title"] and len(str(value)) > 300:
            raise ValueError("Event parameter value for page info cannot exceed 300 characters.")
        if name not in ["page_location", "page_referrer", "page_title"] and len(str(value)) > 100:
            raise ValueError("Event parameter value cannot exceed 100 characters.")
        if "params" not in self.keys():
            self["params"] = {}
        if len(self["params"]) >= 100:
            raise RuntimeError("Event cannot contain more than 100 parameters.")
        self["params"][name] = value

    def get_event_params(self):
        return self.get("params")

    def delete_event_param(self, name):
        # Allow callers to drop a parameter that is no longer wanted. Tolerate
        # an event that has no "params" container yet instead of raising
        # KeyError (the original indexed self["params"] unconditionally).
        if "params" in self:
            self["params"].pop(name, None)

    def create_new_item(self, item_id=None, item_name=None):
        return Item(item_id=item_id, item_name=item_name)

    def add_item_to_event(self, item):
        if not isinstance(item, dict):
            raise ValueError("'item' must be an instance of a dictionary.")
        # Ensure the "params" container exists before inspecting it; the
        # original raised KeyError when no parameter had been set yet.
        if "params" not in self:
            self["params"] = {}
        if "items" not in self["params"]:
            self.set_event_param("items", [])
        self["params"]["items"].append(item)

416
libs/ga4mp/ga4mp.py Normal file
View File

@ -0,0 +1,416 @@
###############################################################################
# Google Analytics 4 Measurement Protocol for Python
# Copyright (c) 2022, Adswerve
#
# This project is free software, distributed under the BSD license.
# Adswerve offers consulting and integration services if your firm needs
# assistance in strategy, implementation, or auditing existing work.
###############################################################################
import json
import logging
import urllib.request
import time
import datetime
import random
from ga4mp.utils import params_dict
from ga4mp.event import Event
from ga4mp.store import BaseStore, DictStore
import os, sys
sys.path.append(
os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class BaseGa4mp(object):
"""
Parent class that provides an interface for sending data to Google Analytics, supporting the GA4 Measurement Protocol.
Parameters
----------
api_secret : string
Generated through the Google Analytics UI. To create a new secret, navigate in the Google Analytics UI to: Admin > Data Streams >
[choose your stream] > Measurement Protocol API Secrets > Create
See Also
--------
* Measurement Protocol (Google Analytics 4): https://developers.google.com/analytics/devguides/collection/protocol/ga4
Examples
--------
# Initialize tracking object for gtag usage
>>> ga = gtagMP(api_secret = "API_SECRET", measurement_id = "MEASUREMENT_ID", client_id="CLIENT_ID")
# Initialize tracking object for Firebase usage
>>> ga = firebaseMP(api_secret = "API_SECRET", firebase_app_id = "FIREBASE_APP_ID", app_instance_id="APP_INSTANCE_ID")
# Build an event
>>> event_type = 'new_custom_event'
>>> event_parameters = {'parameter_key_1': 'parameter_1', 'parameter_key_2': 'parameter_2'}
>>> event = {'name': event_type, 'params': event_parameters }
>>> events = [event]
# Send a custom event to GA4 immediately
>>> ga.send(events)
# Postponed send of a custom event to GA4
>>> ga.send(events, postpone=True)
>>> ga.postponed_send()
"""
def __init__(self, api_secret, store: BaseStore = None):
self._initialization_time = time.time() # used for both session_id and calculating engagement time
self.api_secret = api_secret
self._event_list = []
assert store is None or isinstance(store, BaseStore), "if supplied, store must be an instance of BaseStore"
self.store = store or DictStore()
self._check_store_requirements()
self._base_domain = "https://www.google-analytics.com/mp/collect"
self._validation_domain = "https://www.google-analytics.com/debug/mp/collect"
def _check_store_requirements(self):
# Store must contain "session_id" and "last_interaction_time_msec" in order for tracking to work properly.
if self.store.get_session_parameter("session_id") is None:
self.store.set_session_parameter(name="session_id", value=int(self._initialization_time))
# Note: "last_interaction_time_msec" factors into the required "engagement_time_msec" event parameter.
self.store.set_session_parameter(name="last_interaction_time_msec", value=int(self._initialization_time * 1000))
def create_new_event(self, name):
return Event(name=name)
def send(self, events, validation_hit=False, postpone=False, date=None):
"""
Method to send an http post request to google analytics with the specified events.
Parameters
----------
events : List[Dict]
A list of dictionaries of the events to be sent to Google Analytics. The list of dictionaries should adhere
to the following format:
[{'name': 'level_end',
'params' : {'level_name': 'First',
'success': 'True'}
},
{'name': 'level_up',
'params': {'character': 'John Madden',
'level': 'First'}
}]
validation_hit : bool, optional
Boolean to depict if events should be tested against the Measurement Protocol Validation Server, by default False
postpone : bool, optional
Boolean to depict if provided event list should be postponed, by default False
date : datetime
Python datetime object for sending a historical event at the given date. Date cannot be in the future.
"""
# check for any missing or invalid parameters among automatically collected and recommended event types
self._check_params(events)
self._check_date_not_in_future(date)
self._add_session_id_and_engagement_time(events)
if postpone is True:
# build event list to send later
for event in events:
event["_timestamp_micros"] = self._get_timestamp(time.time())
self._event_list.append(event)
else:
# batch events into sets of 25 events, the maximum allowed.
batched_event_list = [
events[event : event + 25] for event in range(0, len(events), 25)
]
# send http post request
self._http_post(
batched_event_list, validation_hit=validation_hit, date=date
)
def postponed_send(self):
"""
Method to send the events provided to Ga4mp.send(events,postpone=True)
"""
for event in self._event_list:
self._http_post([event], postpone=True)
# clear event_list for future use
self._event_list = []
def append_event_to_params_dict(self, new_name_and_parameters):
"""
Method to append event name and parameters key-value pairing(s) to parameters dictionary.
Parameters
----------
new_name_and_parameters : Dict
A dictionary with one key-value pair representing a new type of event to be sent to Google Analytics.
The dictionary should adhere to the following format:
{'new_name': ['new_param_1', 'new_param_2', 'new_param_3']}
"""
params_dict.update(new_name_and_parameters)
def _http_post(self, batched_event_list, validation_hit=False, postpone=False, date=None):
"""
Method to send http POST request to google-analytics.
Parameters
----------
batched_event_list : List[List[Dict]]
List of List of events. Places initial event payload into a list to send http POST in batches.
validation_hit : bool, optional
Boolean to depict if events should be tested against the Measurement Protocol Validation Server, by default False
postpone : bool, optional
Boolean to depict if provided event list should be postponed, by default False
date : datetime
Python datetime object for sending a historical event at the given date. Date cannot be in the future.
Timestamp micros supports up to 48 hours of backdating.
If date is specified, postpone must be False or an assertion will be thrown.
"""
self._check_date_not_in_future(date)
status_code = None # Default set to know if batch loop does not work and to bound status_code
# set domain
domain = self._base_domain
if validation_hit is True:
domain = self._validation_domain
logger.info(f"Sending POST to: {domain}")
# loop through events in batches of 25
batch_number = 1
for batch in batched_event_list:
# url and request slightly differ by subclass
url = self._build_url(domain=domain)
request = self._build_request(batch=batch)
self._add_user_props_to_hit(request)
# make adjustments for postponed hit
request["events"] = (
{"name": batch["name"], "params": batch["params"]}
if (postpone)
else batch
)
if date is not None:
logger.info(f"Setting event timestamp to: {date}")
assert (
postpone is False
), "Cannot send postponed historical hit, ensure postpone=False"
ts = self._datetime_to_timestamp(date)
ts_micro = self._get_timestamp(ts)
request["timestamp_micros"] = int(ts_micro)
logger.info(f"Timestamp of request is: {request['timestamp_micros']}")
if postpone:
# add timestamp to hit
request["timestamp_micros"] = batch["_timestamp_micros"]
req = urllib.request.Request(url)
req.add_header("Content-Type", "application/json; charset=utf-8")
jsondata = json.dumps(request)
json_data_as_bytes = jsondata.encode("utf-8") # needs to be bytes
req.add_header("Content-Length", len(json_data_as_bytes))
result = urllib.request.urlopen(req, json_data_as_bytes)
status_code = result.status
logger.info(f"Batch Number: {batch_number}")
logger.info(f"Status code: {status_code}")
batch_number += 1
return status_code
def _check_params(self, events):
"""
Method to check whether the provided event payload parameters align with supported parameters.
Parameters
----------
events : List[Dict]
A list of dictionaries of the events to be sent to Google Analytics. The list of dictionaries should adhere
to the following format:
[{'name': 'level_end',
'params' : {'level_name': 'First',
'success': 'True'}
},
{'name': 'level_up',
'params': {'character': 'John Madden',
'level': 'First'}
}]
"""
# check to make sure it's a list of dictionaries with the right keys
assert type(events) == list, "events should be a list"
for event in events:
assert isinstance(event, dict), "each event should be an instance of a dictionary"
assert "name" in event, 'each event should have a "name" key'
assert "params" in event, 'each event should have a "params" key'
# check for any missing or invalid parameters
for e in events:
event_name = e["name"]
event_params = e["params"]
if event_name in params_dict.keys():
for parameter in params_dict[event_name]:
if parameter not in event_params.keys():
logger.warning(
f"WARNING: Event parameters do not match event type.\nFor {event_name} event type, the correct parameter(s) are {params_dict[event_name]}.\nThe parameter '{parameter}' triggered this warning.\nFor a breakdown of currently supported event types and their parameters go here: https://support.google.com/analytics/answer/9267735\n"
)
def _add_session_id_and_engagement_time(self, events):
"""
Method to add the session_id and engagement_time_msec parameter to all events.
"""
for event in events:
current_time_in_milliseconds = int(time.time() * 1000)
event_params = event["params"]
if "session_id" not in event_params.keys():
event_params["session_id"] = self.store.get_session_parameter("session_id")
if "engagement_time_msec" not in event_params.keys():
last_interaction_time = self.store.get_session_parameter("last_interaction_time_msec")
event_params["engagement_time_msec"] = current_time_in_milliseconds - last_interaction_time if current_time_in_milliseconds > last_interaction_time else 0
self.store.set_session_parameter(name="last_interaction_time_msec", value=current_time_in_milliseconds)
def _add_user_props_to_hit(self, hit):
"""
Method is a helper function to add user properties to outgoing hits.
Parameters
----------
hit : dict
"""
for key in self.store.get_all_user_properties():
try:
if key in ["user_id", "non_personalized_ads"]:
hit.update({key: self.store.get_user_property(key)})
else:
if "user_properties" not in hit.keys():
hit.update({"user_properties": {}})
hit["user_properties"].update(
{key: {"value": self.store.get_user_property(key)}}
)
except:
logger.info(f"Failed to add user property to outgoing hit: {key}")
def _get_timestamp(self, timestamp):
"""
Method returns UNIX timestamp in microseconds for postponed hits.
Parameters
----------
None
"""
return int(timestamp * 1e6)
def _datetime_to_timestamp(self, dt):
"""
Private method to convert a datetime object into a timestamp
Parameters
----------
dt : datetime
A datetime object in any format
Returns
-------
timestamp
A UNIX timestamp in milliseconds
"""
return time.mktime(dt.timetuple())
def _check_date_not_in_future(self, date):
"""
Method to check that provided date is not in the future.
Parameters
----------
date : datetime
Python datetime object
"""
if date is None:
pass
else:
assert (
date <= datetime.datetime.now()
), "Provided date cannot be in the future"
def _build_url(self, domain):
raise NotImplementedError("Subclass should be using this function, but it was called through the base class instead.")
def _build_request(self, batch):
raise NotImplementedError("Subclass should be using this function, but it was called through the base class instead.")
class GtagMP(BaseGa4mp):
    """
    Subclass for users of gtag. See `Ga4mp` parent class for examples.

    Parameters
    ----------
    measurement_id : string
        The identifier for a Data Stream. Found in the Google Analytics UI under: Admin > Data Streams > [choose your stream] > Measurement ID (top-right)
    client_id : string
        A unique identifier for a client, representing a specific browser/device.
    """

    def __init__(self, api_secret, measurement_id, client_id):
        super().__init__(api_secret)
        self.measurement_id = measurement_id
        self.client_id = client_id

    def _build_url(self, domain):
        # The measurement id and API secret travel as query-string parameters.
        return domain + "?measurement_id=" + self.measurement_id + "&api_secret=" + self.api_secret

    def _build_request(self, batch):
        # gtag hits are keyed by client_id.
        return {"client_id": self.client_id, "events": batch}

    def random_client_id(self):
        """
        Utility function for generating a new client ID matching the typical format of 10 random digits and the UNIX timestamp in seconds, joined by a period.
        """
        return f"{random.randint(0, 9999999999):010d}.{int(time.time())}"
class FirebaseMP(BaseGa4mp):
    """
    Subclass for users of Firebase. See `Ga4mp` parent class for examples.

    Parameters
    ----------
    firebase_app_id : string
        The identifier for a Firebase app. Found in the Firebase console under: Project Settings > General > Your Apps > App ID.
    app_instance_id : string
        A unique identifier for a Firebase app instance.

        * Android - getAppInstanceId() - https://firebase.google.com/docs/reference/android/com/google/firebase/analytics/FirebaseAnalytics#public-taskstring-getappinstanceid
        * Kotlin - getAppInstanceId() - https://firebase.google.com/docs/reference/kotlin/com/google/firebase/analytics/FirebaseAnalytics#getappinstanceid
        * Swift - appInstanceID() - https://firebase.google.com/docs/reference/swift/firebaseanalytics/api/reference/Classes/Analytics#appinstanceid
        * Objective-C - appInstanceID - https://firebase.google.com/docs/reference/ios/firebaseanalytics/api/reference/Classes/FIRAnalytics#+appinstanceid
        * C++ - GetAnalyticsInstanceId() - https://firebase.google.com/docs/reference/cpp/namespace/firebase/analytics#getanalyticsinstanceid
        * Unity - GetAnalyticsInstanceIdAsync() - https://firebase.google.com/docs/reference/unity/class/firebase/analytics/firebase-analytics#getanalyticsinstanceidasync
    """

    def __init__(self, api_secret, firebase_app_id, app_instance_id):
        super().__init__(api_secret)
        self.firebase_app_id = firebase_app_id
        self.app_instance_id = app_instance_id

    def _build_url(self, domain):
        # The Firebase app id and API secret travel as query-string parameters.
        return "".join([domain, "?firebase_app_id=", self.firebase_app_id, "&api_secret=", self.api_secret])

    def _build_request(self, batch):
        # Firebase hits are keyed by app_instance_id.
        return {"app_instance_id": self.app_instance_id, "events": batch}

11
libs/ga4mp/item.py Normal file
View File

@ -0,0 +1,11 @@
class Item(dict):
    """
    Dictionary wrapper representing a single GA4 e-commerce item.

    At least one of ``item_id`` / ``item_name`` must be supplied;
    ``item_id`` is always stored as a string.
    """

    def __init__(self, item_id=None, item_name=None):
        if item_id is None and item_name is None:
            raise ValueError("At least one of 'item_id' and 'item_name' is required.")
        pairs = (
            ("item_id", None if item_id is None else str(item_id)),
            ("item_name", item_name),
        )
        for key, value in pairs:
            if value is not None:
                self.set_parameter(key, value)

    def set_parameter(self, name, value):
        # Parameters are stored directly as dictionary entries.
        self[name] = value

116
libs/ga4mp/store.py Normal file
View File

@ -0,0 +1,116 @@
import json
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class BaseStore(dict):
    """
    Dict-backed container splitting stored values into buckets:
    "user_properties", "session_parameters", and a lazily-created "other".
    Subclasses decide how `save` persists the contents.
    """

    def __init__(self):
        super().__init__()
        self["user_properties"] = {}
        self["session_parameters"] = {}

    def save(self):
        raise NotImplementedError("Subclass should be using this function, but it was called through the base class instead.")

    def _check_exists(self, key):
        # Lazily create a bucket the first time it is touched.
        self.setdefault(key, {})

    def _set(self, param_type, name, value):
        # Write one value into the given bucket (user, session, or other).
        self._check_exists(param_type)
        self[param_type][name] = value

    def _get_one(self, param_type, name):
        # Read one value from the given bucket; None when absent.
        self._check_exists(param_type)
        return self[param_type].get(name)

    def _get_all(self, param_type=None):
        # One bucket when named, otherwise the entire store.
        return self[param_type] if param_type is not None else self

    # Public wrappers keep the user/session distinction explicit for callers.
    def set_user_property(self, name, value):
        self._set("user_properties", name, value)

    def get_user_property(self, name):
        return self._get_one("user_properties", name)

    def get_all_user_properties(self):
        return self._get_all("user_properties")

    def clear_user_properties(self):
        self["user_properties"] = {}

    def set_session_parameter(self, name, value):
        self._set("session_parameters", name, value)

    def get_session_parameter(self, name):
        return self._get_one("session_parameters", name)

    def get_all_session_parameters(self):
        return self._get_all("session_parameters")

    def clear_session_parameters(self):
        self["session_parameters"] = {}

    # Similar wrappers for items that don't fit the other two categories.
    def set_other_parameter(self, name, value):
        self._set("other", name, value)

    def get_other_parameter(self, name):
        return self._get_one("other", name)

    def get_all_other_parameters(self):
        return self._get_all("other")

    def clear_other_parameters(self):
        self["other"] = {}
class DictStore(BaseStore):
    """In-memory store whose contents persist for the life of the object."""

    def __init__(self, data: dict = None):
        """
        Parameters
        ----------
        data : dict, optional
            Previously saved contents to seed the store with.
        """
        super().__init__()
        if data:
            self.update(data)

    def save(self):
        """
        Return the store's current contents so the caller can decide how to
        persist them.

        Fix: the original computed ``self._get_all()`` but never returned it,
        so ``save()`` always yielded ``None`` despite its stated purpose.
        """
        return self._get_all()
class FileStore(BaseStore):
    """Store persisted as a JSON file at ``data_location``."""

    def __init__(self, data_location: str = None):
        """
        Parameters
        ----------
        data_location : str
            Path of the JSON file backing this store.
        """
        super().__init__()
        self.data_location = data_location
        try:
            # Fix: _load_file takes no arguments; the original passed
            # data_location, which always raised TypeError (swallowed by a
            # bare except), making loading a silent no-op.
            self._load_file()
        except Exception:
            logger.info(f"Failed to find file at location: {data_location}")

    def _load_file(self):
        # Function to get data from the object's initialized location.
        # If the stored data_location exists, read the file and merge its contents.
        if Path(self.data_location).exists():
            with open(self.data_location, "r") as json_file:
                # Fix: the original rebound the local name `self` to the parsed
                # JSON, discarding the loaded data; merge into the store instead.
                self.update(json.load(json_file))
        # If the data_location doesn't exist, try to create a new starter JSON file at the location given.
        else:
            starter_json = {"user_properties": {}, "session_parameters": {}}
            Path(self.data_location).touch()
            with open(self.data_location, "w") as json_file:
                # Fix: the original called json.dumps(obj, fp), which treats the
                # file object as the `skipkeys` argument and writes nothing;
                # json.dump actually writes to the file.
                json.dump(starter_json, json_file)

    def save(self):
        """Write the current contents to the JSON file; best-effort (logs on failure)."""
        try:
            with open(self.data_location, "w") as outfile:
                json.dump(self, outfile)
        except Exception:
            logger.info(f"Failed to save file at location: {self.data_location}")

392
libs/ga4mp/utils.py Normal file
View File

@ -0,0 +1,392 @@
# all automatically collected and recommended event types
# Maps each known GA4 event name to the list of parameter names documented for
# that event type.  Consumed by the ga4mp client's _check_params to warn when
# an outgoing event is missing one of its expected parameters.
# Reference: https://support.google.com/analytics/answer/9267735
params_dict = {
    "ad_click": [
        "ad_event_id"
    ],
    "ad_exposure": [
        "firebase_screen",
        "firebase_screen_id",
        "firebase_screen_class",
        "exposure_time",
    ],
    "ad_impression": [
        "ad_event_id"
    ],
    "ad_query": [
        "ad_event_id"
    ],
    "ad_reward": [
        "ad_unit_id",
        "reward_type",
        "reward_value"
    ],
    "add_payment_info": [
        "coupon",
        "currency",
        "items",
        "payment_type",
        "value"
    ],
    "add_shipping_info": [
        "coupon",
        "currency",
        "items",
        "shipping_tier",
        "value"
    ],
    "add_to_cart": [
        "currency",
        "items",
        "value"
    ],
    "add_to_wishlist": [
        "currency",
        "items",
        "value"
    ],
    "adunit_exposure": [
        "firebase_screen",
        "firebase_screen_id",
        "firebase_screen_class",
        "exposure_time",
    ],
    "app_clear_data": [],
    "app_exception": [
        "fatal",
        "timestamp",
        "engagement_time_msec"
    ],
    "app_remove": [],
    "app_store_refund": [
        "product_id",
        "value",
        "currency",
        "quantity"
    ],
    "app_store_subscription_cancel": [
        "product_id",
        "price",
        "value",
        "currency",
        "cancellation_reason",
    ],
    "app_store_subscription_convert": [
        "product_id",
        "price",
        "value",
        "currency",
        "quantity",
    ],
    "app_store_subscription_renew": [
        "product_id",
        "price",
        "value",
        "currency",
        "quantity",
        "renewal_count",
    ],
    "app_update": [
        "previous_app_version"
    ],
    "begin_checkout": [
        "coupon",
        "currency",
        "items",
        "value"
    ],
    "click": [],
    "dynamic_link_app_open": [
        "source",
        "medium",
        "campaign",
        "link_id",
        "accept_time"
    ],
    "dynamic_link_app_update": [
        "source",
        "medium",
        "campaign",
        "link_id",
        "accept_time",
    ],
    "dynamic_link_first_open": [
        "source",
        "medium",
        "campaign",
        "link_id",
        "accept_time",
    ],
    "earn_virtual_currency": [
        "virtual_currency_name",
        "value"
    ],
    "error": [
        "firebase_error",
        "firebase_error_value"
    ],
    "file_download": [
        "file_extension",
        "file_name",
        "link_classes",
        "link_domain",
        "link_id",
        "link_text",
        "link_url",
    ],
    "firebase_campaign": [
        "source",
        "medium",
        "campaign",
        "term",
        "content",
        "gclid",
        "aclid",
        "cp1",
        "anid",
        "click_timestamp",
        "campaign_info_source",
    ],
    "firebase_in_app_message_action": [
        "message_name",
        "message_device_time",
        "message_id",
    ],
    "firebase_in_app_message_dismiss": [
        "message_name",
        "message_device_time",
        "message_id",
    ],
    "firebase_in_app_message_impression": [
        "message_name",
        "message_device_time",
        "message_id",
    ],
    "first_open": [
        "previous_gmp_app_id",
        "updated_with_analytics",
        "previous_first_open_count",
        "system_app",
        "system_app_update",
        "deferred_analytics_collection",
        "reset_analytics_cause",
        "engagement_time_msec",
    ],
    "first_visit": [],
    "generate_lead": [
        "value",
        "currency"
    ],
    "in_app_purchase": [
        "product_id",
        "price",
        "value",
        "currency",
        "quantity",
        "subscription",
        "free_trial",
        "introductory_price",
    ],
    "join_group": [
        "group_id"
    ],
    "level_end": [
        "level_name",
        "success"
    ],
    "level_start": [
        "level_name"
    ],
    "level_up": [
        "character",
        "level"
    ],
    "login": [
        "method"
    ],
    "notification_dismiss": [
        "message_name",
        "message_time",
        "message_device_time",
        "message_id",
        "topic",
        "label",
        "message_channel",
    ],
    "notification_foreground": [
        "message_name",
        "message_time",
        "message_device_time",
        "message_id",
        "topic",
        "label",
        "message_channel",
        "message_type",
    ],
    "notification_open": [
        "message_name",
        "message_time",
        "message_device_time",
        "message_id",
        "topic",
        "label",
        "message_channel",
    ],
    "notification_receive": [
        "message_name",
        "message_time",
        "message_device_time",
        "message_id",
        "topic",
        "label",
        "message_channel",
        "message_type",
    ],
    "notification_send": [
        "message_name",
        "message_time",
        "message_device_time",
        "message_id",
        "topic",
        "label",
        "message_channel",
    ],
    "os_update": [
        "previous_os_version"
    ],
    "page_view": [
        "page_location",
        "page_referrer"
    ],
    "post_score": [
        "level",
        "character",
        "score"
    ],
    "purchase": [
        "affiliation",
        "coupon",
        "currency",
        "items",
        "transaction_id",
        "shipping",
        "tax",
        "value",
    ],
    "refund": [
        "transaction_id",
        "value",
        "currency",
        "tax",
        "shipping",
        "items"
    ],
    "remove_from_cart": [
        "currency",
        "items",
        "value"
    ],
    "screen_view": [
        "firebase_screen",
        "firebase_screen_class",
        "firebase_screen_id",
        "firebase_previous_screen",
        "firebase_previous_class",
        "firebase_previous_id",
        "engagement_time_msec",
    ],
    "scroll": [],
    "search": [
        "search_term"
    ],
    "select_content": [
        "content_type",
        "item_id"
    ],
    "select_item": [
        "items",
        "item_list_name",
        "item_list_id"
    ],
    "select_promotion": [
        "items",
        "promotion_id",
        "promotion_name",
        "creative_name",
        "creative_slot",
        "location_id",
    ],
    "session_start": [],
    "share": [
        "content_type",
        "item_id"
    ],
    "sign_up": [
        "method"
    ],
    "view_search_results": [
        "search_term"
    ],
    "spend_virtual_currency": [
        "item_name",
        "virtual_currency_name",
        "value"
    ],
    "tutorial_begin": [],
    "tutorial_complete": [],
    "unlock_achievement": [
        "achievement_id"
    ],
    "user_engagement": [
        "engagement_time_msec"
    ],
    "video_start": [
        "video_current_time",
        "video_duration",
        "video_percent",
        "video_provider",
        "video_title",
        "video_url",
        "visible",
    ],
    "video_progress": [
        "video_current_time",
        "video_duration",
        "video_percent",
        "video_provider",
        "video_title",
        "video_url",
        "visible",
    ],
    "video_complete": [
        "video_current_time",
        "video_duration",
        "video_percent",
        "video_provider",
        "video_title",
        "video_url",
        "visible",
    ],
    "view_cart": [
        "currency",
        "items",
        "value"
    ],
    "view_item": [
        "currency",
        "items",
        "value"
    ],
    "view_item_list": [
        "items",
        "item_list_name",
        "item_list_id"
    ],
    "view_promotion": [
        "items",
        "promotion_id",
        "promotion_name",
        "creative_name",
        "creative_slot",
        "location_id",
    ],
}

View File

@ -1,10 +1,9 @@
"""Know your media files better."""
__title__ = 'knowit'
__version__ = '0.4.0'
__short_version__ = '.'.join(__version__.split('.')[:2])
__version__ = '0.5.2'
__short_version__ = '0.5'
__author__ = 'Rato AQ2'
__license__ = 'MIT'
__copyright__ = 'Copyright 2016-2021, Rato AQ2'
__url__ = 'https://github.com/ratoaq2/knowit'
#: Video extensions

View File

@ -169,7 +169,7 @@ def dumps(
return convert(info, context)
def main(args: typing.List[str] = None) -> None:
def main(args: typing.Optional[typing.List[str]] = None) -> None:
"""Execute main function for entry point."""
argument_parser = build_argument_parser()
args = args or sys.argv[1:]

View File

@ -65,7 +65,7 @@ def know(
raise KnowitException(debug_info(context=context, exc_info=True))
def dependencies(context: typing.Mapping = None) -> typing.Mapping:
def dependencies(context: typing.Optional[typing.Mapping] = None) -> typing.Mapping:
"""Return all dependencies detected by knowit."""
deps = {}
try:

View File

@ -63,6 +63,17 @@ class Property(Reportable[T]):
# Used to detect duplicated values. e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive
self.delimiter = delimiter
@classmethod
def _extract_value(cls,
track: typing.Mapping,
name: str,
names: typing.List[str]):
if len(names) == 2:
parent_value = track.get(names[0], track.get(names[0].upper(), {}))
return parent_value.get(names[1], parent_value.get(names[1].upper()))
return track.get(name, track.get(name.upper()))
def extract_value(
self,
track: typing.Mapping,
@ -71,7 +82,7 @@ class Property(Reportable[T]):
"""Extract the property value from a given track."""
for name in self.names:
names = name.split('.')
value = track.get(names[0], {}).get(names[1]) if len(names) == 2 else track.get(name)
value = self._extract_value(track, name, names)
if value is None:
if self.default is None:
continue
@ -216,9 +227,10 @@ class MultiValue(Property):
class Rule(Reportable[T]):
"""Rule abstract class."""
def __init__(self, name: str, override=False, **kwargs):
def __init__(self, name: str, private=False, override=False, **kwargs):
"""Initialize the object."""
super().__init__(name, **kwargs)
self.private = private
self.override = override
def execute(self, props, pv_props, context: typing.Mapping):

View File

@ -455,46 +455,46 @@ profiles:
VideoProfileLevel:
L1:
default: "1"
default: '1'
technical: Level 1
L11:
default: "1.1"
default: '1.1'
technical: Level 1.1
L13:
default: "1.3"
default: '1.3'
technical: Level 1.3
L2:
default: "2"
default: '2'
technical: Level 2
L21:
default: "2.1"
default: '2.1'
technical: Level 2.1
L22:
default: "2.2"
default: '2.2'
technical: Level 2.2
L3:
default: "3"
default: '3'
technical: Level 3
L31:
default: "3.1"
default: '3.1'
technical: Level 3.1
L32:
default: "3.2"
default: '3.2'
technical: Level 3.2
L4:
default: "4"
default: '4'
technical: Level 4
L41:
default: "4.1"
default: '4.1'
technical: Level 4.1
L42:
default: "4.2"
default: '4.2'
technical: Level 4.2
L5:
default: "5"
default: '5'
technical: Level 5
L51:
default: "5.1"
default: '5.1'
technical: Level 5.1
LOW:
default: Low

View File

@ -106,11 +106,12 @@ class Ratio(Property[Decimal]):
if (width, height) == ('0', '1'): # identity
return Decimal('1.0')
result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
if self.unit:
result *= self.unit
if height:
result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
if self.unit:
result *= self.unit
return result
return result
self.report(value, context)
return None

10
libs/knowit/provider.py Normal file → Executable file
View File

@ -103,10 +103,7 @@ class Provider:
value = prop.extract_value(track, context)
if value is not None:
if not prop.private:
which = props
else:
which = pv_props
which = props if not prop.private else pv_props
which[name] = value
for name, rule in self.rules.get(track_type, {}).items():
@ -116,8 +113,9 @@ class Provider:
value = rule.execute(props, pv_props, context)
if value is not None:
props[name] = value
elif name in props and not rule.override:
which = props if not rule.private else pv_props
which[name] = value
elif name in props and (not rule.override or props[name] is None):
del props[name]
return props

View File

@ -26,6 +26,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import to_dict
@ -83,17 +84,20 @@ class EnzymeProvider(Provider):
},
}, {
'video': {
'language': LanguageRule('video language'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
'language': LanguageRule('audio language'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
},
'subtitle': {
'language': LanguageRule('subtitle language'),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
'closed_caption': ClosedCaptionRule('closed caption'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('subtitle language', override=True),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
'closed_caption': ClosedCaptionRule('closed caption', override=True),
}
})
@ -130,7 +134,8 @@ class EnzymeProvider(Provider):
if logger.level == logging.DEBUG:
logger.debug('Video {video_path} scanned using Enzyme {version} has raw data:\n{data}',
video_path=video_path, version=enzyme.__version__, data=json.dumps(data))
video_path=video_path, version=enzyme.__version__,
data=json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False))
result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
data.get('audio_tracks'), data.get('subtitle_tracks'), context)

View File

@ -34,6 +34,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import (
@ -77,7 +78,7 @@ class FFmpegExecutor:
def extract_info(self, filename):
"""Extract media info."""
json_dump = self._execute(filename)
return json.loads(json_dump)
return json.loads(json_dump) if json_dump else {}
def _execute(self, filename):
raise NotImplementedError
@ -144,7 +145,7 @@ class FFmpegProvider(Provider):
'id': Basic('index', data_type=int, allow_fallback=True, description='video track number'),
'name': Property('tags.title', description='video track name'),
'language': Language('tags.language', description='video language'),
'duration': Duration('duration', description='video duration'),
'duration': Duration('duration', 'tags.duration', description='video duration'),
'width': Quantity('width', unit=units.pixel),
'height': Quantity('height', unit=units.pixel),
'scan_type': ScanType(config, 'field_order', default='Progressive', description='video scan type'),
@ -153,7 +154,7 @@ class FFmpegProvider(Provider):
'resolution': None, # populated with ResolutionRule
'frame_rate': Ratio('r_frame_rate', unit=units.FPS, description='video frame rate'),
# frame_rate_mode
'bit_rate': Quantity('bit_rate', unit=units.bps, description='video bit rate'),
'bit_rate': Quantity('bit_rate', 'tags.bps', unit=units.bps, description='video bit rate'),
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='video bit depth'),
'codec': VideoCodec(config, 'codec_name', description='video codec'),
'profile': VideoProfile(config, 'profile', description='video codec profile'),
@ -166,13 +167,13 @@ class FFmpegProvider(Provider):
'id': Basic('index', data_type=int, allow_fallback=True, description='audio track number'),
'name': Property('tags.title', description='audio track name'),
'language': Language('tags.language', description='audio language'),
'duration': Duration('duration', description='audio duration'),
'duration': Duration('duration', 'tags.duration', description='audio duration'),
'codec': AudioCodec(config, 'profile', 'codec_name', description='audio codec'),
'profile': AudioProfile(config, 'profile', description='audio codec profile'),
'channels_count': AudioChannels('channels', description='audio channels count'),
'channels': None, # populated with AudioChannelsRule
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='audio bit depth'),
'bit_rate': Quantity('bit_rate', unit=units.bps, description='audio bit rate'),
'bit_rate': Quantity('bit_rate', 'tags.bps', unit=units.bps, description='audio bit rate'),
'sampling_rate': Quantity('sample_rate', unit=units.Hz, description='audio sampling rate'),
'forced': YesNo('disposition.forced', hide_value=False, description='audio track forced'),
'default': YesNo('disposition.default', hide_value=False, description='audio track default'),
@ -190,17 +191,20 @@ class FFmpegProvider(Provider):
},
}, {
'video': {
'language': LanguageRule('video language'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
'language': LanguageRule('audio language'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
},
'subtitle': {
'language': LanguageRule('subtitle language'),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
'closed_caption': ClosedCaptionRule('closed caption'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('subtitle language', override=True),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
'closed_caption': ClosedCaptionRule('closed caption', override=True),
},
})
self.executor = FFmpegExecutor.get_executor_instance(suggested_path)

View File

@ -1,5 +1,6 @@
import ctypes
import json
import os
import re
from ctypes import c_void_p, c_wchar_p
from decimal import Decimal
@ -43,6 +44,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
from knowit.rules.general import GuessTitleRule
from knowit.units import units
from knowit.utils import (
define_candidate,
@ -77,7 +79,7 @@ class MediaInfoExecutor:
locations = {
'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
'windows': ('__PATH__', ),
'windows': ('C:\\Program Files\\MediaInfo', 'C:\\Program Files (x86)\\MediaInfo', '__PATH__'),
'macos': ('__PATH__', ),
}
@ -121,12 +123,28 @@ class MediaInfoCliExecutor(MediaInfoExecutor):
}
def _execute(self, filename):
return json.loads(check_output([self.location, '--Output=JSON', '--Full', filename]).decode())
data = check_output([self.location, '--Output=JSON', '--Full', filename]).decode()
return json.loads(data) if data else {}
@classmethod
def _is_gui_exe(cls, candidate: str):
if not candidate.endswith('MediaInfo.exe') or not os.path.isfile(candidate):
return False
try:
shell32 = ctypes.WinDLL('shell32', use_last_error=True) # type: ignore
return bool(shell32.ExtractIconExW(candidate, 0, None, None, 1))
except Exception:
return False
@classmethod
def create(cls, os_family=None, suggested_path=None):
"""Create the executor instance."""
for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
if cls._is_gui_exe(candidate):
continue
try:
output = check_output([candidate, '--version']).decode()
version = cls._get_version(output)
@ -154,7 +172,9 @@ class MediaInfoCTypesExecutor(MediaInfoExecutor):
def _execute(self, filename):
# Create a MediaInfo handle
return json.loads(MediaInfo.parse(filename, library_file=self.location, output='JSON'))
data = MediaInfo.parse(filename, library_file=self.location, output='JSON')
return json.loads(data) if data else {}
@classmethod
def create(cls, os_family=None, suggested_path=None):
@ -254,19 +274,22 @@ class MediaInfoProvider(Provider):
},
}, {
'video': {
'language': LanguageRule('video language'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
'language': LanguageRule('audio language'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
'_atmosrule': AtmosRule(config, 'atmos rule'),
'_dtshdrule': DtsHdRule(config, 'dts-hd rule'),
'atmos': AtmosRule(config, 'atmos rule', private=True),
'dtshd': DtsHdRule(config, 'dts-hd rule', private=True),
},
'subtitle': {
'language': LanguageRule('subtitle language'),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
'closed_caption': ClosedCaptionRule('closed caption'),
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('subtitle language', override=True),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
'closed_caption': ClosedCaptionRule('closed caption', override=True),
}
})
self.executor = MediaInfoExecutor.get_executor_instance(suggested_path)

View File

@ -28,6 +28,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import define_candidate, detect_os
@ -67,7 +68,7 @@ class MkvMergeExecutor:
def extract_info(self, filename):
"""Extract media info."""
json_dump = self._execute(filename)
return json.loads(json_dump)
return json.loads(json_dump) if json_dump else {}
def _execute(self, filename):
raise NotImplementedError
@ -166,17 +167,20 @@ class MkvMergeProvider(Provider):
},
}, {
'video': {
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
},
'subtitle': {
'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('subtitle language', override=True),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
'closed_caption': ClosedCaptionRule('closed caption'),
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
'closed_caption': ClosedCaptionRule('closed caption', override=True),
}
})
self.executor = MkvMergeExecutor.get_executor_instance(suggested_path)

View File

@ -1,8 +1,6 @@
import re
from logging import NullHandler, getLogger
import babelfish
from trakit.api import trakit
from knowit.core import Rule
@ -10,22 +8,27 @@ logger = getLogger(__name__)
logger.addHandler(NullHandler())
class LanguageRule(Rule):
"""Language rules."""
name_re = re.compile(r'(?P<name>\w+)\b', re.IGNORECASE)
class GuessTitleRule(Rule):
"""Guess properties from track title."""
def execute(self, props, pv_props, context):
"""Language detection using name."""
if 'language' in props:
if 'name' in props:
language = props.get('language')
options = {'expected_language': language} if language else {}
guessed = trakit(props['name'], options)
if guessed:
return guessed
class LanguageRule(Rule):
"""Language rules."""
def execute(self, props, pv_props, context):
"""Language detection using name."""
if 'guessed' not in pv_props:
return
if 'name' in props:
name = props.get('name', '')
match = self.name_re.match(name)
if match:
try:
return babelfish.Language.fromname(match.group('name'))
except babelfish.Error:
pass
logger.info('Invalid %s: %r', self.description, name)
guess = pv_props['guessed']
if 'language' in guess:
return guess['language']

View File

@ -10,18 +10,19 @@ class ClosedCaptionRule(Rule):
def execute(self, props, pv_props, context):
"""Execute closed caption rule."""
for name in (pv_props.get('_closed_caption'), props.get('name')):
if name and self.cc_re.search(name):
return True
if '_closed_caption' in pv_props and self.cc_re.search(pv_props['_closed_caption']):
return True
if 'guessed' in pv_props:
guessed = pv_props['guessed']
return guessed.get('closed_caption')
class HearingImpairedRule(Rule):
"""Hearing Impaired rule."""
hi_re = re.compile(r'(\bsdh\b)', re.IGNORECASE)
def execute(self, props, pv_props, context):
"""Hearing Impaired."""
name = props.get('name')
if name and self.hi_re.search(name):
return True
if 'guessed' in pv_props:
guessed = pv_props['guessed']
return guessed.get('hearing_impaired')

View File

@ -1,10 +1,5 @@
import typing
try:
import pint
except ImportError:
pint = False
class NullRegistry:
"""A NullRegistry that masquerades as a pint.UnitRegistry."""
@ -25,9 +20,18 @@ class NullRegistry:
def _build_unit_registry():
registry = pint.UnitRegistry() if pint else NullRegistry()
registry.define('FPS = 1 * hertz')
return registry
try:
import pint
registry = pint.UnitRegistry()
registry.define('FPS = 1 * hertz')
pint.set_application_registry(registry)
return registry
except ModuleNotFoundError:
pass
return NullRegistry()
units = _build_unit_registry()

View File

@ -1,8 +0,0 @@
from pyga.requests import Q
def shutdown():
    '''
    Fire all stored GIF requests one by one.

    You should call this if you set Config.queue_requests = True.

    Bug fix: the previous implementation used map(), which is lazy on
    Python 3 -- the lambda was never invoked, so the queued requests
    were silently dropped. An explicit loop actually fires them.
    '''
    for func in Q.REQ_ARRAY:
        func()

View File

@ -1,512 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import datetime
from operator import itemgetter
from pyga import utils
from pyga import exceptions
try:
from urlparse import urlparse
from urllib import unquote_plus
except ImportError as e:
from urllib.parse import urlparse
from urllib.parse import unquote_plus
__author__ = "Arun KR (kra3) <the1.arun@gmail.com>"
__license__ = "Simplified BSD"
class Campaign(object):
    '''
    A representation of a Google Analytics campaign (the "__utmz" cookie).

    Properties:
    _type -- See TYPE_* constants, will be mapped to "__utmz" parameter.
    creation_time -- Time of the creation of this campaign, will be mapped to "__utmz" parameter.
    response_count -- Response Count, will be mapped to "__utmz" parameter.
                      Is also used to determine whether the campaign is new or repeated,
                      which will be mapped to "utmcn" and "utmcr" parameters.
    id -- Campaign ID, a.k.a. "utm_id" query parameter for ga.js.
          Will be mapped to "__utmz" parameter.
    source -- Source, a.k.a. "utm_source" query parameter for ga.js.
              Will be mapped to "utmcsr" key in "__utmz" parameter.
    g_click_id -- Google AdWords Click ID, a.k.a. "gclid" query parameter for ga.js.
                  Will be mapped to "utmgclid" key in "__utmz" parameter.
    d_click_id -- DoubleClick Click ID. Will be mapped to "utmdclid" key in "__utmz" parameter.
    name -- Name, a.k.a. "utm_campaign" query parameter for ga.js.
            Will be mapped to "utmccn" key in "__utmz" parameter.
    medium -- Medium, a.k.a. "utm_medium" query parameter for ga.js.
              Will be mapped to "utmcmd" key in "__utmz" parameter.
    term -- Terms/Keywords, a.k.a. "utm_term" query parameter for ga.js.
            Will be mapped to "utmctr" key in "__utmz" parameter.
    content -- Ad Content Description, a.k.a. "utm_content" query parameter for ga.js.
               Will be mapped to "utmcct" key in "__utmz" parameter.
    '''

    TYPE_DIRECT = 'direct'
    TYPE_ORGANIC = 'organic'
    TYPE_REFERRAL = 'referral'

    CAMPAIGN_DELIMITER = '|'

    # Maps "__utmz" cookie keys to the attribute names they populate.
    UTMZ_PARAM_MAP = {
        'utmcid': 'id',
        'utmcsr': 'source',
        'utmgclid': 'g_click_id',
        'utmdclid': 'd_click_id',
        'utmccn': 'name',
        'utmcmd': 'medium',
        'utmctr': 'term',
        'utmcct': 'content',
    }

    def __init__(self, typ):
        self._type = None
        self.creation_time = None
        self.response_count = 0
        self.id = None
        self.source = None
        self.g_click_id = None
        self.d_click_id = None
        self.name = None
        self.medium = None
        self.term = None
        self.content = None

        if typ:
            # Consistency fix: validate against the class constants instead
            # of re-spelling their string values inline.
            if typ not in (Campaign.TYPE_DIRECT, Campaign.TYPE_ORGANIC, Campaign.TYPE_REFERRAL):
                raise ValueError('Campaign type has to be one of the Campaign::TYPE_* constant values.')

            self._type = typ
            # Pre-populate the GA "magic" values that ga.js uses for each type.
            if typ == Campaign.TYPE_DIRECT:
                self.name = '(direct)'
                self.source = '(direct)'
                self.medium = '(none)'
            elif typ == Campaign.TYPE_REFERRAL:
                self.name = '(referral)'
                self.medium = 'referral'
            elif typ == Campaign.TYPE_ORGANIC:
                self.name = '(organic)'
                self.medium = 'organic'
            else:
                self._type = None

        self.creation_time = datetime.utcnow()

    def validate(self):
        '''Raise ValidationError unless the mandatory "source" is set.'''
        if not self.source:
            raise exceptions.ValidationError('Campaigns need to have at least the "source" attribute defined.')

    @staticmethod
    def create_from_referrer(url):
        '''Build a referral campaign from a referrer URL (host -> source, path -> content).'''
        obj = Campaign(Campaign.TYPE_REFERRAL)
        parse_rslt = urlparse(url)
        obj.source = parse_rslt.netloc
        obj.content = parse_rslt.path

        return obj

    def extract_from_utmz(self, utmz):
        '''Populate this instance from a raw "__utmz" cookie value; returns self.'''
        parts = utmz.split('.', 4)

        if len(parts) != 5:
            raise ValueError('The given "__utmz" cookie value is invalid.')

        self.creation_time = utils.convert_ga_timestamp(parts[1])
        self.response_count = int(parts[3])

        params = parts[4].split(Campaign.CAMPAIGN_DELIMITER)
        for param in params:
            key, val = param.split('=')

            try:
                # Unknown keys are ignored on purpose (forward compatibility).
                setattr(self, self.UTMZ_PARAM_MAP[key], unquote_plus(val))
            except KeyError:
                continue

        return self
class CustomVariable(object):
    '''
    A single GA custom variable.

    Properties:
    index -- Slot number (GA officially offers 5 slots).
    name  -- Variable name.
    value -- Variable value.
    scope -- One of the SCOPE_* constants (1=visitor, 2=session, 3=page).

    WATCH OUT: It's a known issue that GA will not decode URL-encoded
    characters in custom variable names and values properly, so spaces
    will show up as "%20" in the interface etc. (applicable to name & value)
    http://www.google.com/support/forum/p/Google%20Analytics/thread?tid=2cdb3ec0be32e078
    '''

    SCOPE_VISITOR = 1
    SCOPE_SESSION = 2
    SCOPE_PAGE = 3

    def __init__(self, index=None, name=None, value=None, scope=3):
        self.index = index
        self.name = name
        self.value = value
        # Default to page scope, then apply any explicit (truthy) override.
        self.scope = CustomVariable.SCOPE_PAGE
        if scope:
            self.scope = scope

    def __setattr__(self, name, value):
        # Validate known attributes on every assignment, not just in __init__.
        if name == 'scope' and value and value not in range(1, 4):
            raise ValueError('Custom Variable scope has to be one of the 1,2 or 3')
        if name == 'index' and value and (value < 0 or value > 5):
            # Custom Variables are limited to five slots officially, but there seems to be a
            # trick to allow for more of them which we could investigate at a later time (see
            # http://analyticsimpact.com/2010/05/24/get-more-than-5-custom-variables-in-google-analytics/
            raise ValueError('Custom Variable index has to be between 1 and 5.')
        object.__setattr__(self, name, value)

    def validate(self):
        '''
        According to the GA documentation, there is a limit to the combined size of
        name and value of 64 bytes after URL encoding,
        see http://code.google.com/apis/analytics/docs/tracking/gaTrackingCustomVariables.html#varTypes
        and http://xahlee.org/js/google_analytics_tracker_2010-07-01_expanded.js line 563
        This limit was increased to 128 bytes BEFORE encoding with the 2012-01 release of ga.js however,
        see http://code.google.com/apis/analytics/community/gajs_changelog.html
        '''
        if len('%s%s' % (self.name, self.value)) > 128:
            raise exceptions.ValidationError('Custom Variable combined name and value length must not be larger than 128 bytes.')
class Event(object):
    '''
    A GA event hit.
    https://developers.google.com/analytics/devguides/collection/gajs/eventTrackerGuide

    Properties:
    category -- The general event category.
    action -- The action for the event.
    label -- An optional descriptor for the event.
    value -- An optional numeric value associated with the event.
    noninteraction -- When True the hit is excluded from bounce-rate
                      calculations (default False).
    '''

    def __init__(self, category=None, action=None, label=None, value=None, noninteraction=False):
        self.category = category
        self.action = action
        self.label = label
        self.value = value
        self.noninteraction = bool(noninteraction)
        # Non-interaction events must always carry a value; default it to 0.
        if self.noninteraction and not self.value:
            self.value = 0

    def validate(self):
        '''Raise ValidationError unless both category and action are set.'''
        if self.category and self.action:
            return
        raise exceptions.ValidationError('Events, at least need to have a category and action defined.')
class Item(object):
    '''
    One line item inside a Transaction.

    Properties:
    order_id -- Order ID, will be mapped to "utmtid" parameter.
    sku -- Product code for this item, will be mapped to "utmipc" parameter.
    name -- Product name, will be mapped to "utmipn" parameter.
    variation -- Variation on the item, will be mapped to "utmiva" parameter.
    price -- Unit price (numeric), will be mapped to "utmipr" parameter.
    quantity -- Unit quantity, will be mapped to "utmiqt" parameter.
    '''

    def __init__(self):
        self.order_id = None
        self.sku = None
        self.name = None
        self.variation = None
        self.price = None
        self.quantity = 1

    def validate(self):
        '''Raise ValidationError unless the mandatory sku is set.'''
        if self.sku:
            return
        raise exceptions.ValidationError('sku/product is a required parameter')
class Page(object):
    '''
    All parameters needed to track one page view.

    Properties:
    path -- Page request URI (must start with "/"), mapped to "utmp".
    title -- Page title, mapped to "utmdt".
    charset -- Charset encoding, mapped to "utmcs".
    referrer -- Referrer URL, mapped to "utmr".
    load_time -- Page load time in integer milliseconds, encoded into "utme".
    '''

    REFERRER_INTERNAL = '0'

    def __init__(self, path):
        self.path = None
        self.title = None
        self.charset = None
        self.referrer = None
        self.load_time = None
        if path:
            self.path = path

    def __setattr__(self, name, value):
        # Validate known attributes on every assignment.
        if name == 'path':
            if value and value[0] != '/':
                raise ValueError('The page path should always start with a slash ("/").')
        elif name == 'load_time':
            if value and not isinstance(value, int):
                raise ValueError('Page load time must be specified in integer milliseconds.')
        object.__setattr__(self, name, value)
class Session(object):
    '''
    You should serialize this object and store it in the user session to keep it
    persistent between requests (similar to the "__umtb" cookie of the GA Javascript client).

    Properties:
    session_id -- A unique per-session ID, will be mapped to "utmhid" parameter.
    track_count -- The amount of pageviews that were tracked within this session so far,
                   will be part of the "__utmb" cookie parameter.
                   Will get incremented automatically upon each request.
    start_time -- Timestamp of the start of this new session, will be part of
                  the "__utmb" cookie parameter.
    '''

    def __init__(self):
        # Fresh random 32-bit id per session (see utils.get_32bit_random_num).
        self.session_id = utils.get_32bit_random_num()
        self.track_count = 0
        # NOTE: naive UTC timestamp, consistent with the rest of this module.
        self.start_time = datetime.utcnow()

    @staticmethod
    def generate_session_id():
        # Thin alias over the module-level random helper.
        return utils.get_32bit_random_num()

    def extract_from_utmb(self, utmb):
        '''
        Will extract information for the "trackCount" and "startTime"
        properties from the given "__utmb" cookie value.

        Raises ValueError when the cookie does not have exactly 4 dot-separated
        parts. Returns self for chaining.
        '''
        parts = utmb.split('.')
        if len(parts) != 4:
            raise ValueError('The given "__utmb" cookie value is invalid.')

        self.track_count = int(parts[1])
        self.start_time = utils.convert_ga_timestamp(parts[3])

        return self
class SocialInteraction(object):
    '''
    A social-interaction hit.

    Properties:
    action -- Required. The social action being tracked, mapped to "utmsa".
    network -- Required. The social network being tracked, mapped to "utmsn".
    target -- Optional. The URL (or resource) receiving the action.
    '''

    def __init__(self, action=None, network=None, target=None):
        self.action = action
        self.network = network
        self.target = target

    def validate(self):
        '''Raise ValidationError unless both action and network are set.'''
        if self.action and self.network:
            return
        raise exceptions.ValidationError('Social interactions need to have at least the "network" and "action" attributes defined.')
class Transaction(object):
    '''
    Parameters for one e-commerce transaction.

    Properties:
    order_id -- Order ID, mapped to "utmtid".
    affiliation -- Affiliation, mapped to "utmtst".
    total -- Total cost, mapped to "utmtto".
    tax -- Tax cost, mapped to "utmttx".
    shipping -- Shipping cost, mapped to "utmtsp".
    city -- Billing city, mapped to "utmtci".
    state -- Billing region, mapped to "utmtrg".
    country -- Billing country, mapped to "utmtco".
    items -- List of Item instances belonging to this transaction.
    '''

    def __init__(self):
        # items must be assigned first: __setattr__ reads self.items
        # whenever order_id is written.
        self.items = []
        self.order_id = None
        self.affiliation = None
        self.total = None
        self.tax = None
        self.shipping = None
        self.city = None
        self.state = None
        self.country = None

    def __setattr__(self, name, value):
        # Keep every line item's order id in sync with the transaction's.
        if name == 'order_id':
            for line_item in self.items:
                line_item.order_id = value
        object.__setattr__(self, name, value)

    def validate(self):
        '''Raise ValidationError unless at least one item was added.'''
        if not self.items:
            raise exceptions.ValidationError('Transaction need to consist of at least one item')

    def add_item(self, item):
        ''' item of type entities.Item '''
        if isinstance(item, Item):
            item.order_id = self.order_id
            self.items.append(item)
class Visitor(object):
    '''
    You should serialize this object and store it in the user database to keep it
    persistent for the same user permanently (similar to the "__umta" cookie of
    the GA Javascript client).

    Properties:
    unique_id -- Unique user ID, will be part of the "__utma" cookie parameter.
    first_visit_time -- Time of the very first visit of this user, will be part of the "__utma" cookie parameter.
    previous_visit_time -- Time of the previous visit of this user, will be part of the "__utma" cookie parameter.
    current_visit_time -- Time of the current visit of this user, will be part of the "__utma" cookie parameter.
    visit_count -- Amount of total visits by this user, will be part of the "__utma" cookie parameter.
    ip_address -- IP Address of the end user, will be mapped to "utmip" parameter and "X-Forwarded-For" request header.
    user_agent -- User agent string of the end user, will be mapped to "User-Agent" request header.
    locale -- Locale string (country part optional) will be mapped to "utmul" parameter.
    flash_version -- Visitor's Flash version, will be maped to "utmfl" parameter.
    java_enabled -- Visitor's Java support, will be mapped to "utmje" parameter.
    screen_colour_depth -- Visitor's screen color depth, will be mapped to "utmsc" parameter.
    screen_resolution -- Visitor's screen resolution, will be mapped to "utmsr" parameter.
    '''

    def __init__(self):
        # All three visit timestamps start out identical (naive UTC).
        now = datetime.utcnow()
        self.unique_id = None
        self.first_visit_time = now
        self.previous_visit_time = now
        self.current_visit_time = now
        self.visit_count = 1
        self.ip_address = None
        self.user_agent = None
        self.locale = None
        self.flash_version = None
        self.java_enabled = None
        self.screen_colour_depth = None
        self.screen_resolution = None

    def __setattr__(self, name, value):
        # Reject unique ids outside the positive signed 32-bit range
        # (0 and None are allowed because they are falsy).
        if name == 'unique_id':
            if value and (value < 0 or value > 0x7fffffff):
                raise ValueError('Visitor unique ID has to be a 32-bit integer between 0 and 0x7fffffff')
        object.__setattr__(self, name, value)

    def __getattribute__(self, name):
        # Lazily generate (and store) the unique id on first read, so the
        # random draw only happens when the id is actually needed.
        if name == 'unique_id':
            tmp = object.__getattribute__(self, name)
            if tmp is None:
                self.unique_id = self.generate_unique_id()
        return object.__getattribute__(self, name)

    def __getstate__(self):
        # NOTE(review): this returns (and may mutate) self.__dict__ directly,
        # without copying; the user_agent-is-None condition as a trigger for
        # generating unique_id looks intentional but is undocumented upstream
        # -- confirm before changing.
        state = self.__dict__
        if state.get('user_agent') is None:
            state['unique_id'] = self.generate_unique_id()
        return state

    def extract_from_utma(self, utma):
        '''
        Will extract information for the "unique_id", "first_visit_time", "previous_visit_time",
        "current_visit_time" and "visit_count" properties from the given "__utma" cookie value.

        Raises ValueError when the cookie does not have exactly 6 dot-separated
        parts. Returns self for chaining.
        '''
        parts = utma.split('.')
        if len(parts) != 6:
            raise ValueError('The given "__utma" cookie value is invalid.')

        self.unique_id = int(parts[1])
        self.first_visit_time = utils.convert_ga_timestamp(parts[2])
        self.previous_visit_time = utils.convert_ga_timestamp(parts[3])
        self.current_visit_time = utils.convert_ga_timestamp(parts[4])
        self.visit_count = int(parts[5])

        return self

    def extract_from_server_meta(self, meta):
        '''
        Will extract information for the "ip_address", "user_agent" and "locale"
        properties from the given WSGI REQUEST META variable or equivalent.

        Returns self for chaining.
        '''
        if 'REMOTE_ADDR' in meta and meta['REMOTE_ADDR']:
            ip = None
            # Prefer the proxy-supplied address, falling back to REMOTE_ADDR.
            for key in ('HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR'):
                if key in meta and not ip:
                    # X-Forwarded-For may be a comma-separated chain; take the
                    # last hop, then discard invalid or private addresses.
                    ips = meta.get(key, '').split(',')
                    ip = ips[-1].strip()
                    if not utils.is_valid_ip(ip):
                        ip = ''
                    if utils.is_private_ip(ip):
                        ip = ''
            if ip:
                self.ip_address = ip

        if 'HTTP_USER_AGENT' in meta and meta['HTTP_USER_AGENT']:
            self.user_agent = meta['HTTP_USER_AGENT']

        if 'HTTP_ACCEPT_LANGUAGE' in meta and meta['HTTP_ACCEPT_LANGUAGE']:
            # Parse the Accept-Language header and pick the language-range
            # with the highest quality value (missing q defaults to 1).
            user_locals = []
            matched_locales = utils.validate_locale(meta['HTTP_ACCEPT_LANGUAGE'])
            if matched_locales:
                lang_lst = map((lambda x: x.replace('-', '_')), (i[1] for i in matched_locales))
                quality_lst = map((lambda x: x and x or 1), (float(i[4] and i[4] or '0') for i in matched_locales))
                # Each map object is consumed exactly once below, so Python 3
                # laziness is safe here.
                lang_quality_map = map((lambda x, y: (x, y)), lang_lst, quality_lst)
                user_locals = [x[0] for x in sorted(lang_quality_map, key=itemgetter(1), reverse=True)]

            if user_locals:
                self.locale = user_locals[0]

        return self

    def generate_hash(self):
        '''Generates a hashed value from user-specific properties.'''
        tmpstr = "%s%s%s" % (self.user_agent, self.screen_resolution, self.screen_colour_depth)
        return utils.generate_hash(tmpstr)

    def generate_unique_id(self):
        '''Generates a unique user ID from the current user-specific properties.'''
        # XOR with a random draw, masked into the positive 32-bit range.
        return ((utils.get_32bit_random_num() ^ self.generate_hash()) & 0x7fffffff)

    def add_session(self, session):
        '''
        Updates the "previousVisitTime", "currentVisitTime" and "visitCount"
        fields based on the given session object.
        '''
        start_time = session.start_time
        if start_time != self.current_visit_time:
            self.previous_visit_time = self.current_visit_time
            self.current_visit_time = start_time
        self.visit_count = self.visit_count + 1

View File

@ -1,2 +0,0 @@
class ValidationError(Exception):
    '''Raised by the entities' validate() methods when required fields are missing or invalid.'''
    pass

File diff suppressed because it is too large Load Diff

View File

@ -1,125 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from random import randint
import re
import sys
from datetime import datetime
try:
from urllib import quote
except ImportError as e:
from urllib.parse import quote
if sys.version_info < (3,):
text_type = unicode
else:
text_type = str
__author__ = "Arun KR (kra3) <the1.arun@gmail.com>"
__license__ = "Simplified BSD"
# Precompiled validation patterns shared by the helpers below.
# Dotted-quad IPv4 (1-3 digits per octet; numeric range is not checked).
# Bug fix: the original character class was [\d+]{1,3}, which also accepted
# literal '+' characters (e.g. '1+1.2.3.4' validated) -- digits only now.
RE_IP = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', re.I)
# Loopback and RFC1918 private prefixes that should never be sent to GA.
RE_PRIV_IP = re.compile(r'^(?:127\.0\.0\.1|10\.|192\.168\.|172\.(?:1[6-9]|2[0-9]|3[0-1])\.)')
# HTTP Accept-Language entries: "lang(-sub)*" optionally followed by ";q=0.x".
RE_LOCALE = re.compile(r'(^|\s*,\s*)([a-zA-Z]{1,8}(-[a-zA-Z]{1,8})*)\s*(;\s*q\s*=\s*(1(\.0{0,3})?|0(\.[0-9]{0,3})))?', re.I)
# GA web-property ids such as "UA-12345-6" (or the mobile "MO-" variant).
RE_GA_ACCOUNT_ID = re.compile(r'^(UA|MO)-[0-9]*-[0-9]*$')
# Captures the first three octets of an IPv4 address (used to anonymize it).
RE_FIRST_THREE_OCTETS_OF_IP = re.compile(r'^((\d{1,3}\.){3})\d{1,3}$')
def convert_ga_timestamp(timestamp_string):
    '''Convert a GA epoch string (seconds or milliseconds) to a naive UTC datetime.'''
    seconds = float(timestamp_string)
    # GA cookies sometimes carry milliseconds; anything beyond the signed
    # 32-bit second range is assumed to be ms and scaled down.
    if seconds > ((2 ** 31) - 1):
        seconds /= 1000
    return datetime.utcfromtimestamp(seconds)
def get_32bit_random_num():
    '''Return a random non-negative signed 32-bit integer (0 .. 0x7fffffff).'''
    return randint(0, 0x7fffffff)
def is_valid_ip(ip):
    '''Return True when *ip* looks like a dotted-quad IPv4 address (per RE_IP).'''
    return bool(RE_IP.match(str(ip)))
def is_private_ip(ip):
    '''Return True when *ip* starts with a loopback/private prefix (per RE_PRIV_IP).'''
    return bool(RE_PRIV_IP.match(str(ip)))
def validate_locale(locale):
    '''Return RE_LOCALE.findall() tuples for each language-range in an HTTP Accept-Language value.'''
    return RE_LOCALE.findall(str(locale))
def is_valid_google_account(account):
    '''Return True when *account* matches the GA property-id format (UA-xxx-x / MO-xxx-x).'''
    return bool(RE_GA_ACCOUNT_ID.match(str(account)))
def generate_hash(tmpstr):
    '''Port of the ga.js string hash: fold characters from the end, keeping 28 bits.'''
    # Empty or falsy input hashes to 1, matching the JavaScript original.
    if not tmpstr:
        return 1
    hash_val = 0
    for ordinal in (ord(ch) for ch in reversed(tmpstr)):
        hash_val = ((hash_val << 6) & 0xfffffff) + ordinal + (ordinal << 14)
        # Fold the top 7 bits back in whenever they are set.
        overflow = hash_val & 0xfe00000
        if overflow:
            hash_val ^= overflow >> 21
    return hash_val
def anonymize_ip(ip):
    '''Zero out the last octet of an IPv4 address; return '' for anything unparseable.'''
    if not ip:
        return ''
    found = RE_FIRST_THREE_OCTETS_OF_IP.findall(str(ip))
    if found:
        # found[0][0] is "a.b.c." -- append '0' as the anonymized last octet.
        return '%s%s' % (found[0][0], '0')
    return ''
def encode_uri_components(value):
    '''Mimics Javascript's encodeURIComponent() function for consistency with the GA Javascript client.'''
    # quote() percent-encodes; the wrapper then restores the characters
    # (! * ' ( )) that encodeURIComponent leaves literal.
    return convert_to_uri_component_encoding(quote(value))
def convert_to_uri_component_encoding(value):
    '''Undo percent-encoding for the characters JS encodeURIComponent leaves literal.'''
    replacements = (
        ('%21', '!'),
        ('%2A', '*'),
        ('%27', "'"),
        ('%28', '('),
        ('%29', ')'),
    )
    for encoded, literal in replacements:
        value = value.replace(encoded, literal)
    return value
# Taken from expicient.com BJs repo.
def stringify(s, stype=None, fn=None):
    ''' Converts elements of a complex data structure to strings

    The data structure can be a multi-tiered one - with tuples and lists etc
    This method will loop through each and convert everything to string.
    For example - it can be -
    [[{'a1': {'a2': {'a3': ('a4', timedelta(0, 563)), 'a5': {'a6': datetime()}}}}]]
    which will be converted to -
    [[{'a1': {'a2': {'a3': ('a4', '0:09:23'), 'a5': {'a6': '2009-05-27 16:19:52.401500' }}}}]]

    @param stype: If only one type of data element needs to be converted to
        string without affecting others, stype can be used.
        In the earlier example, if it is called with stringify(s, stype=datetime.timedelta)
        the result would be
        [[{'a1': {'a2': {'a3': ('a4', '0:09:23'), 'a5': {'a6': datetime() }}}}]]

    Also, even though the name is stringify, any function can be run on it, based on
    parameter fn. If fn is None, it will be stringified.

    NOTE: lists and dicts are converted in place (mutated); sets and tuples
    are rebuilt and returned as new objects.
    '''
    if type(s) in [list, set, dict, tuple]:
        if isinstance(s, dict):
            # Keys stay untouched; only values are converted (in place).
            for k in s:
                s[k] = stringify(s[k], stype, fn)
        elif isinstance(s, list):
            for i, k in enumerate(s):
                s[i] = stringify(k, stype, fn)
        elif isinstance(s, set):
            # Bug fix: sets do not support index assignment, so the previous
            # code (which treated them like lists) crashed with TypeError.
            # Rebuild the set from converted members instead.
            s = set(stringify(k, stype, fn) for k in s)
        else:  # tuple
            tmp = []
            for k in s:
                tmp.append(stringify(k, stype, fn))
            s = tuple(tmp)
    else:
        if fn:
            if not stype or (stype == type(s)):
                return fn(s)
        else:
            # To do str(s). But, str() can fail on unicode. So, use .encode instead
            if not stype or (stype == type(s)):
                try:
                    return text_type(s)
                    #return s.encode('ascii', 'replace')
                except AttributeError:
                    return str(s)
                except UnicodeDecodeError:
                    return s.decode('ascii', 'replace')
    return s

View File

@ -386,7 +386,7 @@ class MediaInfo:
A higher value will yield more precise results in some cases
but will also increase parsing time.
:param bool full: display additional tags, including computer-readable values
for sizes and durations.
for sizes and durations, corresponds to the CLI's ``--Full``/``-f`` parameter.
:param bool legacy_stream_display: display additional information about streams.
:param dict mediainfo_options: additional options that will be passed to the
`MediaInfo_Option` function, for example: ``{"Language": "raw"}``.

View File

@ -473,7 +473,7 @@ class SZProviderPool(ProviderPool):
for s in subtitles:
# get the matches
if s.language not in languages:
if s.language.basename not in [x.basename for x in languages]:
logger.debug("%r: Skipping, language not searched for", s)
continue
@ -502,12 +502,12 @@ class SZProviderPool(ProviderPool):
break
# stop when all languages are downloaded
if set(s.language for s in downloaded_subtitles) == languages:
if set(s.language.basename for s in downloaded_subtitles) == languages:
logger.debug('All languages downloaded')
break
# check downloaded languages
if subtitle.language in set(s.language for s in downloaded_subtitles):
if subtitle.language in set(s.language.basename for s in downloaded_subtitles):
logger.debug('%r: Skipping subtitle: already downloaded', subtitle.language)
continue
@ -1127,7 +1127,7 @@ def save_subtitles(file_path, subtitles, single=False, directory=None, chmod=Non
continue
# check language
if subtitle.language in set(s.language for s in saved_subtitles):
if subtitle.language in set(s.language.basename for s in saved_subtitles):
logger.debug('Skipping subtitle %r: language already saved', subtitle)
continue

View File

@ -55,7 +55,7 @@ class OpenSubtitlesComSubtitle(Subtitle):
hash_verifiable = False
def __init__(self, language, forced, hearing_impaired, page_link, file_id, releases, uploader, title, year,
hash_matched, file_hash=None, season=None, episode=None):
hash_matched, file_hash=None, season=None, episode=None, imdb_match=False):
language = Language.rebuild(language, hi=hearing_impaired, forced=forced)
self.title = title
@ -75,6 +75,7 @@ class OpenSubtitlesComSubtitle(Subtitle):
self.hash = file_hash
self.encoding = 'utf-8'
self.hash_matched = hash_matched
self.imdb_match = imdb_match
@property
def id(self):
@ -88,24 +89,28 @@ class OpenSubtitlesComSubtitle(Subtitle):
if type_ == "episode":
# series
matches.add('series')
# year
if video.year == self.year:
matches.add('year')
# season
if video.season == self.season:
matches.add('season')
# episode
if video.episode == self.episode:
matches.add('episode')
# imdb
if self.imdb_match:
matches.add('series_imdb_id')
else:
# title
matches.add('title')
# year
if video.year == self.year:
matches.add('year')
# imdb
if self.imdb_match:
matches.add('imdb_id')
# rest is same for both groups
# year
if video.year == self.year:
matches.add('year')
# release_group
if (video.release_group and self.releases and
any(r in sanitize_release_group(self.releases)
@ -282,9 +287,11 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
res = self.retry(
lambda: checked(
lambda: self.session.get(self.server_url + 'subtitles',
params=(('episode_number', self.video.episode),
params=(('ai_translated', 'exclude'),
('episode_number', self.video.episode),
('imdb_id', imdb_id if not title_id else None),
('languages', langs.lower()),
('machine_translated', 'exclude'),
('moviehash', file_hash),
('parent_feature_id', title_id if title_id else None),
('season_number', self.video.season)),
@ -298,9 +305,11 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
res = self.retry(
lambda: checked(
lambda: self.session.get(self.server_url + 'subtitles',
params=(('id', title_id if title_id else None),
params=(('ai_translated', 'exclude'),
('id', title_id if title_id else None),
('imdb_id', imdb_id if not title_id else None),
('languages', langs.lower()),
('machine_translated', 'exclude'),
('moviehash', file_hash)),
timeout=30),
validate_json=True,
@ -340,6 +349,11 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
else:
moviehash_match = False
try:
year = int(item['attributes']['feature_details']['year'])
except TypeError:
year = item['attributes']['feature_details']['year']
if len(item['attributes']['files']):
subtitle = OpenSubtitlesComSubtitle(
language=Language.fromietf(item['attributes']['language']),
@ -350,10 +364,11 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
releases=item['attributes']['release'],
uploader=item['attributes']['uploader']['name'],
title=item['attributes']['feature_details']['movie_name'],
year=item['attributes']['feature_details']['year'],
year=year,
season=season_number,
episode=episode_number,
hash_matched=moviehash_match
hash_matched=moviehash_match,
imdb_match=True if imdb_id else False
)
subtitle.get_matches(self.video)
subtitles.append(subtitle)

View File

@ -200,10 +200,9 @@ class SubdivxSubtitlesProvider(Provider):
# download the subtitle
logger.debug("Downloading subtitle %r", subtitle)
# download zip / rar file with the subtitle
response = self.session.get(
subtitle.download_url,
headers={"Referer": subtitle.page_link},
headers={"Referer": _SERVER_URL},
timeout=30,
)
response.raise_for_status()

View File

@ -7,6 +7,9 @@ import re
import time
import urllib.parse
from guessit import guessit
from requests import Session
from bs4 import BeautifulSoup as bso
from guessit import guessit
from requests import Session
@ -283,8 +286,11 @@ class Subf2mProvider(Provider):
if not clean_text:
continue
# It will return list values
guess = _memoized_episode_guess(clean_text)
# First try with the special episode matches for subf2m
guess = _get_episode_from_release(clean_text)
if guess is None:
guess = _memoized_episode_guess(clean_text)
if "season" not in guess:
if "complete series" in clean_text.lower():
@ -390,6 +396,24 @@ def _memoized_episode_guess(content):
)
_EPISODE_SPECIAL_RE = re.compile(
r"(season|s)\s*?(?P<x>\d{,2})\s?[-]\s?(?P<y>\d{,2})", flags=re.IGNORECASE
)
def _get_episode_from_release(release: str):
match = _EPISODE_SPECIAL_RE.search(release)
if match is None:
return None
try:
season, episode = [int(item) for item in match.group("x", "y")]
except (IndexError, ValueError):
return None
return {"season": [season], "episode": [episode]}
def _get_subtitle_from_item(item, language, episode_number=None):
release_info = [
rel.text.strip() for rel in item.find("ul", {"class": "scrolllist"})

View File

@ -59,19 +59,6 @@ class SuperSubtitlesSubtitle(Subtitle):
"""SuperSubtitles Subtitle."""
provider_name = 'supersubtitles'
def __str__(self):
subtit = "Subtitle id: " + str(self.subtitle_id) \
+ " Series: " + self.series \
+ " Season: " + str(self.season) \
+ " Episode: " + str(self.episode) \
+ " Version: " + str(self.version) \
+ " Releases: " + str(self.releases) \
+ " DownloadLink: " + str(self.page_link) \
+ " Matches: " + str(self.matches)
if self.year:
subtit = subtit + " Year: " + str(self.year)
return subtit.encode('utf-8')
def __init__(self, language, page_link, subtitle_id, series, season, episode, version,
releases, year, imdb_id, uploader, asked_for_episode=None, asked_for_release_group=None):
super(SuperSubtitlesSubtitle, self).__init__(language, page_link=page_link)
@ -86,8 +73,7 @@ class SuperSubtitlesSubtitle(Subtitle):
if year:
self.year = int(year)
self.release_info = u" ,".join([u"%s (%s)" % (self.__get_name(), releases[0])] +
(releases[1:] if len(releases) > 1 else []))
self.release_info = "\n".join([self.__get_name(), *self.releases])
self.page_link = page_link
self.asked_for_release_group = asked_for_release_group
self.asked_for_episode = asked_for_episode
@ -98,21 +84,18 @@ class SuperSubtitlesSubtitle(Subtitle):
def numeric_id(self):
return self.subtitle_id
def __get_name(self):
ep_addon = (" S%02dE%02d" % (self.season, self.episode)) if self.episode else ""
return u"%s%s%s" % (self.series, " (%s)" % self.year if self.year else "", ep_addon)
def __repr__(self):
return '<%s %r [%s]>' % (
self.__class__.__name__, u"%s [%s]" % (self.__get_name(), self.version), self.language)
@property
def id(self):
return str(self.subtitle_id)
def __get_name(self):
ep_addon = f"S{self.season:02}E{self.episode:02}" if self.episode else ""
year_str = f" ({self.year})"
return f"{self.series}{year_str or ''} {ep_addon}".strip()
def get_matches(self, video):
matches = set()
update_matches(matches, video, self.releases)
update_matches(matches, video, self.release_info)
# episode
if isinstance(video, Episode):

View File

@ -125,7 +125,8 @@ class TitrariProvider(Provider, ProviderSubtitleArchiveMixin):
languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))
video_types = (Episode, Movie)
api_url = 'https://www.titrari.ro/'
query_advanced_search = 'cautarepreaavansata'
# query_advanced_search = 'cautarepreaavansata'
query_advanced_search = "cautarecutare"
def __init__(self):
self.session = None

View File

@ -0,0 +1,291 @@
from __future__ import absolute_import
import logging
from requests import Session
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.providers import Provider
from subliminal import __short_version__
from subliminal.exceptions import ConfigurationError
from subzero.language import Language
from subliminal.video import Episode, Movie
from babelfish.exceptions import LanguageReverseError
import ffmpeg
import functools
# These are all the languages Whisper supports.
# from whisper.tokenizer import LANGUAGES
whisper_languages = {
"en": "english",
"zh": "chinese",
"de": "german",
"es": "spanish",
"ru": "russian",
"ko": "korean",
"fr": "french",
"ja": "japanese",
"pt": "portuguese",
"tr": "turkish",
"pl": "polish",
"ca": "catalan",
"nl": "dutch",
"ar": "arabic",
"sv": "swedish",
"it": "italian",
"id": "indonesian",
"hi": "hindi",
"fi": "finnish",
"vi": "vietnamese",
"he": "hebrew",
"uk": "ukrainian",
"el": "greek",
"ms": "malay",
"cs": "czech",
"ro": "romanian",
"da": "danish",
"hu": "hungarian",
"ta": "tamil",
"no": "norwegian",
"th": "thai",
"ur": "urdu",
"hr": "croatian",
"bg": "bulgarian",
"lt": "lithuanian",
"la": "latin",
"mi": "maori",
"ml": "malayalam",
"cy": "welsh",
"sk": "slovak",
"te": "telugu",
"fa": "persian",
"lv": "latvian",
"bn": "bengali",
"sr": "serbian",
"az": "azerbaijani",
"sl": "slovenian",
"kn": "kannada",
"et": "estonian",
"mk": "macedonian",
"br": "breton",
"eu": "basque",
"is": "icelandic",
"hy": "armenian",
"ne": "nepali",
"mn": "mongolian",
"bs": "bosnian",
"kk": "kazakh",
"sq": "albanian",
"sw": "swahili",
"gl": "galician",
"mr": "marathi",
"pa": "punjabi",
"si": "sinhala",
"km": "khmer",
"sn": "shona",
"yo": "yoruba",
"so": "somali",
"af": "afrikaans",
"oc": "occitan",
"ka": "georgian",
"be": "belarusian",
"tg": "tajik",
"sd": "sindhi",
"gu": "gujarati",
"am": "amharic",
"yi": "yiddish",
"lo": "lao",
"uz": "uzbek",
"fo": "faroese",
"ht": "haitian creole",
"ps": "pashto",
"tk": "turkmen",
"nn": "nynorsk",
"mt": "maltese",
"sa": "sanskrit",
"lb": "luxembourgish",
"my": "myanmar",
"bo": "tibetan",
"tl": "tagalog",
"mg": "malagasy",
"as": "assamese",
"tt": "tatar",
"haw": "hawaiian",
"ln": "lingala",
"ha": "hausa",
"ba": "bashkir",
"jw": "javanese",
"su": "sundanese",
}
logger = logging.getLogger(__name__)
# NOTE(review): the cache holds the raw decoded PCM bytes in memory; maxsize=2
# lets the detect-language call and the subsequent ASR call for the same file
# reuse one decode without keeping more than two streams alive.
@functools.lru_cache(2)
def encode_audio_stream(path, audio_stream_language=None):
    """Decode the audio of *path* to mono 16 kHz signed 16-bit little-endian PCM.

    :param path: media file whose audio should be decoded.
    :param audio_stream_language: when set, restrict decoding to the audio
        stream tagged with this language (ffmpeg ``a:m:language:`` selector).
    :return: raw PCM bytes suitable for upload to the whisper web service.
    :raises RuntimeError: when ffmpeg fails, wrapping its stderr output.
    """
    logger.debug("Encoding audio stream to WAV with ffmpeg")

    try:
        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
        inp = ffmpeg.input(path, threads=0)
        if audio_stream_language:
            logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}")
            inp = inp[f'a:m:language:{audio_stream_language}']
        out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000) \
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    logger.debug(f"Finished encoding audio stream in {path} with no errors")

    return out
def whisper_get_language(code, name):
    """Translate a Whisper ``(code, english name)`` pair to a ``Language``.

    Whisper uses an inconsistent mix of alpha2 and alpha3 language codes,
    so fall back to a lookup by full English name when the code is not a
    valid alpha2 code.
    """
    try:
        return Language.fromalpha2(code)
    except LanguageReverseError:
        return Language.fromname(name)
def whisper_get_language_reverse(alpha3):
    """Return the Whisper language code matching an alpha3 language code.

    :raises ValueError: when no Whisper language maps to *alpha3*.
    """
    for code, name in whisper_languages.items():
        if whisper_get_language(code, name).alpha3 == alpha3:
            return code
    raise ValueError
class WhisperAISubtitle(Subtitle):
    """Subtitle synthesized by the Whisper AI web service for a given video."""
    provider_name = 'whisperai'
    hash_verifiable = False

    def __init__(self, language, video):
        super(WhisperAISubtitle, self).__init__(language)
        self.video = video
        # Filled in later by the provider's query():
        self.task = None                # "transcribe" or "translate"
        self.audio_language = None      # alpha3 code of the source audio
        self.force_audio_stream = None  # pin a specific audio stream, if any

    @property
    def id(self):
        # Generated subtitles are keyed by the media file they come from.
        return self.video.original_name

    def get_matches(self, video):
        """Claim the strongest structural match: output is derived from the video itself."""
        matches = set()
        if isinstance(video, Episode):
            matches.update(("series", "season", "episode"))
        elif isinstance(video, Movie):
            matches.add("title")
        return matches
class WhisperAIProvider(Provider):
    '''Whisper AI Provider.

    Generates subtitles by sending the media file's decoded audio to a
    whisper-asr-webservice endpoint, either transcribing the spoken
    language or translating it to English.
    '''

    # Every language Whisper can emit, plus hi/forced variants so the
    # scoring machinery accepts them.
    languages = {whisper_get_language(code, name) for code, name in whisper_languages.items()}
    languages.update(set(Language.rebuild(lang, hi=True) for lang in languages))
    languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))

    video_types = (Episode, Movie)

    def __init__(self, endpoint=None, timeout=None):
        """
        :param endpoint: base URL of the whisper web service (required).
        :param timeout: request timeout in seconds (required).
        :raises ConfigurationError: when either setting is missing.
        """
        if not endpoint:
            raise ConfigurationError('Whisper Web Service Endpoint must be provided')

        if not timeout:
            raise ConfigurationError('Whisper Web Service Timeout must be provided')

        self.endpoint = endpoint
        self.timeout = int(timeout)
        self.session = None
        # Per-instance detection cache instead of functools.lru_cache on the
        # method: an lru_cache on a bound method keys on (and keeps alive)
        # the provider instance itself.
        self._detected_languages = {}

    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    def terminate(self):
        self.session.close()

    def detect_language(self, path) -> Language:
        """Ask the web service which language is spoken in *path* (cached per path)."""
        cached = self._detected_languages.get(path)
        if cached is not None:
            return cached

        out = encode_audio_stream(path)

        r = self.session.post(f"{self.endpoint}/detect-language",
                              params={'encode': 'false'},
                              files={'audio_file': out},
                              timeout=self.timeout)

        # Parse the JSON payload once instead of re-parsing it per field.
        payload = r.json()
        logger.info(f"Whisper detected language of {path} as {payload['detected_language']}")

        detected = whisper_get_language(payload["language_code"], payload["detected_language"])
        self._detected_languages[path] = detected
        return detected

    def query(self, language, video):
        """Build a WhisperAISubtitle for *video*, or ``None`` when unsupported.

        Chooses between transcription (audio already in the requested
        language) and translation (Whisper only translates to English).
        """
        if language not in self.languages:
            return None

        sub = WhisperAISubtitle(language, video)
        sub.task = "transcribe"

        if video.audio_languages and not (list(video.audio_languages)[0] == "und" and len(video.audio_languages) == 1):
            if language.alpha3 in video.audio_languages:
                sub.audio_language = language.alpha3
                if len(list(video.audio_languages)) > 1:
                    # Several streams: pin the one matching the request.
                    sub.force_audio_stream = language.alpha3
            else:
                sub.task = "translate"

                eligible_languages = list(video.audio_languages)
                if len(eligible_languages) > 1:
                    if "und" in eligible_languages:
                        eligible_languages.remove("und")
                sub.audio_language = eligible_languages[0]
        else:
            # We must detect the language manually
            detected_lang = self.detect_language(video.original_path)

            if detected_lang != language:
                sub.task = "translate"

            sub.audio_language = detected_lang.alpha3

        if sub.task == "translate":
            if language.alpha3 != "eng":
                logger.info(f"Translation only possible from {language} to English")
                return None

        logger.debug(f"Whisper ({video.original_path}): {sub.audio_language} -> {language.alpha3} [TASK: {sub.task}]")

        return sub

    def list_subtitles(self, video, languages):
        # query() returns None for unsupported language/task combinations.
        subtitles = [self.query(lang, video) for lang in languages]
        return [s for s in subtitles if s is not None]

    def download_subtitle(self, subtitle: WhisperAISubtitle):
        """Run the ASR task and store the resulting SRT bytes on *subtitle*."""
        # Invoke Whisper through the API. This may take a long time depending on the file.
        # TODO: This loads the entire file into memory, find a good way to stream the file in chunks
        out = encode_audio_stream(subtitle.video.original_path, subtitle.force_audio_stream)

        r = self.session.post(f"{self.endpoint}/asr",
                              params={'task': subtitle.task,
                                      'language': whisper_get_language_reverse(subtitle.audio_language),
                                      'output': 'srt', 'encode': 'false'},
                              files={'audio_file': out},
                              timeout=self.timeout)

        subtitle.content = r.content

View File

@ -95,7 +95,7 @@ class YifySubtitlesProvider(Provider):
languages = {Language(l, c) for (_, l, c) in YifyLanguages}
languages.update(set(Language.rebuild(l, hi=True) for l in languages))
server_url = 'https://yifysubtitles.org'
server_url = 'https://yifysubtitles.ch'
video_types = (Movie,)
def initialize(self):

View File

@ -1,11 +1,13 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import base64
import io
import logging
import os
import zipfile
import re
import copy
from PIL import Image
try:
from urlparse import urljoin
@ -20,6 +22,7 @@ from requests import Session
from six import text_type
from random import randint
from python_anticaptcha import AnticaptchaClient, ImageToTextTask
from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.providers import Provider
from subliminal.subtitle import (
@ -39,6 +42,7 @@ language_converters.register('zimuku = subliminal_patch.converters.zimuku:zimuku
supported_languages = list(language_converters['zimuku'].to_zimuku.keys())
class ZimukuSubtitle(Subtitle):
"""Zimuku Subtitle."""
@ -80,6 +84,13 @@ class ZimukuSubtitle(Subtitle):
return matches
def string_to_hex(s):
    """Return *s* as the concatenated lowercase hex codepoints of its characters.

    Mirrors the site's JavaScript ``stringToHex`` used by the yunsuo
    anti-bot bypass in this provider (e.g. ``"ab"`` -> ``"6162"``).
    """
    # join() over a generator instead of repeated string concatenation:
    # linear instead of quadratic, same output (format(n, 'x') == hex(n)[2:]).
    return "".join(format(ord(ch), "x") for ch in s)
class ZimukuProvider(Provider):
"""Zimuku Provider."""
@ -87,40 +98,58 @@ class ZimukuProvider(Provider):
video_types = (Episode, Movie)
logger.info(str(supported_languages))
server_url = "http://zimuku.org"
search_url = "/search?q={}&security_verify_data={}"
download_url = "http://zimuku.org/"
server_url = "https://so.zimuku.org"
search_url = "/search?q={}"
subtitle_class = ZimukuSubtitle
def __init__(self):
self.session = None
def stringToHex(self, s):
val = ""
for i in s:
val += hex(ord(i))[2:]
return val
vertoken = ""
verify_token = ""
code = ""
location_re = re.compile(
r'self\.location = "(.*)" \+ stringToHex\(screendate\)')
r'self\.location = "(.*)" \+ stringToHex\(text\)')
verification_image_re = re.compile(r'<img.*?src="data:image/bmp;base64,(.*?)".*?>')
def yunsuo_bypass(self, url, *args, **kwargs):
def parse_verification_image(image_content: str):
def bmp_to_image(base64_str, img_type='png'):
img_data = base64.b64decode(base64_str)
img = Image.open(io.BytesIO(img_data))
img = img.convert("RGB")
img_fp = io.BytesIO()
img.save(img_fp, img_type)
img_fp.seek(0)
return img_fp
fp = bmp_to_image(image_content)
task = ImageToTextTask(fp)
client = AnticaptchaClient(os.environ.get('ANTICAPTCHA_ACCOUNT_KEY'))
job = client.createTask(task)
job.join()
return job.get_captcha_text()
i = -1
while True:
i += 1
r = self.session.get(url, *args, **kwargs)
if(r.status_code == 404):
if r.status_code == 404:
# mock js script logic
tr = self.location_re.findall(r.text)
self.session.cookies.set("srcurl", self.stringToHex(r.url))
if(tr):
verification_image = self.verification_image_re.findall(r.text)
self.code = parse_verification_image(verification_image[0])
self.session.cookies.set("srcurl", string_to_hex(r.url))
if tr:
verify_resp = self.session.get(
self.server_url+tr[0]+self.stringToHex("1920,1080"), allow_redirects=False)
if(verify_resp.status_code == 302 and self.session.cookies.get("security_session_verify") != None):
self.server_url + tr[0] + string_to_hex(self.code), allow_redirects=False)
if verify_resp.status_code == 302 \
and self.session.cookies.get("security_session_verify") is not None:
pass
continue
if len(self.location_re.findall(r.text)) == 0:
self.vertoken = self.stringToHex("1920,1080")
self.verify_token = string_to_hex(self.code)
return r
def initialize(self):
@ -147,14 +176,14 @@ class ZimukuProvider(Provider):
language = Language("eng")
for img in sub.find("td", class_="tac lang").find_all("img"):
if (
"china" in img.attrs["src"]
and "hongkong" in img.attrs["src"]
"china" in img.attrs["src"]
and "hongkong" in img.attrs["src"]
):
language = Language("zho").add(Language('zho', 'TW', None))
logger.debug("language:"+str(language))
logger.debug("language:" + str(language))
elif (
"china" in img.attrs["src"]
or "jollyroger" in img.attrs["src"]
"china" in img.attrs["src"]
or "jollyroger" in img.attrs["src"]
):
language = Language("zho")
elif "hongkong" in img.attrs["src"]:
@ -171,8 +200,6 @@ class ZimukuProvider(Provider):
return subs
def query(self, keyword, season=None, episode=None, year=None):
if self.vertoken == "":
self.yunsuo_bypass(self.server_url + '/')
params = keyword
if season:
params += ".S{season:02d}".format(season=season)
@ -181,8 +208,8 @@ class ZimukuProvider(Provider):
logger.debug("Searching subtitles %r", params)
subtitles = []
search_link = self.server_url + text_type(self.search_url).format(params, self.vertoken)
search_link = self.server_url + text_type(self.search_url).format(params)
r = self.yunsuo_bypass(search_link, timeout=30)
r.raise_for_status()
@ -198,7 +225,7 @@ class ZimukuProvider(Provider):
while parts:
parts.reverse()
redirect_url = urljoin(self.server_url, "".join(parts))
r = self.query_resp(redirect_url, timeout=30)
r = self.session.get(redirect_url, timeout=30)
html = r.content.decode("utf-8", "ignore")
parts = re.findall(pattern, html)
logger.debug("search url located: " + redirect_url)
@ -267,26 +294,22 @@ class ZimukuProvider(Provider):
return subtitles
def download_subtitle(self, subtitle):
def _get_archive_dowload_link(yunsuopass, sub_page_link):
r = yunsuopass(sub_page_link)
def _get_archive_download_link(yunsuopass, sub_page_link):
res = yunsuopass(sub_page_link)
bs_obj = ParserBeautifulSoup(
r.content.decode("utf-8", "ignore"), ["html.parser"]
res.content.decode("utf-8", "ignore"), ["html.parser"]
)
down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
down_page_link = urljoin(sub_page_link, down_page_link)
r = yunsuopass(down_page_link)
res = yunsuopass(down_page_link)
bs_obj = ParserBeautifulSoup(
r.content.decode("utf-8", "ignore"), ["html.parser"]
res.content.decode("utf-8", "ignore"), ["html.parser"]
)
download_link = bs_obj.find("a", {"rel": "nofollow"})
download_link = download_link.attrs["href"]
download_link = urljoin(sub_page_link, download_link)
return download_link
return urljoin(sub_page_link, bs_obj.find("a", {"rel": "nofollow"}).attrs["href"])
# download the subtitle
logger.info("Downloading subtitle %r", subtitle)
self.session = subtitle.session
download_link = _get_archive_dowload_link(self.yunsuo_bypass, subtitle.page_link)
download_link = _get_archive_download_link(self.yunsuo_bypass, subtitle.page_link)
r = self.yunsuo_bypass(download_link, headers={'Referer': subtitle.page_link}, timeout=30)
r.raise_for_status()
try:
@ -404,7 +427,7 @@ def _extract_name(name):
result = [start, end]
start = end
end += 1
new_name = name[result[0] : result[1]]
new_name = name[result[0]: result[1]]
new_name = new_name.strip() + suffix
return new_name
@ -413,7 +436,7 @@ def num_to_cn(number):
""" convert numbers(1-99) to Chinese """
assert number.isdigit() and 1 <= int(number) <= 99
trans_map = {n: c for n, c in zip(("123456789"), ("一二三四五六七八九"))}
trans_map = {n: c for n, c in zip("123456789", "一二三四五六七八九")}
if len(number) == 1:
return trans_map[number]

View File

@ -8,7 +8,8 @@ class SZFileBackend(CacheBackend):
def __init__(self, arguments):
self._cache = FileCache(arguments.pop("appname", None), flag=arguments.pop("flag", "c"),
serialize=arguments.pop("serialize", True),
app_cache_dir=arguments.pop("app_cache_dir", None))
app_cache_dir=arguments.pop("app_cache_dir", None),
mode=False)
def get(self, key):
value = self._cache.get(key, NO_VALUE)

View File

@ -1,17 +1,17 @@
from .utils import (
Result,
get_fld,
get_tld,
get_tld_names,
is_tld,
parse_tld,
Result,
update_tld_names,
)
__title__ = "tld"
__version__ = "0.12.6"
__version__ = "0.13"
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"get_fld",

View File

@ -1,16 +1,13 @@
from codecs import open as codecs_open
import logging
from codecs import open as codecs_open
from typing import Dict, ItemsView, Optional, Union
from urllib.request import urlopen
from typing import Optional, Dict, Union, ItemsView
from .exceptions import (
TldIOError,
TldImproperlyConfigured,
)
from .exceptions import TldImproperlyConfigured, TldIOError
from .helpers import project_dir
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"BaseTLDSourceParser",
@ -98,17 +95,15 @@ class BaseTLDSourceParser(metaclass=Registry):
try:
remote_file = urlopen(cls.source_url)
local_file_abs_path = project_dir(cls.local_path)
local_file = codecs_open(
local_file_abs_path, "wb", encoding="utf8"
)
local_file = codecs_open(local_file_abs_path, "wb", encoding="utf8")
local_file.write(remote_file.read().decode("utf8"))
local_file.close()
remote_file.close()
LOGGER.debug(
LOGGER.info(
f"Fetched '{cls.source_url}' as '{local_file_abs_path}'"
)
except Exception as err:
LOGGER.debug(
LOGGER.error(
f"Failed fetching '{cls.source_url}'. Reason: {str(err)}"
)
if fail_silently:

View File

@ -1,8 +1,9 @@
from typing import Any
from . import defaults
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2020 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"get_setting",

View File

@ -1,7 +1,7 @@
from os.path import dirname
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2020 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"DEBUG",

View File

@ -1,5 +1,5 @@
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"TldBadUrl",

View File

@ -3,7 +3,7 @@ from os.path import abspath, join
from .conf import get_setting
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"project_dir",

View File

@ -1,8 +1,9 @@
import warnings
from .base import Registry
from .base import Registry # noqa
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = ("Registry",)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@ from typing import Any, Dict
from urllib.parse import SplitResult
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = ("Result",)

View File

@ -1,8 +0,0 @@
import unittest
from .test_core import *
from .test_commands import *
if __name__ == "__main__":
unittest.main()

View File

@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
from functools import lru_cache
import logging
import socket
from functools import lru_cache
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"internet_available_only",

View File

@ -1,14 +1,13 @@
# -*- coding: utf-8 -*-
import logging
import subprocess
import unittest
import subprocess
from .base import log_info, internet_available_only
from .base import internet_available_only, log_info
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "GPL 2.0/LGPL 2.1"
__all__ = ("TestCommands",)

View File

@ -2,12 +2,11 @@
import copy
import logging
from os.path import abspath, join
import unittest
from os.path import abspath, join
from tempfile import gettempdir
from typing import Type
from urllib.parse import urlsplit, SplitResult
from urllib.parse import SplitResult, urlsplit
from faker import Faker # type: ignore
@ -22,23 +21,22 @@ from ..exceptions import (
)
from ..helpers import project_dir
from ..utils import (
BaseMozillaTLDSourceParser,
MozillaTLDSourceParser,
get_fld,
get_tld,
get_tld_names,
get_tld_names_container,
is_tld,
MozillaTLDSourceParser,
BaseMozillaTLDSourceParser,
parse_tld,
reset_tld_names,
update_tld_names,
update_tld_names_cli,
)
from .base import internet_available_only, log_info
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = ("TestCore",)
@ -647,9 +645,7 @@ class TestCore(unittest.TestCase):
Assert raise TldIOError on wrong `NAMES_SOURCE_URL` for `parse_tld`.
"""
parser_class = self.get_custom_parser_class(
source_url="i-do-not-exist"
)
parser_class = self.get_custom_parser_class(source_url="i-do-not-exist")
parsed_tld = parse_tld(
self.bad_url, fail_silently=False, parser_class=parser_class
)
@ -810,9 +806,7 @@ class TestCore(unittest.TestCase):
"""Test len of the trie nodes."""
get_tld("http://delusionalinsanity.com")
tld_names = get_tld_names_container()
self.assertGreater(
len(tld_names[MozillaTLDSourceParser.local_path]), 0
)
self.assertGreater(len(tld_names[MozillaTLDSourceParser.local_path]), 0)
@log_info
def test_25_get_tld_names_no_arguments(self):
@ -842,3 +836,16 @@ class TestCore(unittest.TestCase):
fragment="",
),
)
@log_info
def test_27_tld_fail_silently_pass(self):
"""Test `get_tld` bad URL patterns that would raise exception
if `fail_silently` isn't `True`.
"""
res = []
bad_url = ["https://user:password[@host.com", "https://user[@host.com"]
for url in bad_url:
_res = get_tld(url, fail_silently=True)
self.assertEqual(_res, None)
res.append(_res)
return res

View File

@ -1,7 +1,7 @@
import unittest
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = ("TestRegistry",)
@ -11,4 +11,4 @@ class TestRegistry(unittest.TestCase):
def test_import_from_registry(self):
"""Test import from deprecated `valuta.registry` module."""
from ..registry import Registry
from ..registry import Registry # noqa

View File

@ -1,5 +1,5 @@
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"Trie",

View File

@ -1,13 +1,12 @@
from __future__ import unicode_literals
import argparse
import sys
from codecs import open as codecs_open
from functools import lru_cache
# codecs_open = open
from os.path import isabs
import sys
from typing import Dict, Type, Union, Tuple, List, Optional
from urllib.parse import urlsplit, SplitResult
from typing import Dict, List, Optional, Tuple, Type, Union
from urllib.parse import SplitResult, urlsplit
from .base import BaseTLDSourceParser, Registry
from .exceptions import (
@ -17,11 +16,14 @@ from .exceptions import (
TldIOError,
)
from .helpers import project_dir
from .trie import Trie
from .result import Result
from .trie import Trie
# codecs_open = open
__author__ = "Artur Barseghyan"
__copyright__ = "2013-2021 Artur Barseghyan"
__copyright__ = "2013-2023 Artur Barseghyan"
__license__ = "MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later"
__all__ = (
"BaseMozillaTLDSourceParser",
@ -132,9 +134,7 @@ def update_tld_names_cli() -> int:
parser_uid = args.parser_uid
fail_silently = args.fail_silently
return int(
not update_tld_names(
parser_uid=parser_uid, fail_silently=fail_silently
)
not update_tld_names(parser_uid=parser_uid, fail_silently=fail_silently)
)
@ -229,7 +229,7 @@ class BaseMozillaTLDSourceParser(BaseTLDSourceParser):
update_tld_names_container(cls.local_path, trie)
local_file.close()
except IOError as err:
except IOError:
# Grab the file
cls.update_tld_names(fail_silently=fail_silently)
# Increment ``retry_count`` in order to avoid infinite loops
@ -314,20 +314,14 @@ def process_url(
parsed_url = urlsplit(url)
except ValueError as e:
if fail_silently:
parsed_url = url
return None, None, url
else:
raise e
else:
parsed_url = url
# Get (sub) domain name
try:
domain_name = parsed_url.hostname
except AttributeError as e:
if fail_silently:
domain_name = None
else:
raise e
domain_name = parsed_url.hostname
if not domain_name:
if fail_silently:

8
libs/trakit/__init__.py Normal file
View File

@ -0,0 +1,8 @@
__title__ = 'trakit'
__version__ = '0.2.1'
__short_version__ = '0.2'
__author__ = 'RatoAQ'
__license__ = 'MIT'
__url__ = 'https://github.com/ratoaq2/trakit'
from .api import TrakItApi, trakit

108
libs/trakit/__main__.py Normal file
View File

@ -0,0 +1,108 @@
import argparse
import json
import logging
import sys
import typing
import babelfish
from trakit import TrakItApi, __version__
logging.basicConfig(stream=sys.stdout, format='%(message)s')
logging.getLogger('CONSOLE').setLevel(logging.INFO)
logging.getLogger('trakit').setLevel(logging.WARNING)
console = logging.getLogger('CONSOLE')
logger = logging.getLogger('trakit')
def build_argument_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the trakit command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        dest='value',
        help='track title to guess',
        type=str,
    )

    config_group = parser.add_argument_group('Configuration')
    config_group.add_argument(
        '-l',
        '--expected-language',
        dest='expected_language',
        help='The expected language to be guessed',
        type=str,
    )

    output_group = parser.add_argument_group('Output')
    output_group.add_argument(
        '--debug',
        action='store_true',
        dest='debug',
        help='Print information for debugging trakit and for reporting bugs.'
    )
    output_group.add_argument(
        '-y',
        '--yaml',
        action='store_true',
        dest='yaml',
        help='Display output in yaml format'
    )

    info_group = parser.add_argument_group('Information')
    info_group.add_argument('--version', action='version', version=__version__)

    return parser
def _as_yaml(value: str, info: typing.Mapping[str, typing.Any]) -> str:
    """Convert info to string using YAML format."""
    import yaml

    def stringify(representer: yaml.representer.SafeRepresenter, data: typing.Any):
        # Serialize babelfish Language objects via their str() form.
        return representer.represent_scalar('tag:yaml.org,2002:str', str(data))

    yaml.representer.SafeRepresenter.add_representer(babelfish.Language, stringify)
    return yaml.safe_dump({value: dict(info)}, allow_unicode=True, sort_keys=False)
def _as_json(info: typing.Mapping[str, typing.Any]) -> str:
"""Convert info to string using JSON format."""
return json.dumps(info, ensure_ascii=False, indent=2, default=str)
def dump(value: str, info: typing.Mapping[str, typing.Any], opts: argparse.Namespace) -> str:
    """Convert info to string using json or yaml format."""
    return _as_yaml(value, info) if opts.yaml else _as_json(info)
def trakit(value: str, opts: argparse.Namespace) -> typing.Mapping:
    """Extract video metadata."""
    if not opts.yaml:
        console.info('Parsing: %s', value)
    # Only forward CLI options the user actually set.
    cli_options = {key: val for key, val in vars(opts).items() if val is not None}
    info = TrakItApi().trakit(value, cli_options)
    console.info('TrakIt %s found: ', __version__)
    console.info(dump(value, info, opts))
    return info
def main(args: typing.Optional[typing.List[str]] = None):
    """Execute main function for entry point.

    :param args: CLI argument list; falls back to ``sys.argv[1:]`` only
        when omitted.
    """
    argument_parser = build_argument_parser()
    # `args or sys.argv[1:]` would silently discard an explicitly passed
    # empty list; test for None so callers can supply [] deliberately.
    if args is None:
        args = sys.argv[1:]
    opts = argument_parser.parse_args(args)

    if opts.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('rebulk').setLevel(logging.DEBUG)

    return trakit(opts.value, opts)
if __name__ == '__main__':
main(sys.argv[1:])

24
libs/trakit/api.py Normal file
View File

@ -0,0 +1,24 @@
import typing
from trakit.config import Config
from trakit.context import Context
from trakit.patterns import configure
class TrakItApi:
    """Entry point wrapping a configured rebulk pattern matcher."""

    def __init__(self, config: typing.Optional[typing.Mapping[str, typing.Any]] = None):
        self.rebulk = configure(Config(config))

    def trakit(self, string: str, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
        """Return a mapping of extracted information."""
        found = self.rebulk.matches(string, Context(options))
        result: typing.Mapping[str, typing.Any] = found.to_dict()
        return result
# Shared, default-configured API instance backing the module-level helper.
default_api = TrakItApi()


def trakit(string: str, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
    """Extract information from *string* using the shared default API."""
    return default_api.trakit(string, options)

19
libs/trakit/config.py Normal file
View File

@ -0,0 +1,19 @@
import json
import typing
from pkg_resources import resource_stream
class Config:
    """Lookup tables loaded from the bundled data/config.json, with optional overrides."""

    def __init__(self, config: typing.Optional[typing.Mapping[str, typing.Any]]):
        with resource_stream('trakit', 'data/config.json') as stream:
            settings: typing.Dict[str, typing.Any] = json.load(stream)
        if config:
            # User-supplied keys take precedence over the bundled defaults.
            settings.update(config)

        # Tokens to skip entirely while matching.
        self.ignored: typing.Set[str] = set(settings.get('ignored', []))
        # Synonym -> code lookup tables.
        self.countries: typing.Mapping[str, str] = settings.get('countries', {})
        self.languages: typing.Mapping[str, str] = settings.get('languages', {})
        self.scripts: typing.Mapping[str, str] = settings.get('scripts', {})
        self.regions: typing.Mapping[str, str] = settings.get('regions', {})
        self.implicit_languages: typing.Mapping[str, str] = settings.get('implicit-languages', {})

22
libs/trakit/context.py Normal file
View File

@ -0,0 +1,22 @@
import typing
import babelfish
class Context(dict):
    """Matching options; normalizes and applies the optional expected-language filter."""

    def __init__(self, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
        super().__init__(options or {})
        language = self['expected_language'] if 'expected_language' in self else None
        # Coerce plain strings (e.g. 'pt-BR') into a babelfish.Language.
        if language and not isinstance(language, babelfish.Language):
            language = babelfish.Language.fromietf(str(language))
        self.expected_language: typing.Optional[babelfish.Language] = language

    def accept(self, lang: babelfish.Language):
        """Return True when *lang* is compatible with the expected language filter."""
        if self.expected_language is None:
            return True
        if self.expected_language.alpha3 != lang.alpha3:
            return False
        # BUGFIX: compare script to script and country to country. The previous
        # code compared the whole Language object against lang.script /
        # lang.country, which can never be equal, so any expected language
        # carrying a script (and most carrying a country) rejected every match.
        if self.expected_language.script and self.expected_language.script != lang.script:
            return False
        return not self.expected_language.country or self.expected_language.country == lang.country

View File

View File

@ -0,0 +1,32 @@
import typing
from babelfish import Country, CountryReverseConverter, CountryReverseError
from babelfish.converters import CaseInsensitiveDict
class GuessCountryConverter(CountryReverseConverter):
    """Country converter that also resolves configured synonyms."""

    def __init__(self, config: typing.Mapping[str, str]):
        # Case-insensitive synonym -> alpha2 table built from configuration.
        self.synonyms = CaseInsensitiveDict(config)

    def convert(self, alpha2):
        return str(Country(alpha2))

    def reverse(self, name: str):
        # 1) configured synonyms
        try:
            return self.synonyms[name]
        except KeyError:
            pass
        # 2) a bare upper-case alpha2 code such as 'US'
        if name.isupper() and len(name) == 2:
            try:
                return Country(name).alpha2
            except ValueError:
                pass
        # 3) the official country name
        try:
            return Country.fromname(name).alpha2
        except CountryReverseError:
            pass
        raise CountryReverseError(name)

View File

@ -0,0 +1,30 @@
import typing
from babelfish import Language, LanguageReverseConverter, LanguageReverseError
from babelfish.converters import CaseInsensitiveDict
class GuessLanguageConverter(LanguageReverseConverter):
    """Language converter that also resolves configured synonyms."""

    def __init__(self, config: typing.Mapping[str, str]):
        self.synonyms = CaseInsensitiveDict()
        for synonym, code in config.items():
            # IETF tags (with a '-') carry country/script; plain codes are alpha3/alpha2.
            lang = Language.fromietf(code) if '-' in code else Language(code)
            country = lang.country.alpha2 if lang.country else None
            self.synonyms[synonym] = (lang.alpha3, country, lang.script)

    def convert(self, alpha3: str, country=None, script=None):
        return str(Language(alpha3, country, script))

    def reverse(self, name: str):
        # Configured synonyms first, then the official language name.
        try:
            return self.synonyms[name]
        except KeyError:
            pass
        try:
            match = Language.fromname(name)
        except (ValueError, LanguageReverseError):
            pass
        else:
            return match.alpha3, match.country, match.script
        raise LanguageReverseError(name)

View File

@ -0,0 +1,860 @@
{
"countries": {
"Afghan": "AF",
"Aforika Borwa": "ZA",
"Afrika Borwa": "ZA",
"Afrika Dzonga": "ZA",
"Afurika Tshipembe": "ZA",
"Aland": "AX",
"Alandish": "AX",
"Albanian": "AL",
"Algerian": "DZ",
"American": "US",
"American Islander": "UM",
"American Samoan": "AS",
"American Virgin Islander": "VI",
"Andorran": "AD",
"Angolan": "AO",
"Anguillian": "AI",
"Antarctican": "AQ",
"Antiguan Barbudan": "AG",
"Ao Men": "MO",
"Aotearoa": "NZ",
"Argentine": "AR",
"Armenian": "AM",
"Aruban": "AW",
"Australian": "AU",
"Austrian": "AT",
"Ayiti": "HT",
"Azerbaidzhan": "AZ",
"Azerbaijani": "AZ",
"Azərbaycan": "AZ",
"Bahamian": "BS",
"Bahraini": "BH",
"Bangladeshi": "BD",
"Barbadian": "BB",
"Beafrika": "CF",
"Belarusian": "BY",
"Belau": "PW",
"Belgian": "BE",
"Belgie": "BE",
"Belgien": "BE",
"Belgique": "BE",
"België": "BE",
"Belice": "BZ",
"Belizean": "BZ",
"Beninese": "BJ",
"Bermudian": "BM",
"Bhutanese": "BT",
"Blgariia": "BG",
"Bolivia": "BO",
"Bolivian": "BO",
"Boneiru Sint Eustatius y Saba": "BQ",
"Bosna i Hercegovina": "BA",
"Bosna i Khertsegovina": "BA",
"Bosnian Herzegovinian": "BA",
"Bouvetoya": "BV",
"Bouvetøya": "BV",
"Brasil": "BR",
"Brazilian": "BR",
"British": "GB",
"British Virgin Islander": "VG",
"British Virgin Islands": "VG",
"Bruneian": "BN",
"Bulgarian": "BG",
"Buliwya": "BO",
"Burkinabe": "BF",
"Burmese": "MM",
"Burundian": "BI",
"Bénin": "BJ",
"Bêafrîka": "CF",
"Cabo Verde": "CV",
"Cambodian": "KH",
"Cameroonian": "CM",
"Cameroun": "CM",
"Canadian": "CA",
"Cape Verdian": "CV",
"Caribisch Nederland": "BQ",
"Caymanian": "KY",
"Central African": "CF",
"Cesko": "CZ",
"Chadian": "TD",
"Channel Islander": "JE",
"Chilean": "CL",
"Chinese": "CN",
"Christmas Islander": "CX",
"Cocos Islander": "CC",
"Cocos Keeling Islands": "CC",
"Colombian": "CO",
"Comoran": "KM",
"Comores": "KM",
"Congolese": "CD",
"Cook Islander": "CK",
"Costa Rican": "CR",
"Cote dIvoire": "CI",
"Croatian": "HR",
"Cuban": "CU",
"Curacao": "CW",
"Curacaoan": "CW",
"Curaçaoan": "CW",
"Cypriot": "CY",
"Czech": "CZ",
"Côte dIvoire": "CI",
"Danish": "DK",
"Danmark": "DK",
"Deutschland": "DE",
"Dgernesiais": "GG",
"Dgèrnésiais": "GG",
"Ditunga dia Kongu wa Mungalaata": "CD",
"Dominican": "DO",
"Dutch": "NL",
"East Timorese": "TL",
"Ecuadorean": "EC",
"Eesti": "EE",
"Egyptian": "EG",
"Eire": "IE",
"Ellada": "GR",
"Emirati": "AE",
"Equatorial Guinean": "GQ",
"Eritrean": "ER",
"Espana": "ES",
"España": "ES",
"Estados Unidos": "US",
"Estonian": "EE",
"Eswatini": "SZ",
"Ethiopian": "ET",
"Faereyjar": "FO",
"Faeroerne": "FO",
"Falkland Islander": "FK",
"Falkland Islands": "FK",
"Faroese": "FO",
"Fijian": "FJ",
"Filipino": "PH",
"Finnish": "FI",
"Foroyar": "FO",
"French": "FR",
"French Polynesian": "PF",
"Færeyjar": "FO",
"Færøerne": "FO",
"Føroyar": "FO",
"Gabonese": "GA",
"Gambian": "GM",
"Georgian": "GE",
"German": "DE",
"Ghanaian": "GH",
"Greek": "GR",
"Greenlandic": "GL",
"Grenadian": "GD",
"Guadeloupian": "GP",
"Guahan": "GU",
"Guamanian": "GU",
"Guatemalan": "GT",
"Guernesey": "GG",
"Guianan": "GF",
"Guine Bissau": "GW",
"Guine Equatorial": "GQ",
"Guinea Bissauan": "GW",
"Guinea Ecuatorial": "GQ",
"Guinean": "GN",
"Guinee": "GN",
"Guinee equatoriale": "GQ",
"Guiné Bissau": "GW",
"Guiné Equatorial": "GQ",
"Guinée": "GN",
"Guinée équatoriale": "GQ",
"Guyane francaise": "GF",
"Guyane française": "GF",
"Guyanese": "GY",
"Guåhån": "GU",
"Haitian": "HT",
"Hayastan": "AM",
"Haïti": "HT",
"Heard and McDonald Islander": "HM",
"Honduran": "HN",
"Hong Konger": "HK",
"Hrvatska": "HR",
"Hungarian": "HU",
"I Kiribati": "KI",
"Icelander": "IS",
"Indian": "IN",
"Indonesian": "ID",
"Iranian": "IR",
"Iraqi": "IQ",
"Irish": "IE",
"Island": "IS",
"Israeli": "IL",
"Italia": "IT",
"Italian": "IT",
"Ivorian": "CI",
"Jamaican": "JM",
"Jamhuri ya Kidemokrasia ya Kongo": "CD",
"Japanese": "JP",
"Jerri": "JE",
"Jordanian": "JO",
"Jèrri": "JE",
"Kalaallit Nunaat": "GL",
"Kampuchea": "KH",
"Kazakhstani": "KZ",
"Kazakstan": "KZ",
"Kenyan": "KE",
"Kibris": "CY",
"Kirghiz": "KG",
"Kirgiziia": "KG",
"Kittitian or Nevisian": "KN",
"Komori": "KM",
"Kuki Airani": "CK",
"Kupros": "CY",
"Kuwaiti": "KW",
"Kâmpŭchéa": "KH",
"Kıbrıs": "CY",
"Kūki Āirani": "CK",
"La Reunion": "RE",
"La Réunion": "RE",
"Laotian": "LA",
"Latvian": "LV",
"Latvija": "LV",
"Lebanese": "LB",
"Letzebuerg": "LU",
"Liban": "LB",
"Liberian": "LR",
"Libyan": "LY",
"Liechtensteiner": "LI",
"Lietuva": "LT",
"Lithuanian": "LT",
"Luxembourger": "LU",
"Luxemburg": "LU",
"Lëtzebuerg": "LU",
"Macanese": "MO",
"Macau": "MO",
"Macedonian": "MK",
"Madagasikara": "MG",
"Magyarorszag": "HU",
"Magyarország": "HU",
"Mahoran": "YT",
"Majel": "MH",
"Makedonija": "MK",
"Makedonski": "MK",
"Malagasy": "MG",
"Malawian": "MW",
"Malaysian": "MY",
"Malaŵi": "MW",
"Maldivan": "MV",
"Malian": "ML",
"Maltese": "MT",
"Mannin": "IM",
"Manx": "IM",
"Marshallese": "MH",
"Martinican": "MQ",
"Maurice": "MU",
"Mauritanian": "MR",
"Mauritian": "MU",
"Mexican": "MX",
"Micronesia": "FM",
"Micronesian": "FM",
"Mocambique": "MZ",
"Moldova": "MD",
"Moldovan": "MD",
"Monegasque": "MC",
"Mongol uls": "MN",
"Mongolian": "MN",
"Montenegrin": "ME",
"Montserratian": "MS",
"Moris": "MU",
"Moroccan": "MA",
"Mosotho": "LS",
"Motswana": "BW",
"Mozambican": "MZ",
"Moçambique": "MZ",
"Mzantsi Afrika": "ZA",
"México": "MX",
"M̧ajeļ": "MH",
"Na Islas Marianas": "MP",
"Na Islas Mariånas": "MP",
"Namibian": "NA",
"Namibie": "NA",
"Namibië": "NA",
"Nauruan": "NR",
"Nederland": "NL",
"Negara Brunei Darussalam": "BN",
"Nepalese": "NP",
"New Caledonian": "NC",
"New Zealander": "NZ",
"Ni Vanuatu": "VU",
"Nicaraguan": "NI",
"Nigerian": "NG",
"Nigerien": "NE",
"Ningizimu Afrika": "ZA",
"Niuean": "NU",
"Niuē": "NU",
"Noreg": "NO",
"Norfk Ailen": "NF",
"Norfolk Islander": "NF",
"Norge": "NO",
"Norgga": "NO",
"North Korean": "KP",
"Norwegian": "NO",
"Nouvelle Caledonie": "NC",
"Nouvelle Calédonie": "NC",
"Omani": "OM",
"Osterreich": "AT",
"Owganystan": "AF",
"Ozbekiston": "UZ",
"Oʻzbekiston": "UZ",
"Pais Korsou": "CW",
"Pais Kòrsou": "CW",
"Pakistani": "PK",
"Palauan": "PW",
"Palestinian": "PS",
"Panamanian": "PA",
"Panamá": "PA",
"Papua New Guinean": "PG",
"Papua Niu Gini": "PG",
"Papua Niugini": "PG",
"Paraguai": "PY",
"Paraguayan": "PY",
"Paraguái": "PY",
"Peruvian": "PE",
"Perú": "PE",
"Pilipinas": "PH",
"Piruw": "PE",
"Pitcairn Islander": "PN",
"Pitcairn Islands": "PN",
"Polish": "PL",
"Polska": "PL",
"Polynesie francaise": "PF",
"Polynésie française": "PF",
"Portuguese": "PT",
"Puerto Rican": "PR",
"Qatari": "QA",
"RD Congo": "CD",
"Repubilika ya Kongo": "CG",
"Repubilika ya Kongo Demokratiki": "CD",
"Republica Dominicana": "DO",
"Republiki ya Kongo": "CG",
"Republiki ya Kongo Demokratiki": "CD",
"Republiki ya Kongó Demokratiki": "CD",
"Republique centrafricaine": "CF",
"Republique du Congo": "CG",
"Republíki ya Kongó": "CG",
"República Dominicana": "DO",
"Reunionese": "RE",
"Ri Ben": "JP",
"Romanian": "RO",
"România": "RO",
"Rossiia": "RU",
"Russian": "RU",
"Rwandan": "RW",
"République centrafricaine": "CF",
"République du Congo": "CG",
"Réunionese": "RE",
"Sahara Occidental": "EH",
"Sahrawi": "EH",
"Saint Barthelemy": "BL",
"Saint Barthelemy Islander": "BL",
"Saint Barthélemy Islander": "BL",
"Saint Helena Ascension and Tristan da Cunha": "SH",
"Saint Helenian": "SH",
"Saint Lucian": "LC",
"Saint Martin": "MF",
"Saint Martin Islander": "MF",
"Saint Pierrais Miquelonnais": "PM",
"Saint Pierre et Miquelon": "PM",
"Saint Vincentian": "VC",
"Salvadoran": "SV",
"Sammarinese": "SM",
"Samoa Amelika": "AS",
"Samoan": "WS",
"Sao Tome e Principe": "ST",
"Sao Tomean": "ST",
"Saudi Arabian": "SA",
"Schweiz": "CH",
"Senegalese": "SN",
"Serbian": "RS",
"Sesel": "SC",
"Sewula Afrika": "ZA",
"Seychellois": "SC",
"Shqiperia": "AL",
"Shqipëria": "AL",
"Sierra Leonean": "SL",
"Singaporean": "SG",
"Singapura": "SG",
"Sint Maarten": "SX",
"Slovak": "SK",
"Slovene": "SI",
"Slovenija": "SI",
"Slovensko": "SK",
"Solomon Islander": "SB",
"Somali": "SO",
"Soomaaliya": "SO",
"South African": "ZA",
"South Georgia": "GS",
"South Georgian South Sandwich Islander": "GS",
"South Korean": "KR",
"South Sudanese": "SS",
"Spanish": "ES",
"Srbija": "RS",
"Sri Lankan": "LK",
"St Maartener": "SX",
"Sudanese": "SD",
"Suisse": "CH",
"Suomi": "FI",
"Surinamer": "SR",
"Svalbard og Jan Mayen": "SJ",
"Sverige": "SE",
"Svizra": "CH",
"Svizzera": "CH",
"Swazi": "SZ",
"Swedish": "SE",
"Swiss": "CH",
"Syrian": "SY",
"São Tomé e Príncipe": "ST",
"Sénégal": "SN",
"Sāmoa": "WS",
"Sāmoa Amelika": "AS",
"Tadzhik": "TJ",
"Tadzhikistan": "TJ",
"Tai Wan": "TW",
"Taiwanese": "TW",
"Tanzania": "TZ",
"Tanzanian": "TZ",
"Tchad": "TD",
"Terres australes et antarctiques francaises": "TF",
"Terres australes et antarctiques françaises": "TF",
"Thai": "TH",
"Timor Leste": "TL",
"Timór Leste": "TL",
"Tochikiston": "TJ",
"Togolese": "TG",
"Tokelauan": "TK",
"Tongan": "TO",
"Trinidadian": "TT",
"Tsrna Gora": "ME",
"Tunisian": "TN",
"Turkish": "TR",
"Turkiye": "TR",
"Turkmen": "TM",
"Turkmeniia": "TM",
"Turks and Caicos Islander": "TC",
"Tuvaluan": "TV",
"Türkiye": "TR",
"Türkmenistan": "TM",
"UK": "GB",
"US": "US",
"Uburundi": "BI",
"Ugandan": "UG",
"Ukrainian": "UA",
"Ukrayina": "UA",
"United States Virgin Islands": "VI",
"Uruguayan": "UY",
"Uzbekistani": "UZ",
"Vatican": "VA",
"Vaticanae": "VA",
"Vaticano": "VA",
"Vaticanæ": "VA",
"Venezuela": "VE",
"Venezuelan": "VE",
"Vietnam": "VN",
"Vietnamese": "VN",
"Viti": "FJ",
"Việt Nam": "VN",
"Volivia": "BO",
"Volívia": "BO",
"Wallis and Futuna Islander": "WF",
"Wallis et Futuna": "WF",
"Wuliwya": "BO",
"Xiang Gang": "HK",
"Xin Jia Po": "SG",
"Yemeni": "YE",
"Zambian": "ZM",
"Zhong Guo": "CN",
"Zhong Guo Da Lu": "CN",
"Zimbabwean": "ZW",
"`mn": "OM",
"baaNlaadesh": "BD",
"bbaart nuuN": "IN",
"bhaart": "IN",
"brug-yul-": "BT",
"canadien": "CA",
"cingkppuur": "SG",
"dhivehiraajeyge": "MV",
"eSwatini": "SZ",
"eereteraa": "ER",
"fGnstn": "AF",
"flsTyn": "PS",
"hangug": "KR",
"ilngkai": "LK",
"intiyaa": "IN",
"joseon": "KP",
"jybwty": "DJ",
"khoemry": "IQ",
"lSwml": "SO",
"l`rq": "IQ",
"lbHryn": "BH",
"lbnn": "LB",
"ljzyr": "DZ",
"lkwyt": "KW",
"lmGrb": "MA",
"lqmr": "KM",
"lrdn": "JO",
"lswdn": "SD",
"lyaman": "YE",
"lyby": "LY",
"mSr": "EG",
"mlysy": "MY",
"mnmaa": "MM",
"mwrytny": "MR",
"nepaal": "NP",
"phijii": "FJ",
"pkstn": "PK",
"praethsaithy": "TH",
"qTr": "QA",
"qwutnA": "IQ",
"rtry": "ER",
"sak`art`velo": "GE",
"shrii lNkaav": "LK",
"spplaaw": "LA",
"sryyl": "IL",
"swry": "SY",
"teyopheyaa": "ET",
"tshd": "TD",
"twns": "TN",
"ySHrAl": "IL",
"yrn": "IR",
"Åland": "AX",
"Ålandish": "AX",
"Éire": "IE",
"Ísland": "IS",
"Österreich": "AT",
"Česko": "CZ",
"Ελλάδα": "GR",
"Κύπρος": "CY",
"Азербайджан": "AZ",
"Белару́сь": "BY",
"Беларусь": "BY",
"Босна и Херцеговина": "BA",
"България": "BG",
"Казахстан": "KZ",
"Киргизия": "KG",
"Кыргызстан": "KG",
"Македонија": "MK",
"Македонски": "MK",
"Монгол улс": "MN",
"Россия": "RU",
"Србија": "RS",
"Таджикистан": "TJ",
"Тоҷикистон": "TJ",
"Туркмения": "TM",
"Узбекистан": "UZ",
"Україна": "UA",
"Црна Гора": "ME",
"Қазақстан": "KZ",
"Հայաստան": "AM",
"ישראל": "IL",
"إرتريا‎": "ER",
"إسرائيل": "IL",
"افغانستان": "AF",
"الأردن": "JO",
"البحرين": "BH",
"الجزائر": "DZ",
"السعودية": "SA",
"السودان": "SD",
"الصحراء الغربية": "EH",
"الصومال‎‎": "SO",
"العراق": "IQ",
"العربية السعودية": "SA",
"القمر‎": "KM",
"الكويت": "KW",
"المغرب": "MA",
"اليَمَن": "YE",
"ایران": "IR",
"تشاد‎": "TD",
"تونس": "TN",
"جيبوتي‎": "DJ",
"دولة الإمارات العربية المتحدة": "AE",
"سوريا": "SY",
"عمان": "OM",
"فلسطين": "PS",
"قطر": "QA",
"لبنان": "LB",
"ليبيا": "LY",
"مصر": "EG",
"مليسيا": "MY",
"موريتانيا": "MR",
"پاكستان": "PK",
"کۆماری": "IQ",
"ܩܘܼܛܢܵܐ": "IQ",
"ދިވެހިރާއްޖޭގެ": "MV",
"नेपाल": "NP",
"फिजी": "FJ",
"भारत": "IN",
"বাংলাদেশ": "BD",
"ভারত": "IN",
"ਭਾਰਤ ਨੂੰ": "IN",
"இந்தியா": "IN",
"இலங்கை": "LK",
"சிங்கப்பூர்": "SG",
"ශ්‍රී ලංකාව": "LK",
"ประเทศไทย": "TH",
"ສປປລາວ": "LA",
"འབྲུག་ཡུལ་": "BT",
"မြန်မာ": "MM",
"საქართველო": "GE",
"ኢትዮጵያ": "ET",
"ኤርትራ": "ER",
"ⵍⵎⴰⵖⵔⵉⴱ": "MA",
"中国": "CN",
"中国大陆": "CN",
"台灣": "TW",
"新加坡": "SG",
"日本": "JP",
"澳门": "MO",
"香港": "HK",
"조선": "KP",
"한국": "KR"
},
"ignored": [
"bit",
"cc",
"ch",
"dan",
"day",
"gun",
"hr",
"jordan",
"la",
"ma",
"na",
"the",
"to"
],
"implicit-languages": {
"419": "es-419",
"BR": "pt-BR",
"CA": "fr-CA",
"Cantonese": "zh",
"Castilian": "es",
"FR": "fr-FR",
"GR": "ell",
"HK": "zh-HK",
"ID": "id-ID",
"Mandarin": "zh",
"Parisian": "fr-FR",
"Simplified": "zh-Hans",
"Traditional": "zh-Hant",
"UA": "uk-UA",
"UK": "en-GB",
"US": "en-US",
"VFF": "fr-FR",
"VFQ": "fr-CA",
"VN": "vie",
"cant": "zh",
"eng": "en",
"ita": "it",
"简体双语": "zh-Hans",
"繁体双语": "zh-Hant"
},
"languages": {
"Adygebze": "ady",
"Avanee": "grn",
"Avañeẽ": "grn",
"Aymar aru": "aym",
"Azərbaycan dili": "aze",
"Bahasa Indonesia": "ind",
"Bahasa Melayu": "msa",
"Basa Jawa": "jav",
"Basa Sunda": "sun",
"Belaruskaia": "bel",
"Blgarski": "bul",
"Bosanski": "bos",
"Brezhoneg": "bre",
"Catala": "cat",
"Català": "cat",
"Cestina": "ces",
"Cymraeg": "cym",
"Dansk": "dan",
"Davvisamegiella": "sme",
"Davvisámegiella": "sme",
"Deutsch": "deu",
"Dolnoserbscina": "dsb",
"Dolnoserbšćina": "dsb",
"Eesti": "est",
"Ellenika": "ell",
"Espanol": "spa",
"Espanol Latinoamerica": "es-419",
"Español": "spa",
"Español Latinoamérica": "es-419",
"Euskara": "eus",
"Foroyskt": "fao",
"Francais": "fra",
"Français": "fra",
"Frysk": "fry",
"Føroyskt": "fao",
"Gaeilge": "gle",
"Gaelg": "glv",
"Gaidhlig": "gla",
"Galego": "glg",
"Greek": "ell",
"Guang Dong Hua ": "zho",
"Gàidhlig": "gla",
"Hayeren": "hye",
"Hornjoserbscina": "hsb",
"Hornjoserbšćina": "hsb",
"Hrvatski": "hrv",
"Islenska": "isl",
"Italiano": "ita",
"Kazaksha": "kaz",
"Kernewek": "cor",
"Kiswahili": "swa",
"Kreyol": "hat",
"Kreyòl": "hat",
"Kurdi": "kur",
"Kurdî": "kur",
"Latviesu": "lav",
"Latviešu": "lav",
"Lemborgs": "lim",
"Letzebuergesch": "ltz",
"Lietuviu": "lit",
"Lietuvių": "lit",
"Lwo": "ach",
"Lèmbörgs": "lim",
"Lëtzebuergesch": "ltz",
"Magyar": "hun",
"Makedonski": "mkd",
"Malay": "msa",
"Malti": "mlt",
"Maya Kaqchikel": "cak",
"Melayu": "msa",
"Mongol": "mon",
"Nederlands": "nld",
"Norsk": "nor",
"Norsk bokmal": "nob",
"Norsk bokmål": "nob",
"Norsk nynorsk": "nno",
"Occitan": "oci",
"Ozbek": "uzb",
"Polski": "pol",
"Portugues": "por",
"Português": "por",
"Qhichwa": "que",
"Ri Ben Yu": "jpn",
"Romana": "ron",
"Română": "ron",
"Rumantsch": "roh",
"Russkii": "rus",
"Shqip": "sqi",
"Slovencina": "slk",
"Slovenscina": "slv",
"Slovenčina": "slk",
"Slovenščina": "slv",
"Soomaaliga": "som",
"Srpski": "srp",
"Suomi": "fin",
"Svenska": "swe",
"Taqbaylit": "kab",
"TcYi": "aka",
"Tieng Viet": "vie",
"Tiếng Việt": "vie",
"Turkce": "tur",
"Türkçe": "tur",
"Tɕɥi": "aka",
"Ukrayinska": "ukr",
"Zhong Wen": "zho",
"Zhong Wen Fan Ti": "zh-Hant",
"Zhong Wen Jian Ti": "zh-Hans",
"`bryt": "heb",
"aithy": "tha",
"baaNlaa": "ben",
"bhaasaakhmaer": "khm",
"bmaackaa": "mya",
"eesti keel": "est",
"frsy": "fas",
"gujraatii": "guj",
"hangugeo": "kor",
"hindii": "hin",
"isiXhosa": "xho",
"isiZulu": "zul",
"k`art`uli": "kat",
"knndd": "kan",
"maithilii maithilii": "mai",
"mlyaallN": "mal",
"mraatthii": "mar",
"nepaalii": "nep",
"oddiaa": "ori",
"pNjaabii": "pan",
"pStw": "pus",
"phaasaaaithy": "tha",
"rdw": "urd",
"sNskRtm": "san",
"siNhl": "sin",
"srpskokhrvatski": "hbs",
"tatarcha": "tat",
"telugu": "tel",
"tlhIngan Hol": "tlh",
"tmilll": "tam",
"tochiki": "tgk",
"yyidySH": "yid",
"zaboni tochiki": "tgk",
"Íslenska": "isl",
"Čeština": "ces",
"Ελληνικά": "ell",
"Адыгэбзэ": "ady",
"Беларуская": "bel",
"Български": "bul",
"Македонски": "mkd",
"Монгол": "mon",
"Русский": "rus",
"Српски": "srp",
"Українська": "ukr",
"забо́ни тоҷикӣ́": "tgk",
"српскохрватски": "hbs",
"татарча": "tat",
"тоҷикӣ": "tgk",
"Қазақша": "kaz",
"Հայերեն": "hye",
"ייִדיש": "yid",
"עברית": "heb",
"اردو": "urd",
"العربية": "ara",
"فارسی": "fas",
"پښتو": "pus",
"नेपाली": "nep",
"मराठी": "mar",
"मैथिली মৈথিলী": "mai",
"संस्कृतम्": "san",
"हिन्दी": "hin",
"বাংলা": "ben",
"ਪੰਜਾਬੀ": "pan",
"ગુજરાતી": "guj",
"ଓଡ଼ିଆ": "ori",
"தமிழ்": "tam",
"తెలుగు": "tel",
"ಕನ್ನಡ": "kan",
"മലയാളം": "mal",
"සිංහල": "sin",
"ภาษาไทย": "tha",
"ไทย": "tha",
"ဗမာစကာ": "mya",
"ქართული": "kat",
"ភាសាខ្មែរ": "khm",
"中文": "zho",
"中文简体": "zh-Hans",
"中文繁體": "zh-Hant",
"廣東話": "zho",
"日本語": "jpn",
"한국어": "kor"
},
"regions": {
"Latin": "419",
"Latinoamerica": "419",
"Latinoamericano": "419",
"Latinoamérica": "419"
},
"scripts": {
"Fan Ti ": "Hant",
"Jian Ti ": "Hans",
"Simplified": "Hans",
"Traditional": "Hant",
"简体": "Hans",
"繁體": "Hant"
}
}

Some files were not shown because too many files have changed in this diff Show More