bazarr/libs/pyga/utils.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from random import randint
import re
import sys
from datetime import datetime

try:
    from urllib import quote
except ImportError as e:
    from urllib.parse import quote

# Text (unicode) string type for the running interpreter: ``unicode`` on
# Python 2, ``str`` on Python 3. Only the selected branch is evaluated, so
# the ``unicode`` name is never looked up on Python 3.
text_type = unicode if sys.version_info < (3,) else str


__author__ = "Arun KR (kra3) <the1.arun@gmail.com>"
__license__ = "Simplified BSD"

# Dotted-quad shape check: four groups of one to three digits separated by
# dots. Octet values are not range-checked (e.g. "999.1.1.1" still matches).
# The groups are written as \d{1,3}; a character class such as [\d+] would
# also accept a literal '+' in place of a digit.
RE_IP = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', re.I)
# Prefix check for 127.0.0.1, 10.*, 192.168.* and 172.16.* - 172.31.*.
# Only the start of the string is anchored.
RE_PRIV_IP = re.compile(r'^(?:127\.0\.0\.1|10\.|192\.168\.|172\.(?:1[6-9]|2[0-9]|3[0-1])\.)')
# One comma-separated locale entry: a letters-and-hyphens tag optionally
# followed by a ";q=<weight>" suffix.
RE_LOCALE = re.compile(r'(^|\s*,\s*)([a-zA-Z]{1,8}(-[a-zA-Z]{1,8})*)\s*(;\s*q\s*=\s*(1(\.0{0,3})?|0(\.[0-9]{0,3})))?', re.I)
# "UA-" or "MO-" followed by two hyphen-separated digit runs (each run may
# be empty as written).
RE_GA_ACCOUNT_ID = re.compile(r'^(UA|MO)-[0-9]*-[0-9]*$')
# Group 1 captures the first three octets including the trailing dot.
RE_FIRST_THREE_OCTETS_OF_IP = re.compile(r'^((\d{1,3}\.){3})\d{1,3}$')

def convert_ga_timestamp(timestamp_string):
    '''Convert a numeric timestamp (string or number) to a naive UTC datetime.

    The value is parsed with float(). Anything larger than the biggest
    signed 32-bit integer is treated as milliseconds and scaled down to
    seconds before conversion.
    '''
    max_int32 = (1 << 31) - 1
    seconds = float(timestamp_string)
    if seconds > max_int32:
        # Too large to be a 32-bit seconds value: interpret as milliseconds.
        seconds = seconds / 1000
    return datetime.utcfromtimestamp(seconds)

def get_32bit_random_num():
    '''Return a random integer between 0 and 0x7fffffff, both inclusive.'''
    upper_bound = 0x7fffffff
    return randint(0, upper_bound)

def is_valid_ip(ip):
    '''Return True when str(ip) matches the module-level RE_IP pattern, else False.'''
    candidate = str(ip)
    return RE_IP.match(candidate) is not None

def is_private_ip(ip):
    '''Return True when str(ip) starts with a prefix matched by RE_PRIV_IP, else False.'''
    candidate = str(ip)
    return RE_PRIV_IP.match(candidate) is not None

def validate_locale(locale):
    '''Return every RE_LOCALE match found in str(locale).

    The result is whatever ``findall`` produces for the pattern: a list of
    group tuples, empty when nothing matches.
    '''
    locale_text = str(locale)
    matches = RE_LOCALE.findall(locale_text)
    return matches

def is_valid_google_account(account):
    '''Return True when str(account) matches RE_GA_ACCOUNT_ID, else False.'''
    account_text = str(account)
    return RE_GA_ACCOUNT_ID.match(account_text) is not None

def generate_hash(tmpstr):
    '''Compute an integer hash of a string.

    Empty or falsy input hashes to 1. Otherwise the characters are folded
    in from last to first: the running value is shifted left six bits and
    masked to 28 bits, the character's code point is added both as-is and
    shifted left 14 bits, and any bits set in positions 21-27 are shifted
    down and XOR-ed back into the low bits.
    '''
    if not tmpstr:
        return 1

    result = 0
    for char in tmpstr[::-1]:
        code = ord(char)
        result = ((result << 6) & 0xfffffff) + code + (code << 14)
        high_bits = result & 0xfe00000
        if high_bits:
            result ^= high_bits >> 21

    return result

def anonymize_ip(ip):
    '''Return the address with its last octet replaced by 0.

    Falsy input, or input whose str() form does not match
    RE_FIRST_THREE_OCTETS_OF_IP, yields an empty string.
    '''
    if not ip:
        return ''

    found = RE_FIRST_THREE_OCTETS_OF_IP.match(str(ip))
    if found is None:
        return ''

    # Group 1 holds the first three octets including the trailing dot.
    return found.group(1) + '0'

def encode_uri_components(value):
    '''Mimics Javascript's encodeURIComponent() function for consistency with the GA Javascript client.

    Every character is percent-encoded (as UTF-8) except ASCII letters,
    digits and the marks ``- _ . ! ~ * ' ( )``, which encodeURIComponent()
    leaves untouched.

    @param value: the text to encode.
    @return: the percent-encoded string.
    '''
    # quote() defaults to safe='/', which would leave path separators
    # unescaped, and older Python versions also escape '~'. Passing the exact
    # set of marks that encodeURIComponent() keeps makes the output match:
    # '/' is escaped, while ! ~ * ' ( ) pass through unchanged.
    return quote(value, safe="!~*'()")

def convert_to_uri_component_encoding(value):
    '''Turn the upper-case escapes %21 %2A %27 %28 %29 back into ! * ' ( ).

    All other text, including lower-case escapes such as %2a, is left as is.
    '''
    unescape_pairs = (
        ('%21', '!'),
        ('%2A', '*'),
        ('%27', "'"),
        ('%28', '('),
        ('%29', ')'),
    )
    converted = value
    for escaped, literal in unescape_pairs:
        converted = converted.replace(escaped, literal)
    return converted

# Taken from expicient.com BJs repo.
def stringify(s, stype=None, fn=None):
    ''' Converts elements of a complex data structure to strings

    The data structure can be a multi-tiered one - with tuples and lists etc
    This method will loop through each and convert everything to string.
    For example - it can be -
    [[{'a1': {'a2': {'a3': ('a4', timedelta(0, 563)), 'a5': {'a6': datetime()}}}}]]
    which will be converted to -
    [[{'a1': {'a2': {'a3': ('a4', '0:09:23'), 'a5': {'a6': '2009-05-27 16:19:52.401500' }}}}]]

    Lists, dicts and sets are updated in place and returned; tuples are
    rebuilt as new tuples. Only exact list/set/dict/tuple instances are
    descended into - instances of subclasses are treated as single values.
    Dict keys are left untouched; only the values are converted.

    @param s: The value or (nested) container to convert.

    @param stype: If only one type of data element needs to be converted to
        string without affecting others, stype can be used.
        In the earlier example, if it is called with stringify(s, stype=datetime.timedelta)
        the result would be
        [[{'a1': {'a2': {'a3': ('a4', '0:09:23'), 'a5': {'a6': datetime() }}}}]]

    @param fn: Also, even though the name is stringify, any function can be
        run on it, based on parameter fn. If fn is None, it will be stringified.

    @return: The converted structure (the same object for list, dict and set).

    '''

    kind = type(s)

    if kind is dict:
        # Only values are reassigned, so the key set is stable while iterating.
        for key in s:
            s[key] = stringify(s[key], stype, fn)
        return s

    if kind is list:
        for index, item in enumerate(s):
            s[index] = stringify(item, stype, fn)
        return s

    if kind is set:
        # Sets do not support item assignment, so convert every member first
        # and then replace the contents, keeping the same set object.
        converted = [stringify(item, stype, fn) for item in s]
        s.clear()
        s.update(converted)
        return s

    if kind is tuple:
        return tuple([stringify(item, stype, fn) for item in s])

    # Leaf value: leave it alone when a type filter is given and does not match.
    if stype and not (stype == kind):
        return s

    if fn:
        return fn(s)

    # text_type is unicode on Python 2 and str on Python 3; the fallbacks
    # cover objects for which that conversion fails.
    try:
        return text_type(s)
    except AttributeError:
        return str(s)
    except UnicodeDecodeError:
        return s.decode('ascii', 'replace')
update deps 2018-10-31 16:08:29 +00:00			`# -- coding: utf-8 --`
Google Analytics implementation to get usage data in order to better understand how Bazarr is used. 2019-09-03 03:22:38 +00:00			`from __future__ import unicode_literals`
update deps 2018-10-31 16:08:29 +00:00			`from random import randint`
			`import re`
Google Analytics implementation to get usage data in order to better understand how Bazarr is used. 2019-09-03 03:22:38 +00:00			`import sys`
update deps 2018-10-31 16:08:29 +00:00			`from datetime import datetime`

Google Analytics implementation to get usage data in order to better understand how Bazarr is used. 2019-09-03 03:22:38 +00:00			`try:`
			`from urllib import quote`
			`except ImportError as e:`
			`from urllib.parse import quote`

			`if sys.version_info < (3,):`
			`text_type = unicode`
			`else:`
			`text_type = str`


update deps 2018-10-31 16:08:29 +00:00			`__author__ = "Arun KR (kra3) <the1.arun@gmail.com>"`
			`__license__ = "Simplified BSD"`

			`RE_IP = re.compile(r'^[\d+]{1,3}\.[\d+]{1,3}\.[\d+]{1,3}\.[\d+]{1,3}$', re.I)`
			`RE_PRIV_IP = re.compile(r'^(?:127\.0\.0\.1\|10\.\|192\.168\.\|172\.(?:1[6-9]\|2[0-9]\|3[0-1])\.)')`
			`RE_LOCALE = re.compile(r'(^\|\s,\s)([a-zA-Z]{1,8}(-[a-zA-Z]{1,8}))\s(;\sq\s=\s*(1(\.0{0,3})?\|0(\.[0-9]{0,3})))?', re.I)`
			`RE_GA_ACCOUNT_ID = re.compile(r'^(UA\|MO)-[0-9]-[0-9]$')`
			`RE_FIRST_THREE_OCTETS_OF_IP = re.compile(r'^((\d{1,3}\.){3})\d{1,3}$')`

			`def convert_ga_timestamp(timestamp_string):`
			`timestamp = float(timestamp_string)`
			`if timestamp > ((2 ** 31) - 1):`
			`timestamp /= 1000`
			`return datetime.utcfromtimestamp(timestamp)`

			`def get_32bit_random_num():`
			`return randint(0, 0x7fffffff)`

			`def is_valid_ip(ip):`
			`return True if RE_IP.match(str(ip)) else False`

			`def is_private_ip(ip):`
			`return True if RE_PRIV_IP.match(str(ip)) else False`

			`def validate_locale(locale):`
			`return RE_LOCALE.findall(str(locale))`

			`def is_valid_google_account(account):`
			`return True if RE_GA_ACCOUNT_ID.match(str(account)) else False`

			`def generate_hash(tmpstr):`
			`hash_val = 1`

			`if tmpstr:`
			`hash_val = 0`
			`for ordinal in map(ord, tmpstr[::-1]):`
			`hash_val = ((hash_val << 6) & 0xfffffff) + ordinal + (ordinal << 14)`
			`left_most_7 = hash_val & 0xfe00000`
			`if left_most_7 != 0:`
			`hash_val ^= left_most_7 >> 21`

			`return hash_val`

			`def anonymize_ip(ip):`
			`if ip:`
			`match = RE_FIRST_THREE_OCTETS_OF_IP.findall(str(ip))`
			`if match:`
			`return '%s%s' % (match[0][0], '0')`

			`return ''`

			`def encode_uri_components(value):`
			`'''Mimics Javascript's encodeURIComponent() function for consistency with the GA Javascript client.'''`
Google Analytics implementation to get usage data in order to better understand how Bazarr is used. 2019-09-03 03:22:38 +00:00			`return convert_to_uri_component_encoding(quote(value))`
update deps 2018-10-31 16:08:29 +00:00
			`def convert_to_uri_component_encoding(value):`
			`return value.replace('%21', '!').replace('%2A', '*').replace('%27', "'").replace('%28', '(').replace('%29', ')')`

			`# Taken from expicient.com BJs repo.`
			`def stringify(s, stype=None, fn=None):`
			`''' Converts elements of a complex data structure to strings`

			`The data structure can be a multi-tiered one - with tuples and lists etc`
			`This method will loop through each and convert everything to string.`
			`For example - it can be -`
			`[[{'a1': {'a2': {'a3': ('a4', timedelta(0, 563)), 'a5': {'a6': datetime()}}}}]]`
			`which will be converted to -`
			`[[{'a1': {'a2': {'a3': ('a4', '0:09:23'), 'a5': {'a6': '2009-05-27 16:19:52.401500' }}}}]]`

			`@param stype: If only one type of data element needs to be converted to`
			`string without affecting others, stype can be used.`
			`In the earlier example, if it is called with stringify(s, stype=datetime.timedelta)`
			`the result would be`
			`[[{'a1': {'a2': {'a3': ('a4', '0:09:23'), 'a5': {'a6': datetime() }}}}]]`

			`Also, even though the name is stringify, any function can be run on it, based on`
			`parameter fn. If fn is None, it will be stringified.`

			`'''`

			`if type(s) in [list, set, dict, tuple]:`
			`if isinstance(s, dict):`
			`for k in s:`
			`s[k] = stringify(s[k], stype, fn)`
			`elif type(s) in [list, set]:`
			`for i, k in enumerate(s):`
			`s[i] = stringify(k, stype, fn)`
			`else: #tuple`
			`tmp = []`
			`for k in s:`
			`tmp.append(stringify(k, stype, fn))`
			`s = tuple(tmp)`
			`else:`
			`if fn:`
			`if not stype or (stype == type(s)):`
			`return fn(s)`
			`else:`
			`# To do str(s). But, str() can fail on unicode. So, use .encode instead`
			`if not stype or (stype == type(s)):`
			`try:`
Google Analytics implementation to get usage data in order to better understand how Bazarr is used. 2019-09-03 03:22:38 +00:00			`return text_type(s)`
update deps 2018-10-31 16:08:29 +00:00			`#return s.encode('ascii', 'replace')`
			`except AttributeError:`
			`return str(s)`
			`except UnicodeDecodeError:`
			`return s.decode('ascii', 'replace')`
			`return s`