mirror of
https://github.com/morpheus65535/bazarr
synced 2024-12-25 09:12:38 +00:00
512 lines
19 KiB
Python
512 lines
19 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
from datetime import datetime
|
|
from operator import itemgetter
|
|
from pyga import utils
|
|
from pyga import exceptions
|
|
try:
|
|
from urlparse import urlparse
|
|
from urllib import unquote_plus
|
|
except ImportError as e:
|
|
from urllib.parse import urlparse
|
|
from urllib.parse import unquote_plus
|
|
|
|
|
|
__author__ = "Arun KR (kra3) <the1.arun@gmail.com>"
|
|
__license__ = "Simplified BSD"
|
|
|
|
|
|
class Campaign(object):
|
|
'''
|
|
A representation of Campaign
|
|
|
|
Properties:
|
|
_type -- See TYPE_* constants, will be mapped to "__utmz" parameter.
|
|
creation_time -- Time of the creation of this campaign, will be mapped to "__utmz" parameter.
|
|
response_count -- Response Count, will be mapped to "__utmz" parameter.
|
|
Is also used to determine whether the campaign is new or repeated,
|
|
which will be mapped to "utmcn" and "utmcr" parameters.
|
|
id -- Campaign ID, a.k.a. "utm_id" query parameter for ga.js
|
|
Will be mapped to "__utmz" parameter.
|
|
source -- Source, a.k.a. "utm_source" query parameter for ga.js.
|
|
Will be mapped to "utmcsr" key in "__utmz" parameter.
|
|
g_click_id -- Google AdWords Click ID, a.k.a. "gclid" query parameter for ga.js.
|
|
Will be mapped to "utmgclid" key in "__utmz" parameter.
|
|
d_click_id -- DoubleClick (?) Click ID. Will be mapped to "utmdclid" key in "__utmz" parameter.
|
|
name -- Name, a.k.a. "utm_campaign" query parameter for ga.js.
|
|
Will be mapped to "utmccn" key in "__utmz" parameter.
|
|
medium -- Medium, a.k.a. "utm_medium" query parameter for ga.js.
|
|
Will be mapped to "utmcmd" key in "__utmz" parameter.
|
|
term -- Terms/Keywords, a.k.a. "utm_term" query parameter for ga.js.
|
|
Will be mapped to "utmctr" key in "__utmz" parameter.
|
|
content -- Ad Content Description, a.k.a. "utm_content" query parameter for ga.js.
|
|
Will be mapped to "utmcct" key in "__utmz" parameter.
|
|
|
|
'''
|
|
|
|
TYPE_DIRECT = 'direct'
|
|
TYPE_ORGANIC = 'organic'
|
|
TYPE_REFERRAL = 'referral'
|
|
|
|
CAMPAIGN_DELIMITER = '|'
|
|
|
|
UTMZ_PARAM_MAP = {
|
|
'utmcid': 'id',
|
|
'utmcsr': 'source',
|
|
'utmgclid': 'g_click_id',
|
|
'utmdclid': 'd_click_id',
|
|
'utmccn': 'name',
|
|
'utmcmd': 'medium',
|
|
'utmctr': 'term',
|
|
'utmcct': 'content',
|
|
}
|
|
|
|
def __init__(self, typ):
|
|
self._type = None
|
|
self.creation_time = None
|
|
self.response_count = 0
|
|
self.id = None
|
|
self.source = None
|
|
self.g_click_id = None
|
|
self.d_click_id = None
|
|
self.name = None
|
|
self.medium = None
|
|
self.term = None
|
|
self.content = None
|
|
|
|
if typ:
|
|
if typ not in ('direct', 'organic', 'referral'):
|
|
raise ValueError('Campaign type has to be one of the Campaign::TYPE_* constant values.')
|
|
|
|
self._type = typ
|
|
if typ == Campaign.TYPE_DIRECT:
|
|
self.name = '(direct)'
|
|
self.source = '(direct)'
|
|
self.medium = '(none)'
|
|
elif typ == Campaign.TYPE_REFERRAL:
|
|
self.name = '(referral)'
|
|
self.medium = 'referral'
|
|
elif typ == Campaign.TYPE_ORGANIC:
|
|
self.name = '(organic)'
|
|
self.medium = 'organic'
|
|
else:
|
|
self._type = None
|
|
|
|
self.creation_time = datetime.utcnow()
|
|
|
|
def validate(self):
|
|
if not self.source:
|
|
raise exceptions.ValidationError('Campaigns need to have at least the "source" attribute defined.')
|
|
|
|
@staticmethod
|
|
def create_from_referrer(url):
|
|
obj = Campaign(Campaign.TYPE_REFERRAL)
|
|
parse_rslt = urlparse(url)
|
|
obj.source = parse_rslt.netloc
|
|
obj.content = parse_rslt.path
|
|
return obj
|
|
|
|
def extract_from_utmz(self, utmz):
|
|
parts = utmz.split('.', 4)
|
|
|
|
if len(parts) != 5:
|
|
raise ValueError('The given "__utmz" cookie value is invalid.')
|
|
|
|
self.creation_time = utils.convert_ga_timestamp(parts[1])
|
|
self.response_count = int(parts[3])
|
|
params = parts[4].split(Campaign.CAMPAIGN_DELIMITER)
|
|
|
|
for param in params:
|
|
key, val = param.split('=')
|
|
|
|
try:
|
|
setattr(self, self.UTMZ_PARAM_MAP[key], unquote_plus(val))
|
|
except KeyError:
|
|
continue
|
|
|
|
return self
|
|
|
|
|
|
class CustomVariable(object):
|
|
'''
|
|
Represent a Custom Variable
|
|
|
|
Properties:
|
|
index -- Is the slot, you have 5 slots
|
|
name -- Name given to custom variable
|
|
value -- Value for the variable
|
|
scope -- Scope can be any one of 1, 2 or 3.
|
|
|
|
WATCH OUT: It's a known issue that GA will not decode URL-encoded
|
|
characters in custom variable names and values properly, so spaces
|
|
will show up as "%20" in the interface etc. (applicable to name & value)
|
|
http://www.google.com/support/forum/p/Google%20Analytics/thread?tid=2cdb3ec0be32e078
|
|
|
|
'''
|
|
|
|
SCOPE_VISITOR = 1
|
|
SCOPE_SESSION = 2
|
|
SCOPE_PAGE = 3
|
|
|
|
def __init__(self, index=None, name=None, value=None, scope=3):
|
|
self.index = index
|
|
self.name = name
|
|
self.value = value
|
|
self.scope = CustomVariable.SCOPE_PAGE
|
|
if scope:
|
|
self.scope = scope
|
|
|
|
def __setattr__(self, name, value):
|
|
if name == 'scope':
|
|
if value and value not in range(1, 4):
|
|
raise ValueError('Custom Variable scope has to be one of the 1,2 or 3')
|
|
|
|
if name == 'index':
|
|
# Custom Variables are limited to five slots officially, but there seems to be a
|
|
# trick to allow for more of them which we could investigate at a later time (see
|
|
# http://analyticsimpact.com/2010/05/24/get-more-than-5-custom-variables-in-google-analytics/
|
|
if value and (value < 0 or value > 5):
|
|
raise ValueError('Custom Variable index has to be between 1 and 5.')
|
|
|
|
object.__setattr__(self, name, value)
|
|
|
|
def validate(self):
|
|
'''
|
|
According to the GA documentation, there is a limit to the combined size of
|
|
name and value of 64 bytes after URL encoding,
|
|
see http://code.google.com/apis/analytics/docs/tracking/gaTrackingCustomVariables.html#varTypes
|
|
and http://xahlee.org/js/google_analytics_tracker_2010-07-01_expanded.js line 563
|
|
This limit was increased to 128 bytes BEFORE encoding with the 2012-01 release of ga.js however,
|
|
see http://code.google.com/apis/analytics/community/gajs_changelog.html
|
|
'''
|
|
if len('%s%s' % (self.name, self.value)) > 128:
|
|
raise exceptions.ValidationError('Custom Variable combined name and value length must not be larger than 128 bytes.')
|
|
|
|
|
|
class Event(object):
|
|
'''
|
|
Represents an Event
|
|
https://developers.google.com/analytics/devguides/collection/gajs/eventTrackerGuide
|
|
|
|
Properties:
|
|
category -- The general event category
|
|
action -- The action for the event
|
|
label -- An optional descriptor for the event
|
|
value -- An optional value associated with the event. You can see your
|
|
event values in the Overview, Categories, and Actions reports,
|
|
where they are listed by event or aggregated across events,
|
|
depending upon your report view.
|
|
noninteraction -- By default, event hits will impact a visitor's bounce rate.
|
|
By setting this parameter to true, this event hit
|
|
will not be used in bounce rate calculations.
|
|
(default False)
|
|
'''
|
|
|
|
def __init__(self, category=None, action=None, label=None, value=None, noninteraction=False):
|
|
self.category = category
|
|
self.action = action
|
|
self.label = label
|
|
self.value = value
|
|
self.noninteraction = bool(noninteraction)
|
|
|
|
if self.noninteraction and not self.value:
|
|
self.value = 0
|
|
|
|
def validate(self):
|
|
if not(self.category and self.action):
|
|
raise exceptions.ValidationError('Events, at least need to have a category and action defined.')
|
|
|
|
|
|
class Item(object):
|
|
'''
|
|
Represents an Item in Transaction
|
|
|
|
Properties:
|
|
order_id -- Order ID, will be mapped to "utmtid" parameter
|
|
sku -- Product Code. This is the sku code for a given product, will be mapped to "utmipc" parameter
|
|
name -- Product Name, will be mapped to "utmipn" parameter
|
|
variation -- Variations on an item, will be mapped to "utmiva" parameter
|
|
price -- Unit Price. Value is set to numbers only, will be mapped to "utmipr" parameter
|
|
quantity -- Unit Quantity, will be mapped to "utmiqt" parameter
|
|
|
|
'''
|
|
|
|
def __init__(self):
|
|
self.order_id = None
|
|
self.sku = None
|
|
self.name = None
|
|
self.variation = None
|
|
self.price = None
|
|
self.quantity = 1
|
|
|
|
def validate(self):
|
|
if not self.sku:
|
|
raise exceptions.ValidationError('sku/product is a required parameter')
|
|
|
|
|
|
class Page(object):
|
|
'''
|
|
Contains all parameters needed for tracking a page
|
|
|
|
Properties:
|
|
path -- Page request URI, will be mapped to "utmp" parameter
|
|
title -- Page title, will be mapped to "utmdt" parameter
|
|
charset -- Charset encoding, will be mapped to "utmcs" parameter
|
|
referrer -- Referer URL, will be mapped to "utmr" parameter
|
|
load_time -- Page load time in milliseconds, will be encoded into "utme" parameter.
|
|
|
|
'''
|
|
REFERRER_INTERNAL = '0'
|
|
|
|
def __init__(self, path):
|
|
self.path = None
|
|
self.title = None
|
|
self.charset = None
|
|
self.referrer = None
|
|
self.load_time = None
|
|
|
|
if path:
|
|
self.path = path
|
|
|
|
def __setattr__(self, name, value):
|
|
if name == 'path':
|
|
if value and value != '':
|
|
if value[0] != '/':
|
|
raise ValueError('The page path should always start with a slash ("/").')
|
|
elif name == 'load_time':
|
|
if value and not isinstance(value, int):
|
|
raise ValueError('Page load time must be specified in integer milliseconds.')
|
|
|
|
object.__setattr__(self, name, value)
|
|
|
|
|
|
class Session(object):
|
|
'''
|
|
You should serialize this object and store it in the user session to keep it
|
|
persistent between requests (similar to the "__umtb" cookie of the GA Javascript client).
|
|
|
|
Properties:
|
|
session_id -- A unique per-session ID, will be mapped to "utmhid" parameter
|
|
track_count -- The amount of pageviews that were tracked within this session so far,
|
|
will be part of the "__utmb" cookie parameter.
|
|
Will get incremented automatically upon each request
|
|
start_time -- Timestamp of the start of this new session, will be part of the "__utmb" cookie parameter
|
|
|
|
'''
|
|
def __init__(self):
|
|
self.session_id = utils.get_32bit_random_num()
|
|
self.track_count = 0
|
|
self.start_time = datetime.utcnow()
|
|
|
|
@staticmethod
|
|
def generate_session_id():
|
|
return utils.get_32bit_random_num()
|
|
|
|
def extract_from_utmb(self, utmb):
|
|
'''
|
|
Will extract information for the "trackCount" and "startTime"
|
|
properties from the given "__utmb" cookie value.
|
|
'''
|
|
parts = utmb.split('.')
|
|
if len(parts) != 4:
|
|
raise ValueError('The given "__utmb" cookie value is invalid.')
|
|
|
|
self.track_count = int(parts[1])
|
|
self.start_time = utils.convert_ga_timestamp(parts[3])
|
|
|
|
return self
|
|
|
|
|
|
class SocialInteraction(object):
|
|
'''
|
|
|
|
Properties:
|
|
action -- Required. A string representing the social action being tracked,
|
|
will be mapped to "utmsa" parameter
|
|
network -- Required. A string representing the social network being tracked,
|
|
will be mapped to "utmsn" parameter
|
|
target -- Optional. A string representing the URL (or resource) which receives the action.
|
|
|
|
'''
|
|
|
|
def __init__(self, action=None, network=None, target=None):
|
|
self.action = action
|
|
self.network = network
|
|
self.target = target
|
|
|
|
def validate(self):
|
|
if not(self.action and self.network):
|
|
raise exceptions.ValidationError('Social interactions need to have at least the "network" and "action" attributes defined.')
|
|
|
|
|
|
class Transaction(object):
|
|
'''
|
|
Represents parameters for a Transaction call
|
|
|
|
Properties:
|
|
order_id -- Order ID, will be mapped to "utmtid" parameter
|
|
affiliation -- Affiliation, Will be mapped to "utmtst" parameter
|
|
total -- Total Cost, will be mapped to "utmtto" parameter
|
|
tax -- Tax Cost, will be mapped to "utmttx" parameter
|
|
shipping -- Shipping Cost, values as for unit and price, will be mapped to "utmtsp" parameter
|
|
city -- Billing City, will be mapped to "utmtci" parameter
|
|
state -- Billing Region, will be mapped to "utmtrg" parameter
|
|
country -- Billing Country, will be mapped to "utmtco" parameter
|
|
items -- @entity.Items in a transaction
|
|
|
|
'''
|
|
def __init__(self):
|
|
self.items = []
|
|
self.order_id = None
|
|
self.affiliation = None
|
|
self.total = None
|
|
self.tax = None
|
|
self.shipping = None
|
|
self.city = None
|
|
self.state = None
|
|
self.country = None
|
|
|
|
def __setattr__(self, name, value):
|
|
if name == 'order_id':
|
|
for itm in self.items:
|
|
itm.order_id = value
|
|
object.__setattr__(self, name, value)
|
|
|
|
def validate(self):
|
|
if len(self.items) == 0:
|
|
raise exceptions.ValidationError('Transaction need to consist of at least one item')
|
|
|
|
def add_item(self, item):
|
|
''' item of type entities.Item '''
|
|
if isinstance(item, Item):
|
|
item.order_id = self.order_id
|
|
self.items.append(item)
|
|
|
|
|
|
class Visitor(object):
|
|
'''
|
|
You should serialize this object and store it in the user database to keep it
|
|
persistent for the same user permanently (similar to the "__umta" cookie of
|
|
the GA Javascript client).
|
|
|
|
Properties:
|
|
unique_id -- Unique user ID, will be part of the "__utma" cookie parameter
|
|
first_visit_time -- Time of the very first visit of this user, will be part of the "__utma" cookie parameter
|
|
previous_visit_time -- Time of the previous visit of this user, will be part of the "__utma" cookie parameter
|
|
current_visit_time -- Time of the current visit of this user, will be part of the "__utma" cookie parameter
|
|
visit_count -- Amount of total visits by this user, will be part of the "__utma" cookie parameter
|
|
ip_address -- IP Address of the end user, will be mapped to "utmip" parameter and "X-Forwarded-For" request header
|
|
user_agent -- User agent string of the end user, will be mapped to "User-Agent" request header
|
|
locale -- Locale string (country part optional) will be mapped to "utmul" parameter
|
|
flash_version -- Visitor's Flash version, will be maped to "utmfl" parameter
|
|
java_enabled -- Visitor's Java support, will be mapped to "utmje" parameter
|
|
screen_colour_depth -- Visitor's screen color depth, will be mapped to "utmsc" parameter
|
|
screen_resolution -- Visitor's screen resolution, will be mapped to "utmsr" parameter
|
|
'''
|
|
def __init__(self):
|
|
now = datetime.utcnow()
|
|
|
|
self.unique_id = None
|
|
self.first_visit_time = now
|
|
self.previous_visit_time = now
|
|
self.current_visit_time = now
|
|
self.visit_count = 1
|
|
self.ip_address = None
|
|
self.user_agent = None
|
|
self.locale = None
|
|
self.flash_version = None
|
|
self.java_enabled = None
|
|
self.screen_colour_depth = None
|
|
self.screen_resolution = None
|
|
|
|
def __setattr__(self, name, value):
|
|
if name == 'unique_id':
|
|
if value and (value < 0 or value > 0x7fffffff):
|
|
raise ValueError('Visitor unique ID has to be a 32-bit integer between 0 and 0x7fffffff')
|
|
object.__setattr__(self, name, value)
|
|
|
|
def __getattribute__(self, name):
|
|
if name == 'unique_id':
|
|
tmp = object.__getattribute__(self, name)
|
|
if tmp is None:
|
|
self.unique_id = self.generate_unique_id()
|
|
return object.__getattribute__(self, name)
|
|
|
|
def __getstate__(self):
|
|
state = self.__dict__
|
|
if state.get('user_agent') is None:
|
|
state['unique_id'] = self.generate_unique_id()
|
|
|
|
return state
|
|
|
|
def extract_from_utma(self, utma):
|
|
'''
|
|
Will extract information for the "unique_id", "first_visit_time", "previous_visit_time",
|
|
"current_visit_time" and "visit_count" properties from the given "__utma" cookie value.
|
|
'''
|
|
parts = utma.split('.')
|
|
if len(parts) != 6:
|
|
raise ValueError('The given "__utma" cookie value is invalid.')
|
|
|
|
self.unique_id = int(parts[1])
|
|
self.first_visit_time = utils.convert_ga_timestamp(parts[2])
|
|
self.previous_visit_time = utils.convert_ga_timestamp(parts[3])
|
|
self.current_visit_time = utils.convert_ga_timestamp(parts[4])
|
|
self.visit_count = int(parts[5])
|
|
|
|
return self
|
|
|
|
def extract_from_server_meta(self, meta):
|
|
'''
|
|
Will extract information for the "ip_address", "user_agent" and "locale"
|
|
properties from the given WSGI REQUEST META variable or equivalent.
|
|
'''
|
|
if 'REMOTE_ADDR' in meta and meta['REMOTE_ADDR']:
|
|
ip = None
|
|
for key in ('HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR'):
|
|
if key in meta and not ip:
|
|
ips = meta.get(key, '').split(',')
|
|
ip = ips[-1].strip()
|
|
if not utils.is_valid_ip(ip):
|
|
ip = ''
|
|
if utils.is_private_ip(ip):
|
|
ip = ''
|
|
if ip:
|
|
self.ip_address = ip
|
|
|
|
if 'HTTP_USER_AGENT' in meta and meta['HTTP_USER_AGENT']:
|
|
self.user_agent = meta['HTTP_USER_AGENT']
|
|
|
|
if 'HTTP_ACCEPT_LANGUAGE' in meta and meta['HTTP_ACCEPT_LANGUAGE']:
|
|
user_locals = []
|
|
matched_locales = utils.validate_locale(meta['HTTP_ACCEPT_LANGUAGE'])
|
|
if matched_locales:
|
|
lang_lst = map((lambda x: x.replace('-', '_')), (i[1] for i in matched_locales))
|
|
quality_lst = map((lambda x: x and x or 1), (float(i[4] and i[4] or '0') for i in matched_locales))
|
|
lang_quality_map = map((lambda x, y: (x, y)), lang_lst, quality_lst)
|
|
user_locals = [x[0] for x in sorted(lang_quality_map, key=itemgetter(1), reverse=True)]
|
|
|
|
if user_locals:
|
|
self.locale = user_locals[0]
|
|
|
|
return self
|
|
|
|
def generate_hash(self):
|
|
'''Generates a hashed value from user-specific properties.'''
|
|
tmpstr = "%s%s%s" % (self.user_agent, self.screen_resolution, self.screen_colour_depth)
|
|
return utils.generate_hash(tmpstr)
|
|
|
|
def generate_unique_id(self):
|
|
'''Generates a unique user ID from the current user-specific properties.'''
|
|
return ((utils.get_32bit_random_num() ^ self.generate_hash()) & 0x7fffffff)
|
|
|
|
def add_session(self, session):
|
|
'''
|
|
Updates the "previousVisitTime", "currentVisitTime" and "visitCount"
|
|
fields based on the given session object.
|
|
'''
|
|
start_time = session.start_time
|
|
if start_time != self.current_visit_time:
|
|
self.previous_visit_time = self.current_visit_time
|
|
self.current_visit_time = start_time
|
|
self.visit_count = self.visit_count + 1
|