mirror of https://github.com/morpheus65535/bazarr
190 lines
6.3 KiB
Python
190 lines
6.3 KiB
Python
|
import pyjsparser
|
||
|
import pyjsparser.parser
|
||
|
from . import translating_nodes
|
||
|
|
||
|
import hashlib
|
||
|
import re
|
||
|
|
||
|
# Enable Js2Py exceptions and pyimport in parser
|
||
|
pyjsparser.parser.ENABLE_PYIMPORT = True
|
||
|
|
||
|
# the re below is how we'll recognise numeric constants.
|
||
|
# it finds any 'simple numeric that is not preceded with an alphanumeric character
|
||
|
# the numeric can be a float (so a dot is found) but
|
||
|
# it does not recognise notation such as 123e5, 0xFF, infinity or NaN
|
||
|
CP_NUMERIC_RE = re.compile(r'(?<![a-zA-Z0-9_"\'])([0-9\.]+)')
|
||
|
CP_NUMERIC_PLACEHOLDER = '__PyJsNUM_%i_PyJsNUM__'
|
||
|
CP_NUMERIC_PLACEHOLDER_REVERSE_RE = re.compile(
|
||
|
CP_NUMERIC_PLACEHOLDER.replace('%i', r'([0-9\.]+)'))
|
||
|
|
||
|
# the re below is how we'll recognise string constants
|
||
|
# it finds a ' or ", then reads until the next matching ' or "
|
||
|
# this re only services simple cases, it can not be used when
|
||
|
# there are escaped quotes in the expression
|
||
|
|
||
|
#CP_STRING_1 = re.compile(r'(["\'])(.*?)\1') # this is how we'll recognise string constants
|
||
|
|
||
|
CP_STRING = '"([^\\\\"]+|\\\\([bfnrtv\'"\\\\]|[0-3]?[0-7]{1,2}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}))*"|\'([^\\\\\']+|\\\\([bfnrtv\'"\\\\]|[0-3]?[0-7]{1,2}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}))*\''
|
||
|
CP_STRING_RE = re.compile(
|
||
|
CP_STRING) # this is how we'll recognise string constants
|
||
|
CP_STRING_PLACEHOLDER = '__PyJsSTR_%i_PyJsSTR__'
|
||
|
CP_STRING_PLACEHOLDER_REVERSE_RE = re.compile(
|
||
|
CP_STRING_PLACEHOLDER.replace('%i', r'([0-9\.]+)'))
|
||
|
|
||
|
cache = {}
|
||
|
|
||
|
# This crap is still needed but I removed it for speed reasons. Have to think ofa better idea
|
||
|
# import js2py.pyjs, sys
|
||
|
# # Redefine builtin objects... Do you have a better idea?
|
||
|
# for m in list(sys.modules):
|
||
|
# if m.startswith('js2py'):
|
||
|
# del sys.modules[m]
|
||
|
# del js2py.pyjs
|
||
|
# del js2py
|
||
|
|
||
|
DEFAULT_HEADER = u'''from js2py.pyjs import *
|
||
|
# setting scope
|
||
|
var = Scope( JS_BUILTINS )
|
||
|
set_global_object(var)
|
||
|
|
||
|
# Code follows:
|
||
|
'''
|
||
|
|
||
|
|
||
|
def dbg(x):
|
||
|
"""does nothing, legacy dummy function"""
|
||
|
return ''
|
||
|
|
||
|
|
||
|
def translate_js(js, HEADER=DEFAULT_HEADER, use_compilation_plan=False):
|
||
|
"""js has to be a javascript source code.
|
||
|
returns equivalent python code."""
|
||
|
if use_compilation_plan and not '//' in js and not '/*' in js:
|
||
|
return translate_js_with_compilation_plan(js, HEADER=HEADER)
|
||
|
parser = pyjsparser.PyJsParser()
|
||
|
parsed = parser.parse(js) # js to esprima syntax tree
|
||
|
# Another way of doing that would be with my auto esprima translation but its much slower and causes import problems:
|
||
|
# parsed = esprima.parse(js).to_dict()
|
||
|
translating_nodes.clean_stacks()
|
||
|
return HEADER + translating_nodes.trans(
|
||
|
parsed) # syntax tree to python code
|
||
|
|
||
|
|
||
|
class match_unumerator(object):
|
||
|
"""This class ise used """
|
||
|
matchcount = -1
|
||
|
|
||
|
def __init__(self, placeholder_mask):
|
||
|
self.placeholder_mask = placeholder_mask
|
||
|
self.matches = []
|
||
|
|
||
|
def __call__(self, match):
|
||
|
self.matchcount += 1
|
||
|
self.matches.append(match.group(0))
|
||
|
return self.placeholder_mask % self.matchcount
|
||
|
|
||
|
def __repr__(self):
|
||
|
return '\n'.join(self.placeholder_mask % counter + '=' + match
|
||
|
for counter, match in enumerate(self.matches))
|
||
|
|
||
|
def wrap_up(self, output):
|
||
|
for counter, value in enumerate(self.matches):
|
||
|
output = output.replace(
|
||
|
"u'" + self.placeholder_mask % (counter) + "'", value, 1)
|
||
|
return output
|
||
|
|
||
|
|
||
|
def get_compilation_plan(js):
|
||
|
match_increaser_str = match_unumerator(CP_STRING_PLACEHOLDER)
|
||
|
compilation_plan = re.sub(CP_STRING, match_increaser_str, js)
|
||
|
|
||
|
match_increaser_num = match_unumerator(CP_NUMERIC_PLACEHOLDER)
|
||
|
compilation_plan = re.sub(CP_NUMERIC_RE, match_increaser_num,
|
||
|
compilation_plan)
|
||
|
# now put quotes, note that just patching string replaces is somewhat faster than
|
||
|
# using another re:
|
||
|
compilation_plan = compilation_plan.replace(
|
||
|
'__PyJsNUM_', '"__PyJsNUM_').replace('_PyJsNUM__', '_PyJsNUM__"')
|
||
|
compilation_plan = compilation_plan.replace(
|
||
|
'__PyJsSTR_', '"__PyJsSTR_').replace('_PyJsSTR__', '_PyJsSTR__"')
|
||
|
|
||
|
return match_increaser_str, match_increaser_num, compilation_plan
|
||
|
|
||
|
|
||
|
def translate_js_with_compilation_plan(js, HEADER=DEFAULT_HEADER):
|
||
|
"""js has to be a javascript source code.
|
||
|
returns equivalent python code.
|
||
|
|
||
|
compile plans only work with the following restrictions:
|
||
|
- only enabled for oneliner expressions
|
||
|
- when there are comments in the js code string substitution is disabled
|
||
|
- when there nested escaped quotes string substitution is disabled, so
|
||
|
|
||
|
cacheable:
|
||
|
Q1 == 1 && name == 'harry'
|
||
|
|
||
|
not cacheable:
|
||
|
Q1 == 1 && name == 'harry' // some comment
|
||
|
|
||
|
not cacheable:
|
||
|
Q1 == 1 && name == 'o\'Reilly'
|
||
|
|
||
|
not cacheable:
|
||
|
Q1 == 1 && name /* some comment */ == 'o\'Reilly'
|
||
|
"""
|
||
|
|
||
|
match_increaser_str, match_increaser_num, compilation_plan = get_compilation_plan(
|
||
|
js)
|
||
|
|
||
|
cp_hash = hashlib.md5(compilation_plan.encode('utf-8')).digest()
|
||
|
try:
|
||
|
python_code = cache[cp_hash]['proto_python_code']
|
||
|
except:
|
||
|
parser = pyjsparser.PyJsParser()
|
||
|
parsed = parser.parse(compilation_plan) # js to esprima syntax tree
|
||
|
# Another way of doing that would be with my auto esprima translation but its much slower and causes import problems:
|
||
|
# parsed = esprima.parse(js).to_dict()
|
||
|
translating_nodes.clean_stacks()
|
||
|
python_code = translating_nodes.trans(
|
||
|
parsed) # syntax tree to python code
|
||
|
cache[cp_hash] = {
|
||
|
'compilation_plan': compilation_plan,
|
||
|
'proto_python_code': python_code,
|
||
|
}
|
||
|
|
||
|
python_code = match_increaser_str.wrap_up(python_code)
|
||
|
python_code = match_increaser_num.wrap_up(python_code)
|
||
|
|
||
|
return HEADER + python_code
|
||
|
|
||
|
|
||
|
def trasnlate(js, HEADER=DEFAULT_HEADER):
|
||
|
"""js has to be a javascript source code.
|
||
|
returns equivalent python code.
|
||
|
|
||
|
Equivalent to translate_js"""
|
||
|
return translate_js(js, HEADER)
|
||
|
|
||
|
|
||
|
syntax_tree_translate = translating_nodes.trans
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
PROFILE = False
|
||
|
import js2py
|
||
|
import codecs
|
||
|
|
||
|
def main():
|
||
|
with codecs.open("esprima.js", "r", "utf-8") as f:
|
||
|
d = f.read()
|
||
|
r = js2py.translate_js(d)
|
||
|
|
||
|
with open('res.py', 'wb') as f2:
|
||
|
f2.write(r)
|
||
|
exec (r, {})
|
||
|
|
||
|
if PROFILE:
|
||
|
import cProfile
|
||
|
cProfile.run('main()', sort='tottime')
|
||
|
else:
|
||
|
main()
|