bazarr/libs/js2py/legecy_translators/jsparser.py

328 lines
9.8 KiB
Python

"""
The process of translating JS will go like that: # TOP = 'imports and scope set'
1. Remove all the comments
2. Replace number, string and regexp literals with markers
4. Remove global Functions and move their translation to the TOP. Also add register code there.
5. Replace inline functions with lvals
6. Remove List and Object literals and replace them with lvals
7. Find and remove var declarations, generate python register code that would go on TOP.
Here we should be left with global code only where 1 line of js code = 1 line of python code.
Routine translating this code should be called glob_translate:
1. Search for outer structures and translate them using glob and inside using exps_translate
exps_translate routine:
1. Remove outer {}
2. Split lines at ;
3. Convert line by line using exp_translate
4. In case of error in 3 try to insert ; according to ECMA rules and repeat 3.
exp_translate routine:
It takes a single line of JS code and returns a SINGLE line of Python code.
Note var is not present here because it was removed in previous stages.
If case of parsing errors it must return a pos of error.
1. Convert all assignment operations to put operations, this may be hard :(
2. Convert all gets and calls to get and callprop.
3. Convert unary operators like typeof, new, !, delete.
Delete can be handled by replacing last get method with delete.
4. Convert remaining operators that are not handled by python eg: === and ,
lval format PyJsLvalNR
marker PyJs(TYPE_NAME)(NR)
TODO
1. Number literal replacement
2. Array literal replacement
3. Object literal replacement
5. Function replacement
4. Literal replacement translators
"""
from __future__ import print_function
from utils import *
OP_METHODS = {
'*': '__mul__',
'/': '__div__',
'%': '__mod__',
'+': '__add__',
'-': '__sub__',
'<<': '__lshift__',
'>>': '__rshift__',
'&': '__and__',
'^': '__xor__',
'|': '__or__'
}
def dbg(source):
try:
with open(r'C:\Users\Piotrek\Desktop\dbg.py', 'w') as f:
f.write(source)
except:
pass
def indent(lines, ind=4):
return ind * ' ' + lines.replace('\n', '\n' + ind * ' ').rstrip(' ')
def inject_before_lval(source, lval, code):
if source.count(lval) > 1:
dbg(source)
print()
print(lval)
raise RuntimeError('To many lvals (%s)' % lval)
elif not source.count(lval):
dbg(source)
print()
print(lval)
assert lval not in source
raise RuntimeError('No lval found "%s"' % lval)
end = source.index(lval)
inj = source.rfind('\n', 0, end)
ind = inj
while source[ind + 1] == ' ':
ind += 1
ind -= inj
return source[:inj + 1] + indent(code, ind) + source[inj + 1:]
def bracket_split(source, brackets=('()', '{}', '[]'), strip=False):
"""DOES NOT RETURN EMPTY STRINGS (can only return empty bracket content if strip=True)"""
starts = [e[0] for e in brackets]
in_bracket = 0
n = 0
last = 0
while n < len(source):
e = source[n]
if not in_bracket and e in starts:
in_bracket = 1
start = n
b_start, b_end = brackets[starts.index(e)]
elif in_bracket:
if e == b_start:
in_bracket += 1
elif e == b_end:
in_bracket -= 1
if not in_bracket:
if source[last:start]:
yield source[last:start]
last = n + 1
yield source[start + strip:n + 1 - strip]
n += 1
if source[last:]:
yield source[last:]
def pass_bracket(source, start, bracket='()'):
"""Returns content of brackets with brackets and first pos after brackets
if source[start] is followed by some optional white space and brackets.
Otherwise None"""
e = bracket_split(source[start:], [bracket], False)
try:
cand = e.next()
except StopIteration:
return None, None
if not cand.strip(): #white space...
try:
res = e.next()
return res, start + len(cand) + len(res)
except StopIteration:
return None, None
elif cand[-1] == bracket[1]:
return cand, start + len(cand)
else:
return None, None
def startswith_keyword(start, keyword):
start = start.lstrip()
if start.startswith(keyword):
if len(keyword) < len(start):
if start[len(keyword)] in IDENTIFIER_PART:
return False
return True
return False
def endswith_keyword(ending, keyword):
ending = ending.rstrip()
if ending.endswith(keyword):
if len(keyword) < len(ending):
if ending[len(ending) - len(keyword) - 1] in IDENTIFIER_PART:
return False
return True
return False
def pass_white(source, start):
n = start
while n < len(source):
if source[n] in SPACE:
n += 1
else:
break
return n
def except_token(source, start, token, throw=True):
"""Token can be only a single char. Returns position after token if found. Otherwise raises syntax error if throw
otherwise returns None"""
start = pass_white(source, start)
if start < len(source) and source[start] == token:
return start + 1
if throw:
raise SyntaxError('Missing token. Expected %s' % token)
return None
def except_keyword(source, start, keyword):
""" Returns position after keyword if found else None
Note: skips white space"""
start = pass_white(source, start)
kl = len(keyword) #keyword len
if kl + start > len(source):
return None
if source[start:start + kl] != keyword:
return None
if kl + start < len(source) and source[start + kl] in IDENTIFIER_PART:
return None
return start + kl
def parse_identifier(source, start, throw=True):
"""passes white space from start and returns first identifier,
if identifier invalid and throw raises SyntaxError otherwise returns None"""
start = pass_white(source, start)
end = start
if not end < len(source):
if throw:
raise SyntaxError('Missing identifier!')
return None
if source[end] not in IDENTIFIER_START:
if throw:
raise SyntaxError('Invalid identifier start: "%s"' % source[end])
return None
end += 1
while end < len(source) and source[end] in IDENTIFIER_PART:
end += 1
if not is_valid_lval(source[start:end]):
if throw:
raise SyntaxError(
'Invalid identifier name: "%s"' % source[start:end])
return None
return source[start:end], end
def argsplit(args, sep=','):
"""used to split JS args (it is not that simple as it seems because
sep can be inside brackets).
pass args *without* brackets!
Used also to parse array and object elements, and more"""
parsed_len = 0
last = 0
splits = []
for e in bracket_split(args, brackets=['()', '[]', '{}']):
if e[0] not in {'(', '[', '{'}:
for i, char in enumerate(e):
if char == sep:
splits.append(args[last:parsed_len + i])
last = parsed_len + i + 1
parsed_len += len(e)
splits.append(args[last:])
return splits
def split_add_ops(text):
"""Specialized function splitting text at add/sub operators.
Operands are *not* translated. Example result ['op1', '+', 'op2', '-', 'op3']"""
n = 0
text = text.replace('++', '##').replace(
'--', '@@') #text does not normally contain any of these
spotted = False # set to true if noticed anything other than +- or white space
last = 0
while n < len(text):
e = text[n]
if e == '+' or e == '-':
if spotted:
yield text[last:n].replace('##', '++').replace('@@', '--')
yield e
last = n + 1
spotted = False
elif e == '/' or e == '*' or e == '%':
spotted = False
elif e != ' ':
spotted = True
n += 1
yield text[last:n].replace('##', '++').replace('@@', '--')
def split_at_any(text,
lis,
translate=False,
not_before=[],
not_after=[],
validitate=None):
""" doc """
lis.sort(key=lambda x: len(x), reverse=True)
last = 0
n = 0
text_len = len(text)
while n < text_len:
if any(text[:n].endswith(e)
for e in not_before): #Cant end with end before
n += 1
continue
for e in lis:
s = len(e)
if s + n > text_len:
continue
if validitate and not validitate(e, text[:n], text[n + s:]):
continue
if any(text[n + s:].startswith(e)
for e in not_after): #Cant end with end before
n += 1
break
if e == text[n:n + s]:
yield text[last:n] if not translate else translate(
text[last:n])
yield e
n += s
last = n
break
else:
n += 1
yield text[last:n] if not translate else translate(text[last:n])
def split_at_single(text, sep, not_before=[], not_after=[]):
"""Works like text.split(sep) but separated fragments
cant end with not_before or start with not_after"""
n = 0
lt, s = len(text), len(sep)
last = 0
while n < lt:
if not s + n > lt:
if sep == text[n:n + s]:
if any(text[last:n].endswith(e) for e in not_before):
pass
elif any(text[n + s:].startswith(e) for e in not_after):
pass
else:
yield text[last:n]
last = n + s
n += s - 1
n += 1
yield text[last:]