mylar/lib/js2py/legecy_translators/flow.py

457 lines
16 KiB
Python

"""This module translates JS flow into PY flow.
Translates:
IF ELSE
DO WHILE
WHILE
FOR 123
FOR iter
CONTINUE, BREAK, RETURN, LABEL, THROW, TRY, SWITCH
"""
from utils import *
from jsparser import *
from nodevisitor import exp_translator
import random
TO_REGISTER = []
CONTINUE_LABEL = 'JS_CONTINUE_LABEL_%s'
BREAK_LABEL = 'JS_BREAK_LABEL_%s'
PREPARE = '''HOLDER = var.own.get(NAME)\nvar.force_own_put(NAME, PyExceptionToJs(PyJsTempException))\n'''
RESTORE = '''if HOLDER is not None:\n var.own[NAME] = HOLDER\nelse:\n del var.own[NAME]\ndel HOLDER\n'''
TRY_CATCH = '''%stry:\nBLOCKfinally:\n%s''' % (PREPARE, indent(RESTORE))
def get_continue_label(label):
return CONTINUE_LABEL%label.encode('hex')
def get_break_label(label):
return BREAK_LABEL%label.encode('hex')
def pass_until(source, start, tokens=(';',)):
while start < len(source) and source[start] not in tokens:
start+=1
return start+1
def do_bracket_exp(source, start, throw=True):
bra, cand = pass_bracket(source, start, '()')
if throw and not bra:
raise SyntaxError('Missing bracket expression')
bra = exp_translator(bra[1:-1])
if throw and not bra:
raise SyntaxError('Empty bracket condition')
return bra, cand if bra else start
def do_if(source, start):
start += 2 # pass this if
bra, start = do_bracket_exp(source, start, throw=True)
statement, start = do_statement(source, start)
if statement is None:
raise SyntaxError('Invalid if statement')
translated = 'if %s:\n'%bra+indent(statement)
elseif = except_keyword(source, start, 'else')
is_elseif = False
if elseif:
start = elseif
if except_keyword(source, start, 'if'):
is_elseif = True
elseif, start = do_statement(source, start)
if elseif is None:
raise SyntaxError('Invalid if statement)')
if is_elseif:
translated += 'el' + elseif
else:
translated += 'else:\n'+ indent(elseif)
return translated, start
def do_statement(source, start):
"""returns none if not found other functions that begin with 'do_' raise
also this do_ type function passes white space"""
start = pass_white(source, start)
# start is the fist position after initial start that is not a white space or \n
if not start < len(source): #if finished parsing return None
return None, start
if any(startswith_keyword(source[start:], e) for e in {'case', 'default'}):
return None, start
rest = source[start:]
for key, meth in KEYWORD_METHODS.iteritems(): # check for statements that are uniquely defined by their keywords
if rest.startswith(key):
# has to startwith this keyword and the next letter after keyword must be either EOF or not in IDENTIFIER_PART
if len(key)==len(rest) or rest[len(key)] not in IDENTIFIER_PART:
return meth(source, start)
if rest[0] == '{': #Block
return do_block(source, start)
# Now only label and expression left
cand = parse_identifier(source, start, False)
if cand is not None: # it can mean that its a label
label, cand_start = cand
cand_start = pass_white(source, cand_start)
if source[cand_start]==':':
return do_label(source, start)
return do_expression(source, start)
def do_while(source, start):
start += 5 # pass while
bra, start = do_bracket_exp(source, start, throw=True)
statement, start = do_statement(source, start)
if statement is None:
raise SyntaxError('Missing statement to execute in while loop!')
return 'while %s:\n'%bra + indent(statement), start
def do_dowhile(source, start):
start += 2 # pass do
statement, start = do_statement(source, start)
if statement is None:
raise SyntaxError('Missing statement to execute in do while loop!')
start = except_keyword(source, start, 'while')
if not start:
raise SyntaxError('Missing while keyword in do-while loop')
bra, start = do_bracket_exp(source, start, throw=True)
statement += 'if not %s:\n' % bra + indent('break\n')
return 'while 1:\n' + indent(statement), start
def do_block(source, start):
bra, start = pass_bracket(source, start, '{}')
#print source[start:], bra
#return bra +'\n', start
if bra is None:
raise SyntaxError('Missing block ( {code} )')
code = ''
bra = bra[1:-1]+';'
bra_pos = 0
while bra_pos<len(bra):
st, bra_pos = do_statement(bra, bra_pos)
if st is None:
break
code += st
bra_pos = pass_white(bra, bra_pos)
if bra_pos<len(bra):
raise SyntaxError('Block has more code that could not be parsed:\n'+bra[bra_pos:])
return code, start
def do_empty(source, start):
return 'pass\n', start + 1
def do_expression(source, start):
start = pass_white(source, start)
end = pass_until(source, start, tokens=(';',))
if end==start+1: #empty statement
return 'pass\n', end
# AUTOMATIC SEMICOLON INSERTION FOLLOWS
# Without ASI this function would end with: return exp_translator(source[start:end].rstrip(';'))+'\n', end
# ASI makes things a bit more complicated:
# we will try to parse as much as possible, inserting ; in place of last new line in case of error
rev = False
rpos = 0
while True:
try:
code = source[start:end].rstrip(';')
cand = exp_translator(code)+'\n', end
just_to_test = compile(cand[0], '', 'exec')
return cand
except Exception as e:
if not rev:
rev = source[start:end][::-1]
lpos = rpos
while True:
rpos = pass_until(rev, rpos, LINE_TERMINATOR)
if rpos>=len(rev):
raise
if filter(lambda x: x not in SPACE, rev[lpos:rpos]):
break
end = start + len(rev) - rpos + 1
def do_var(source, start):
#todo auto ; insertion
start += 3 #pass var
end = pass_until(source, start, tokens=(';',))
defs = argsplit(source[start:end-1]) # defs is the list of defined vars with optional initializer
code = ''
for de in defs:
var, var_end = parse_identifier(de, 0, True)
TO_REGISTER.append(var)
var_end = pass_white(de, var_end)
if var_end<len(de): # we have something more to parse... It has to start with =
if de[var_end] != '=':
raise SyntaxError('Unexpected initializer in var statement. Expected "=", got "%s"'%de[var_end])
code += exp_translator(de) + '\n'
if not code.strip():
code = 'pass\n'
return code, end
def do_label(source, start):
label, end = parse_identifier(source, start)
end = pass_white(source, end)
#now source[end] must be :
assert source[end]==':'
end += 1
inside, end = do_statement(source, end)
if inside is None:
raise SyntaxError('Missing statement after label')
defs = ''
if inside.startswith('while ') or inside.startswith('for ') or inside.startswith('#for'):
# we have to add contine label as well...
# 3 or 1 since #for loop type has more lines before real for.
sep = 1 if not inside.startswith('#for') else 3
cont_label = get_continue_label(label)
temp = inside.split('\n')
injected = 'try:\n'+'\n'.join(temp[sep:])
injected += 'except %s:\n pass\n'%cont_label
inside = '\n'.join(temp[:sep])+'\n'+indent(injected)
defs += 'class %s(Exception): pass\n'%cont_label
break_label = get_break_label(label)
inside = 'try:\n%sexcept %s:\n pass\n'% (indent(inside), break_label)
defs += 'class %s(Exception): pass\n'%break_label
return defs + inside, end
def do_for(source, start):
start += 3 # pass for
entered = start
bra, start = pass_bracket(source, start , '()')
inside, start = do_statement(source, start)
if inside is None:
raise SyntaxError('Missing statement after for')
bra = bra[1:-1]
if ';' in bra:
init = argsplit(bra, ';')
if len(init)!=3:
raise SyntaxError('Invalid for statement')
args = []
for i, item in enumerate(init):
end = pass_white(item, 0)
if end==len(item):
args.append('' if i!=1 else '1')
continue
if not i and except_keyword(item, end, 'var') is not None:
# var statement
args.append(do_var(item, end)[0])
continue
args.append(do_expression(item, end)[0])
return '#for JS loop\n%swhile %s:\n%s%s\n' %(args[0], args[1].strip(), indent(inside), indent(args[2])), start
# iteration
end = pass_white(bra, 0)
register = False
if bra[end:].startswith('var '):
end+=3
end = pass_white(bra, end)
register = True
name, end = parse_identifier(bra, end)
if register:
TO_REGISTER.append(name)
end = pass_white(bra, end)
if bra[end:end+2]!='in' or bra[end+2] in IDENTIFIER_PART:
#print source[entered-10:entered+50]
raise SyntaxError('Invalid "for x in y" statement')
end+=2 # pass in
exp = exp_translator(bra[end:])
res = 'for temp in %s:\n' % exp
res += indent('var.put(%s, temp)\n' % name.__repr__()) + indent(inside)
return res, start
# todo - IMPORTANT
def do_continue(source, start, name='continue'):
start += len(name) #pass continue
start = pass_white(source, start)
if start<len(source) and source[start] == ';':
return '%s\n'%name, start+1
# labeled statement or error
label, start = parse_identifier(source, start)
start = pass_white(source, start)
if start<len(source) and source[start] != ';':
raise SyntaxError('Missing ; after label name in %s statement'%name)
return 'raise %s("%s")\n' % (get_continue_label(label) if name=='continue' else get_break_label(label), name), start+1
def do_break(source, start):
return do_continue(source, start, 'break')
def do_return(source, start):
start += 6 # pass return
end = source.find(';', start)+1
if end==-1:
end = len(source)
trans = exp_translator(source[start:end].rstrip(';'))
return 'return %s\n' % (trans if trans else "var.get('undefined')"), end
# todo later?- Also important
def do_throw(source, start):
start += 5 # pass throw
end = source.find(';', start)+1
if not end:
end = len(source)
trans = exp_translator(source[start:end].rstrip(';'))
if not trans:
raise SyntaxError('Invalid throw statement: nothing to throw')
res = 'PyJsTempException = JsToPyException(%s)\nraise PyJsTempException\n' % trans
return res, end
def do_try(source, start):
start += 3 # pass try
block, start = do_block(source, start)
result = 'try:\n%s' %indent(block)
catch = except_keyword(source, start, 'catch')
if catch:
bra, catch = pass_bracket(source, catch, '()')
bra = bra[1:-1]
identifier, bra_end = parse_identifier(bra, 0)
holder = 'PyJsHolder_%s_%d'%(identifier.encode('hex'), random.randrange(1e8))
identifier = identifier.__repr__()
bra_end = pass_white(bra, bra_end)
if bra_end<len(bra):
raise SyntaxError('Invalid content of catch statement')
result += 'except PyJsException as PyJsTempException:\n'
block, catch = do_block(source, catch)
# fill in except ( catch ) block and remember to recover holder variable to its previous state
result += indent(TRY_CATCH.replace('HOLDER', holder).replace('NAME', identifier).replace('BLOCK', indent(block)))
start = max(catch, start)
final = except_keyword(source, start, 'finally')
if not (final or catch):
raise SyntaxError('Try statement has to be followed by catch or finally')
if not final:
return result, start
# translate finally statement
block, start = do_block(source, final)
return result + 'finally:\n%s' % indent(block), start
def do_debugger(source, start):
start += 8 # pass debugger
end = pass_white(source, start)
if end<len(source) and source[end]==';':
end += 1
return 'pass\n', end #ignore errors...
# todo automatic ; insertion. fuck this crappy feature
# Least important
def do_switch(source, start):
start += 6 # pass switch
code = 'while 1:\n' + indent('SWITCHED = False\nCONDITION = (%s)\n')
# parse value of check
val, start = pass_bracket(source, start, '()')
if val is None:
raise SyntaxError('Missing () after switch statement')
if not val.strip():
raise SyntaxError('Missing content inside () after switch statement')
code = code % exp_translator(val)
bra, start = pass_bracket(source, start, '{}')
if bra is None:
raise SyntaxError('Missing block {} after switch statement')
bra_pos = 0
bra = bra[1:-1] + ';'
while True:
case = except_keyword(bra, bra_pos, 'case')
default = except_keyword(bra, bra_pos, 'default')
assert not (case and default)
if case or default: # this ?: expression makes things much harder....
case_code = None
if case:
case_code = 'if SWITCHED or PyJsStrictEq(CONDITION, %s):\n'
# we are looking for a first : with count 1. ? gives -1 and : gives +1.
count = 0
for pos, e in enumerate(bra[case:], case):
if e=='?':
count -= 1
elif e==':':
count += 1
if count==1:
break
else:
raise SyntaxError('Missing : token after case in switch statement')
case_condition = exp_translator(bra[case:pos]) # switch {case CONDITION: statements}
case_code = case_code % case_condition
case = pos + 1
if default:
case = except_token(bra, default, ':')
case_code = 'if True:\n'
# now parse case statements (things after ':' )
cand, case = do_statement(bra, case)
while cand:
case_code += indent(cand)
cand, case = do_statement(bra, case)
case_code += indent('SWITCHED = True\n')
code += indent(case_code)
bra_pos = case
else:
break
# prevent infinite loop :)
code += indent('break\n')
return code, start
def do_pyimport(source, start):
start += 8
lib, start = parse_identifier(source, start)
jlib = 'PyImport_%s' % lib
code = 'import %s as %s\n' % (lib, jlib)
#check whether valid lib name...
try:
compile(code, '', 'exec')
except:
raise SyntaxError('Invalid Python module name (%s) in pyimport statement'%lib)
# var.pyimport will handle module conversion to PyJs object
code += 'var.pyimport(%s, %s)\n' % (repr(lib), jlib)
return code, start
def do_with(source, start):
raise NotImplementedError('With statement is not implemented yet :(')
KEYWORD_METHODS = {'do': do_dowhile,
'while': do_while,
'if': do_if,
'throw': do_throw,
'return': do_return,
'continue': do_continue,
'break': do_break,
'try': do_try,
'for': do_for,
'switch': do_switch,
'var': do_var,
'debugger': do_debugger, # this one does not do anything
'with': do_with,
'pyimport': do_pyimport
}
#Also not specific statements (harder to detect)
# Block {}
# Expression or Empty Statement
# Label
#
# Its easy to recognize block but harder to distinguish between label and expression statement
def translate_flow(source):
"""Source cant have arrays, object, constant or function literals.
Returns PySource and variables to register"""
global TO_REGISTER
TO_REGISTER = []
return do_block('{%s}'%source, 0)[0], TO_REGISTER
if __name__=='__main__':
#print do_dowhile('do {} while(k+f)', 0)[0]
#print 'e: "%s"'%do_expression('++(c?g:h); mj', 0)[0]
print translate_flow('a; yimport test')[0]