mylar/lib/js2py/legecy_translators/nodevisitor.py

501 lines
16 KiB
Python

from jsparser import *
from utils import *
import re
from utils import *
#Note all white space sent to this module must be ' ' so no '\n'
REPL = {}
#PROBLEMS
# <<=, >>=, >>>=
# they are unusual so I will not fix that now. a++ +b works fine and a+++++b (a++ + ++b) does not work even in V8
ASSIGNMENT_MATCH = '(?<!=|!|<|>)=(?!=)'
def unary_validitator(keyword, before, after):
if keyword[-1] in IDENTIFIER_PART:
if not after or after[0] in IDENTIFIER_PART:
return False
if before and before[-1] in IDENTIFIER_PART: # I am not sure here...
return False
return True
def comb_validitator(keyword, before, after):
if keyword=='instanceof' or keyword=='in':
if before and before[-1] in IDENTIFIER_PART:
return False
elif after and after[0] in IDENTIFIER_PART:
return False
return True
def bracket_replace(code):
new = ''
for e in bracket_split(code, ['()','[]'], False):
if e[0]=='[':
name = '#PYJSREPL'+str(len(REPL))+'{'
new+= name
REPL[name] = e
elif e[0]=='(': # can be a function call
name = '@PYJSREPL'+str(len(REPL))+'}'
new+= name
REPL[name] = e
else:
new+=e
return new
class NodeVisitor:
def __init__(self, code):
self.code = code
def rl(self, lis, op):
"""performs this operation on a list from *right to left*
op must take 2 args
a,b,c => op(a, op(b, c))"""
it = reversed(lis)
res = trans(it.next())
for e in it:
e = trans(e)
res = op(e, res)
return res
def lr(self, lis, op):
"""performs this operation on a list from *left to right*
op must take 2 args
a,b,c => op(op(a, b), c)"""
it = iter(lis)
res = trans(it.next())
for e in it:
e = trans(e)
res = op(res, e)
return res
def translate(self):
"""Translates outer operation and calls translate on inner operation.
Returns fully translated code."""
if not self.code:
return ''
new = bracket_replace(self.code)
#Check comma operator:
cand = new.split(',') #every comma in new must be an operator
if len(cand)>1: #LR
return self.lr(cand, js_comma)
#Check = operator:
# dont split at != or !== or == or === or <= or >=
#note <<=, >>= or this >>> will NOT be supported
# maybe I will change my mind later
# Find this crappy ?:
if '?' in new:
cond_ind = new.find('?')
tenary_start = 0
for ass in re.finditer(ASSIGNMENT_MATCH, new):
cand = ass.span()[1]
if cand < cond_ind:
tenary_start = cand
else:
break
actual_tenary = new[tenary_start:]
spl = ''.join(split_at_any(new, [':', '?'], translate=trans))
tenary_translation = transform_crap(spl)
assignment = new[:tenary_start] + ' PyJsConstantTENARY'
return trans(assignment).replace('PyJsConstantTENARY', tenary_translation)
cand = list(split_at_single(new, '=', ['!', '=','<','>'], ['=']))
if len(cand)>1: # RL
it = reversed(cand)
res = trans(it.next())
for e in it:
e = e.strip()
if not e:
raise SyntaxError('Missing left-hand in assignment!')
op = ''
if e[-2:] in OP_METHODS:
op = ','+e[-2:].__repr__()
e = e[:-2]
elif e[-1:] in OP_METHODS:
op = ','+e[-1].__repr__()
e = e[:-1]
e = trans(e)
#Now replace last get method with put and change args
c = list(bracket_split(e, ['()']))
beg, arglist = ''.join(c[:-1]).strip(), c[-1].strip() #strips just to make sure... I will remove it later
if beg[-4:]!='.get':
raise SyntaxError('Invalid left-hand side in assignment')
beg = beg[0:-3]+'put'
arglist = arglist[0:-1]+', '+res+op+')'
res = beg+arglist
return res
#Now check remaining 2 arg operators that are not handled by python
#They all have Left to Right (LR) associativity
order = [OR, AND, BOR, BXOR, BAND, EQS, COMPS, BSHIFTS, ADDS, MULTS]
# actually we dont need OR and AND because they can be handled easier. But just for fun
dangerous = ['<', '>']
for typ in order:
#we have to use special method for ADDS since they can be also unary operation +/++ or -/-- FUCK
if '+' in typ:
cand = list(split_add_ops(new))
else:
#dont translate. cant start or end on dangerous op.
cand = list(split_at_any(new, typ.keys(), False, dangerous, dangerous,validitate=comb_validitator))
if not len(cand)>1:
continue
n = 1
res = trans(cand[0])
if not res:
raise SyntaxError("Missing operand!")
while n<len(cand):
e = cand[n]
if not e:
raise SyntaxError("Missing operand!")
if n%2:
op = typ[e]
else:
res = op(res, trans(e))
n+=1
return res
#Now replace unary operators - only they are left
cand = list(split_at_any(new, UNARY.keys(), False, validitate=unary_validitator))
if len(cand)>1: #contains unary operators
if '++' in cand or '--' in cand: #it cant contain both ++ and --
if '--' in cand:
op = '--'
meths = js_post_dec, js_pre_dec
else:
op = '++'
meths = js_post_inc, js_pre_inc
pos = cand.index(op)
if cand[pos-1].strip(): # post increment
a = cand[pos-1]
meth = meths[0]
elif cand[pos+1].strip(): #pre increment
a = cand[pos+1]
meth = meths[1]
else:
raise SyntaxError('Invalid use of ++ operator')
if cand[pos+2:]:
raise SyntaxError('Too many operands')
operand = meth(trans(a))
cand = cand[:pos-1]
# now last cand should be operand and every other odd element should be empty
else:
operand = trans(cand[-1])
del cand[-1]
for i, e in enumerate(reversed(cand)):
if i%2:
if e.strip():
raise SyntaxError('Too many operands')
else:
operand = UNARY[e](operand)
return operand
#Replace brackets
if new[0]=='@' or new[0]=='#':
if len(list(bracket_split(new, ('#{','@}')))) ==1: # we have only one bracket, otherwise pseudobracket like @@....
assert new in REPL
if new[0]=='#':
raise SyntaxError('[] cant be used as brackets! Use () instead.')
return '('+trans(REPL[new][1:-1])+')'
#Replace function calls and prop getters
# 'now' must be a reference like: a or b.c.d but it can have also calls or getters ( for example a["b"](3))
#From here @@ means a function call and ## means get operation (note they dont have to present)
it = bracket_split(new, ('#{','@}'))
res = []
for e in it:
if e[0]!='#' and e[0]!='@':
res += [x.strip() for x in e.split('.')]
else:
res += [e.strip()]
# res[0] can be inside @@ (name)...
res = filter(lambda x: x, res)
if is_internal(res[0]):
out = res[0]
elif res[0][0] in {'#', '@'}:
out = '('+trans(REPL[res[0]][1:-1])+')'
elif is_valid_lval(res[0]) or res[0] in {'this', 'false', 'true', 'null'}:
out = 'var.get('+res[0].__repr__()+')'
else:
if is_reserved(res[0]):
raise SyntaxError('Unexpected reserved word: "%s"'%res[0])
raise SyntaxError('Invalid identifier: "%s"'%res[0])
if len(res)==1:
return out
n = 1
while n<len(res): #now every func call is a prop call
e = res[n]
if e[0]=='@': # direct call
out += trans_args(REPL[e])
n += 1
continue
args = False #assume not prop call
if n+1<len(res) and res[n+1][0]=='@': #prop call
args = trans_args(REPL[res[n+1]])[1:]
if args!=')':
args = ','+args
if e[0]=='#':
prop = trans(REPL[e][1:-1])
else:
if not is_lval(e):
raise SyntaxError('Invalid identifier: "%s"'%e)
prop = e.__repr__()
if args: # prop call
n+=1
out += '.callprop('+prop+args
else: #prop get
out += '.get('+prop+')'
n+=1
return out
def js_comma(a, b):
return 'PyJsComma('+a+','+b+')'
def js_or(a, b):
return '('+a+' or '+b+')'
def js_bor(a, b):
return '('+a+'|'+b+')'
def js_bxor(a, b):
return '('+a+'^'+b+')'
def js_band(a, b):
return '('+a+'&'+b+')'
def js_and(a, b):
return '('+a+' and '+b+')'
def js_strict_eq(a, b):
return 'PyJsStrictEq('+a+','+b+')'
def js_strict_neq(a, b):
return 'PyJsStrictNeq('+a+','+b+')'
#Not handled by python in the same way like JS. For example 2==2==True returns false.
# In JS above would return true so we need brackets.
def js_abstract_eq(a, b):
return '('+a+'=='+b+')'
#just like ==
def js_abstract_neq(a, b):
return '('+a+'!='+b+')'
def js_lt(a, b):
return '('+a+'<'+b+')'
def js_le(a, b):
return '('+a+'<='+b+')'
def js_ge(a, b):
return '('+a+'>='+b+')'
def js_gt(a, b):
return '('+a+'>'+b+')'
def js_in(a, b):
return b+'.contains('+a+')'
def js_instanceof(a, b):
return a+'.instanceof('+b+')'
def js_lshift(a, b):
return '('+a+'<<'+b+')'
def js_rshift(a, b):
return '('+a+'>>'+b+')'
def js_shit(a, b):
return 'PyJsBshift('+a+','+b+')'
def js_add(a, b): # To simplify later process of converting unary operators + and ++
return '(%s+%s)'%(a, b)
def js_sub(a, b): # To simplify
return '(%s-%s)'%(a, b)
def js_mul(a, b):
return '('+a+'*'+b+')'
def js_div(a, b):
return '('+a+'/'+b+')'
def js_mod(a, b):
return '('+a+'%'+b+')'
def js_typeof(a):
cand = list(bracket_split(a, ('()',)))
if len(cand)==2 and cand[0]=='var.get':
return cand[0]+cand[1][:-1]+',throw=False).typeof()'
return a+'.typeof()'
def js_void(a):
return '('+a+')'
def js_new(a):
cands = list(bracket_split(a, ('()',)))
lim = len(cands)
if lim < 2:
return a + '.create()'
n = 0
while n < lim:
c = cands[n]
if c[0]=='(':
if cands[n-1].endswith('.get') and n+1>=lim: # last get operation.
return a + '.create()'
elif cands[n-1][0]=='(':
return ''.join(cands[:n])+'.create' + c + ''.join(cands[n+1:])
elif cands[n-1]=='.callprop':
beg = ''.join(cands[:n-1])
args = argsplit(c[1:-1],',')
prop = args[0]
new_args = ','.join(args[1:])
create = '.get(%s).create(%s)' % (prop, new_args)
return beg + create + ''.join(cands[n+1:])
n+=1
return a + '.create()'
def js_delete(a):
#replace last get with delete.
c = list(bracket_split(a, ['()']))
beg, arglist = ''.join(c[:-1]).strip(), c[-1].strip() #strips just to make sure... I will remove it later
if beg[-4:]!='.get':
raise SyntaxError('Invalid delete operation')
return beg[:-3]+'delete'+arglist
def js_neg(a):
return '(-'+a+')'
def js_pos(a):
return '(+'+a+')'
def js_inv(a):
return '(~'+a+')'
def js_not(a):
return a+'.neg()'
def postfix(a, inc, post):
bra = list(bracket_split(a, ('()',)))
meth = bra[-2]
if not meth.endswith('get'):
raise SyntaxError('Invalid ++ or -- operation.')
bra[-2] = bra[-2][:-3] + 'put'
bra[-1] = '(%s,%s%sJs(1))' % (bra[-1][1:-1], a, '+' if inc else '-')
res = ''.join(bra)
return res if not post else '(%s%sJs(1))' % (res, '-' if inc else '+')
def js_pre_inc(a):
return postfix(a, True, False)
def js_post_inc(a):
return postfix(a, True, True)
def js_pre_dec(a):
return postfix(a, False, False)
def js_post_dec(a):
return postfix(a, False, True)
OR = {'||': js_or}
AND = {'&&': js_and}
BOR = {'|': js_bor}
BXOR = {'^': js_bxor}
BAND = {'&': js_band}
EQS = {'===': js_strict_eq,
'!==': js_strict_neq,
'==': js_abstract_eq, # we need == and != too. Read a note above method
'!=': js_abstract_neq}
#Since JS does not have chained comparisons we need to implement all cmp methods.
COMPS = {'<': js_lt,
'<=': js_le,
'>=': js_ge,
'>': js_gt,
'instanceof': js_instanceof, #todo change to validitate
'in': js_in}
BSHIFTS = {'<<': js_lshift,
'>>': js_rshift,
'>>>': js_shit}
ADDS = {'+': js_add,
'-': js_sub}
MULTS = {'*': js_mul,
'/': js_div,
'%': js_mod}
#Note they dont contain ++ and -- methods because they both have 2 different methods
# correct method will be found automatically in translate function
UNARY = {'typeof': js_typeof,
'void': js_void,
'new': js_new,
'delete': js_delete,
'!': js_not,
'-': js_neg,
'+': js_pos,
'~': js_inv,
'++': None,
'--': None
}
def transform_crap(code): #needs some more tests
"""Transforms this ?: crap into if else python syntax"""
ind = code.rfind('?')
if ind==-1:
return code
sep = code.find(':', ind)
if sep==-1:
raise SyntaxError('Invalid ?: syntax (probably missing ":" )')
beg = max(code.rfind(':', 0, ind), code.find('?', 0, ind))+1
end = code.find(':',sep+1)
end = len(code) if end==-1 else end
formula = '('+code[ind+1:sep]+' if '+code[beg:ind]+' else '+code[sep+1:end]+')'
return transform_crap(code[:beg]+formula+code[end:])
from code import InteractiveConsole
#e = InteractiveConsole(globals()).interact()
import traceback
def trans(code):
return NodeVisitor(code.strip()).translate().strip()
#todo finish this trans args
def trans_args(code):
new = bracket_replace(code.strip()[1:-1])
args = ','.join(trans(e) for e in new.split(','))
return '(%s)'%args
EXP = 0
def exp_translator(code):
global REPL, EXP
EXP += 1
REPL = {}
#print EXP, code
code = code.replace('\n', ' ')
assert '@' not in code
assert ';' not in code
assert '#' not in code
#if not code.strip(): #?
# return 'var.get("undefined")'
try:
return trans(code)
except:
#print '\n\ntrans failed on \n\n' + code
#raw_input('\n\npress enter')
raise
if __name__=='__main__':
#print 'Here', trans('(eee ) . ii [ PyJsMarker ] [ jkj ] ( j , j ) .
# jiji (h , ji , i)(non )( )()()()')
for e in xrange(3):
print exp_translator('jk = kk.ik++')
#First line translated with PyJs: PyJsStrictEq(PyJsAdd((Js(100)*Js(50)),Js(30)), Js("5030")), yay!
print exp_translator('delete a.f')