mirror of https://github.com/evilhero/mylar
501 lines
16 KiB
Python
501 lines
16 KiB
Python
from jsparser import *
|
|
from utils import *
|
|
import re
|
|
from utils import *
|
|
|
|
#Note all white space sent to this module must be ' ' so no '\n'
|
|
REPL = {}
|
|
|
|
|
|
#PROBLEMS
|
|
# <<=, >>=, >>>=
|
|
# they are unusual so I will not fix that now. a++ +b works fine and a+++++b (a++ + ++b) does not work even in V8
|
|
ASSIGNMENT_MATCH = '(?<!=|!|<|>)=(?!=)'
|
|
|
|
def unary_validitator(keyword, before, after):
|
|
if keyword[-1] in IDENTIFIER_PART:
|
|
if not after or after[0] in IDENTIFIER_PART:
|
|
return False
|
|
if before and before[-1] in IDENTIFIER_PART: # I am not sure here...
|
|
return False
|
|
return True
|
|
|
|
def comb_validitator(keyword, before, after):
|
|
if keyword=='instanceof' or keyword=='in':
|
|
if before and before[-1] in IDENTIFIER_PART:
|
|
return False
|
|
elif after and after[0] in IDENTIFIER_PART:
|
|
return False
|
|
return True
|
|
|
|
def bracket_replace(code):
|
|
new = ''
|
|
for e in bracket_split(code, ['()','[]'], False):
|
|
if e[0]=='[':
|
|
name = '#PYJSREPL'+str(len(REPL))+'{'
|
|
new+= name
|
|
REPL[name] = e
|
|
elif e[0]=='(': # can be a function call
|
|
name = '@PYJSREPL'+str(len(REPL))+'}'
|
|
new+= name
|
|
REPL[name] = e
|
|
else:
|
|
new+=e
|
|
return new
|
|
|
|
class NodeVisitor:
|
|
def __init__(self, code):
|
|
self.code = code
|
|
|
|
def rl(self, lis, op):
|
|
"""performs this operation on a list from *right to left*
|
|
op must take 2 args
|
|
a,b,c => op(a, op(b, c))"""
|
|
it = reversed(lis)
|
|
res = trans(it.next())
|
|
for e in it:
|
|
e = trans(e)
|
|
res = op(e, res)
|
|
return res
|
|
|
|
def lr(self, lis, op):
|
|
"""performs this operation on a list from *left to right*
|
|
op must take 2 args
|
|
a,b,c => op(op(a, b), c)"""
|
|
it = iter(lis)
|
|
res = trans(it.next())
|
|
for e in it:
|
|
e = trans(e)
|
|
res = op(res, e)
|
|
return res
|
|
|
|
def translate(self):
|
|
"""Translates outer operation and calls translate on inner operation.
|
|
Returns fully translated code."""
|
|
if not self.code:
|
|
return ''
|
|
new = bracket_replace(self.code)
|
|
#Check comma operator:
|
|
cand = new.split(',') #every comma in new must be an operator
|
|
if len(cand)>1: #LR
|
|
return self.lr(cand, js_comma)
|
|
#Check = operator:
|
|
# dont split at != or !== or == or === or <= or >=
|
|
#note <<=, >>= or this >>> will NOT be supported
|
|
# maybe I will change my mind later
|
|
# Find this crappy ?:
|
|
if '?' in new:
|
|
cond_ind = new.find('?')
|
|
tenary_start = 0
|
|
for ass in re.finditer(ASSIGNMENT_MATCH, new):
|
|
cand = ass.span()[1]
|
|
if cand < cond_ind:
|
|
tenary_start = cand
|
|
else:
|
|
break
|
|
actual_tenary = new[tenary_start:]
|
|
spl = ''.join(split_at_any(new, [':', '?'], translate=trans))
|
|
tenary_translation = transform_crap(spl)
|
|
assignment = new[:tenary_start] + ' PyJsConstantTENARY'
|
|
return trans(assignment).replace('PyJsConstantTENARY', tenary_translation)
|
|
cand = list(split_at_single(new, '=', ['!', '=','<','>'], ['=']))
|
|
if len(cand)>1: # RL
|
|
it = reversed(cand)
|
|
res = trans(it.next())
|
|
for e in it:
|
|
e = e.strip()
|
|
if not e:
|
|
raise SyntaxError('Missing left-hand in assignment!')
|
|
op = ''
|
|
if e[-2:] in OP_METHODS:
|
|
op = ','+e[-2:].__repr__()
|
|
e = e[:-2]
|
|
elif e[-1:] in OP_METHODS:
|
|
op = ','+e[-1].__repr__()
|
|
e = e[:-1]
|
|
e = trans(e)
|
|
#Now replace last get method with put and change args
|
|
c = list(bracket_split(e, ['()']))
|
|
beg, arglist = ''.join(c[:-1]).strip(), c[-1].strip() #strips just to make sure... I will remove it later
|
|
if beg[-4:]!='.get':
|
|
raise SyntaxError('Invalid left-hand side in assignment')
|
|
beg = beg[0:-3]+'put'
|
|
arglist = arglist[0:-1]+', '+res+op+')'
|
|
res = beg+arglist
|
|
return res
|
|
#Now check remaining 2 arg operators that are not handled by python
|
|
#They all have Left to Right (LR) associativity
|
|
order = [OR, AND, BOR, BXOR, BAND, EQS, COMPS, BSHIFTS, ADDS, MULTS]
|
|
# actually we dont need OR and AND because they can be handled easier. But just for fun
|
|
dangerous = ['<', '>']
|
|
for typ in order:
|
|
#we have to use special method for ADDS since they can be also unary operation +/++ or -/-- FUCK
|
|
if '+' in typ:
|
|
cand = list(split_add_ops(new))
|
|
else:
|
|
#dont translate. cant start or end on dangerous op.
|
|
cand = list(split_at_any(new, typ.keys(), False, dangerous, dangerous,validitate=comb_validitator))
|
|
if not len(cand)>1:
|
|
continue
|
|
n = 1
|
|
res = trans(cand[0])
|
|
if not res:
|
|
raise SyntaxError("Missing operand!")
|
|
while n<len(cand):
|
|
e = cand[n]
|
|
if not e:
|
|
raise SyntaxError("Missing operand!")
|
|
if n%2:
|
|
op = typ[e]
|
|
else:
|
|
res = op(res, trans(e))
|
|
n+=1
|
|
return res
|
|
#Now replace unary operators - only they are left
|
|
cand = list(split_at_any(new, UNARY.keys(), False, validitate=unary_validitator))
|
|
if len(cand)>1: #contains unary operators
|
|
if '++' in cand or '--' in cand: #it cant contain both ++ and --
|
|
if '--' in cand:
|
|
op = '--'
|
|
meths = js_post_dec, js_pre_dec
|
|
else:
|
|
op = '++'
|
|
meths = js_post_inc, js_pre_inc
|
|
pos = cand.index(op)
|
|
if cand[pos-1].strip(): # post increment
|
|
a = cand[pos-1]
|
|
meth = meths[0]
|
|
elif cand[pos+1].strip(): #pre increment
|
|
a = cand[pos+1]
|
|
meth = meths[1]
|
|
else:
|
|
raise SyntaxError('Invalid use of ++ operator')
|
|
if cand[pos+2:]:
|
|
raise SyntaxError('Too many operands')
|
|
operand = meth(trans(a))
|
|
cand = cand[:pos-1]
|
|
# now last cand should be operand and every other odd element should be empty
|
|
else:
|
|
operand = trans(cand[-1])
|
|
del cand[-1]
|
|
for i, e in enumerate(reversed(cand)):
|
|
if i%2:
|
|
if e.strip():
|
|
raise SyntaxError('Too many operands')
|
|
else:
|
|
operand = UNARY[e](operand)
|
|
return operand
|
|
#Replace brackets
|
|
if new[0]=='@' or new[0]=='#':
|
|
if len(list(bracket_split(new, ('#{','@}')))) ==1: # we have only one bracket, otherwise pseudobracket like @@....
|
|
assert new in REPL
|
|
if new[0]=='#':
|
|
raise SyntaxError('[] cant be used as brackets! Use () instead.')
|
|
return '('+trans(REPL[new][1:-1])+')'
|
|
#Replace function calls and prop getters
|
|
# 'now' must be a reference like: a or b.c.d but it can have also calls or getters ( for example a["b"](3))
|
|
#From here @@ means a function call and ## means get operation (note they dont have to present)
|
|
it = bracket_split(new, ('#{','@}'))
|
|
res = []
|
|
for e in it:
|
|
if e[0]!='#' and e[0]!='@':
|
|
res += [x.strip() for x in e.split('.')]
|
|
else:
|
|
res += [e.strip()]
|
|
# res[0] can be inside @@ (name)...
|
|
res = filter(lambda x: x, res)
|
|
if is_internal(res[0]):
|
|
out = res[0]
|
|
elif res[0][0] in {'#', '@'}:
|
|
out = '('+trans(REPL[res[0]][1:-1])+')'
|
|
elif is_valid_lval(res[0]) or res[0] in {'this', 'false', 'true', 'null'}:
|
|
out = 'var.get('+res[0].__repr__()+')'
|
|
else:
|
|
if is_reserved(res[0]):
|
|
raise SyntaxError('Unexpected reserved word: "%s"'%res[0])
|
|
raise SyntaxError('Invalid identifier: "%s"'%res[0])
|
|
if len(res)==1:
|
|
return out
|
|
n = 1
|
|
while n<len(res): #now every func call is a prop call
|
|
e = res[n]
|
|
if e[0]=='@': # direct call
|
|
out += trans_args(REPL[e])
|
|
n += 1
|
|
continue
|
|
args = False #assume not prop call
|
|
if n+1<len(res) and res[n+1][0]=='@': #prop call
|
|
args = trans_args(REPL[res[n+1]])[1:]
|
|
if args!=')':
|
|
args = ','+args
|
|
if e[0]=='#':
|
|
prop = trans(REPL[e][1:-1])
|
|
else:
|
|
if not is_lval(e):
|
|
raise SyntaxError('Invalid identifier: "%s"'%e)
|
|
prop = e.__repr__()
|
|
if args: # prop call
|
|
n+=1
|
|
out += '.callprop('+prop+args
|
|
else: #prop get
|
|
out += '.get('+prop+')'
|
|
n+=1
|
|
return out
|
|
|
|
def js_comma(a, b):
|
|
return 'PyJsComma('+a+','+b+')'
|
|
|
|
def js_or(a, b):
|
|
return '('+a+' or '+b+')'
|
|
|
|
def js_bor(a, b):
|
|
return '('+a+'|'+b+')'
|
|
|
|
def js_bxor(a, b):
|
|
return '('+a+'^'+b+')'
|
|
|
|
def js_band(a, b):
|
|
return '('+a+'&'+b+')'
|
|
|
|
def js_and(a, b):
|
|
return '('+a+' and '+b+')'
|
|
def js_strict_eq(a, b):
|
|
|
|
return 'PyJsStrictEq('+a+','+b+')'
|
|
|
|
def js_strict_neq(a, b):
|
|
return 'PyJsStrictNeq('+a+','+b+')'
|
|
|
|
#Not handled by python in the same way like JS. For example 2==2==True returns false.
|
|
# In JS above would return true so we need brackets.
|
|
def js_abstract_eq(a, b):
|
|
return '('+a+'=='+b+')'
|
|
|
|
#just like ==
|
|
def js_abstract_neq(a, b):
|
|
return '('+a+'!='+b+')'
|
|
|
|
def js_lt(a, b):
|
|
return '('+a+'<'+b+')'
|
|
|
|
def js_le(a, b):
|
|
return '('+a+'<='+b+')'
|
|
|
|
def js_ge(a, b):
|
|
return '('+a+'>='+b+')'
|
|
|
|
def js_gt(a, b):
|
|
return '('+a+'>'+b+')'
|
|
|
|
def js_in(a, b):
|
|
return b+'.contains('+a+')'
|
|
|
|
def js_instanceof(a, b):
|
|
return a+'.instanceof('+b+')'
|
|
|
|
def js_lshift(a, b):
|
|
return '('+a+'<<'+b+')'
|
|
|
|
def js_rshift(a, b):
|
|
return '('+a+'>>'+b+')'
|
|
|
|
def js_shit(a, b):
|
|
return 'PyJsBshift('+a+','+b+')'
|
|
|
|
def js_add(a, b): # To simplify later process of converting unary operators + and ++
|
|
return '(%s+%s)'%(a, b)
|
|
|
|
def js_sub(a, b): # To simplify
|
|
return '(%s-%s)'%(a, b)
|
|
|
|
def js_mul(a, b):
|
|
return '('+a+'*'+b+')'
|
|
|
|
def js_div(a, b):
|
|
return '('+a+'/'+b+')'
|
|
|
|
def js_mod(a, b):
|
|
return '('+a+'%'+b+')'
|
|
|
|
def js_typeof(a):
|
|
cand = list(bracket_split(a, ('()',)))
|
|
if len(cand)==2 and cand[0]=='var.get':
|
|
return cand[0]+cand[1][:-1]+',throw=False).typeof()'
|
|
return a+'.typeof()'
|
|
|
|
def js_void(a):
|
|
return '('+a+')'
|
|
|
|
def js_new(a):
|
|
cands = list(bracket_split(a, ('()',)))
|
|
lim = len(cands)
|
|
if lim < 2:
|
|
return a + '.create()'
|
|
n = 0
|
|
while n < lim:
|
|
c = cands[n]
|
|
if c[0]=='(':
|
|
if cands[n-1].endswith('.get') and n+1>=lim: # last get operation.
|
|
return a + '.create()'
|
|
elif cands[n-1][0]=='(':
|
|
return ''.join(cands[:n])+'.create' + c + ''.join(cands[n+1:])
|
|
elif cands[n-1]=='.callprop':
|
|
beg = ''.join(cands[:n-1])
|
|
args = argsplit(c[1:-1],',')
|
|
prop = args[0]
|
|
new_args = ','.join(args[1:])
|
|
create = '.get(%s).create(%s)' % (prop, new_args)
|
|
return beg + create + ''.join(cands[n+1:])
|
|
n+=1
|
|
return a + '.create()'
|
|
|
|
|
|
|
|
|
|
|
|
def js_delete(a):
|
|
#replace last get with delete.
|
|
c = list(bracket_split(a, ['()']))
|
|
beg, arglist = ''.join(c[:-1]).strip(), c[-1].strip() #strips just to make sure... I will remove it later
|
|
if beg[-4:]!='.get':
|
|
raise SyntaxError('Invalid delete operation')
|
|
return beg[:-3]+'delete'+arglist
|
|
|
|
def js_neg(a):
|
|
return '(-'+a+')'
|
|
|
|
def js_pos(a):
|
|
return '(+'+a+')'
|
|
|
|
def js_inv(a):
|
|
return '(~'+a+')'
|
|
|
|
def js_not(a):
|
|
return a+'.neg()'
|
|
|
|
def postfix(a, inc, post):
|
|
bra = list(bracket_split(a, ('()',)))
|
|
meth = bra[-2]
|
|
if not meth.endswith('get'):
|
|
raise SyntaxError('Invalid ++ or -- operation.')
|
|
bra[-2] = bra[-2][:-3] + 'put'
|
|
bra[-1] = '(%s,%s%sJs(1))' % (bra[-1][1:-1], a, '+' if inc else '-')
|
|
res = ''.join(bra)
|
|
return res if not post else '(%s%sJs(1))' % (res, '-' if inc else '+')
|
|
|
|
def js_pre_inc(a):
|
|
return postfix(a, True, False)
|
|
|
|
def js_post_inc(a):
|
|
return postfix(a, True, True)
|
|
|
|
def js_pre_dec(a):
|
|
return postfix(a, False, False)
|
|
|
|
def js_post_dec(a):
|
|
return postfix(a, False, True)
|
|
|
|
|
|
OR = {'||': js_or}
|
|
AND = {'&&': js_and}
|
|
BOR = {'|': js_bor}
|
|
BXOR = {'^': js_bxor}
|
|
BAND = {'&': js_band}
|
|
|
|
EQS = {'===': js_strict_eq,
|
|
'!==': js_strict_neq,
|
|
'==': js_abstract_eq, # we need == and != too. Read a note above method
|
|
'!=': js_abstract_neq}
|
|
|
|
#Since JS does not have chained comparisons we need to implement all cmp methods.
|
|
COMPS = {'<': js_lt,
|
|
'<=': js_le,
|
|
'>=': js_ge,
|
|
'>': js_gt,
|
|
'instanceof': js_instanceof, #todo change to validitate
|
|
'in': js_in}
|
|
|
|
BSHIFTS = {'<<': js_lshift,
|
|
'>>': js_rshift,
|
|
'>>>': js_shit}
|
|
|
|
ADDS = {'+': js_add,
|
|
'-': js_sub}
|
|
|
|
MULTS = {'*': js_mul,
|
|
'/': js_div,
|
|
'%': js_mod}
|
|
|
|
|
|
#Note they dont contain ++ and -- methods because they both have 2 different methods
|
|
# correct method will be found automatically in translate function
|
|
UNARY = {'typeof': js_typeof,
|
|
'void': js_void,
|
|
'new': js_new,
|
|
'delete': js_delete,
|
|
'!': js_not,
|
|
'-': js_neg,
|
|
'+': js_pos,
|
|
'~': js_inv,
|
|
'++': None,
|
|
'--': None
|
|
}
|
|
|
|
|
|
def transform_crap(code): #needs some more tests
|
|
"""Transforms this ?: crap into if else python syntax"""
|
|
ind = code.rfind('?')
|
|
if ind==-1:
|
|
return code
|
|
sep = code.find(':', ind)
|
|
if sep==-1:
|
|
raise SyntaxError('Invalid ?: syntax (probably missing ":" )')
|
|
beg = max(code.rfind(':', 0, ind), code.find('?', 0, ind))+1
|
|
end = code.find(':',sep+1)
|
|
end = len(code) if end==-1 else end
|
|
formula = '('+code[ind+1:sep]+' if '+code[beg:ind]+' else '+code[sep+1:end]+')'
|
|
return transform_crap(code[:beg]+formula+code[end:])
|
|
|
|
|
|
from code import InteractiveConsole
|
|
|
|
#e = InteractiveConsole(globals()).interact()
|
|
import traceback
|
|
def trans(code):
|
|
return NodeVisitor(code.strip()).translate().strip()
|
|
|
|
|
|
#todo finish this trans args
|
|
def trans_args(code):
|
|
new = bracket_replace(code.strip()[1:-1])
|
|
args = ','.join(trans(e) for e in new.split(','))
|
|
return '(%s)'%args
|
|
|
|
EXP = 0
|
|
def exp_translator(code):
|
|
global REPL, EXP
|
|
EXP += 1
|
|
REPL = {}
|
|
#print EXP, code
|
|
code = code.replace('\n', ' ')
|
|
assert '@' not in code
|
|
assert ';' not in code
|
|
assert '#' not in code
|
|
#if not code.strip(): #?
|
|
# return 'var.get("undefined")'
|
|
try:
|
|
return trans(code)
|
|
except:
|
|
#print '\n\ntrans failed on \n\n' + code
|
|
#raw_input('\n\npress enter')
|
|
raise
|
|
|
|
if __name__=='__main__':
|
|
#print 'Here', trans('(eee ) . ii [ PyJsMarker ] [ jkj ] ( j , j ) .
|
|
# jiji (h , ji , i)(non )( )()()()')
|
|
for e in xrange(3):
|
|
print exp_translator('jk = kk.ik++')
|
|
#First line translated with PyJs: PyJsStrictEq(PyJsAdd((Js(100)*Js(50)),Js(30)), Js("5030")), yay!
|
|
print exp_translator('delete a.f')
|
|
|