"""
The process of translating JS will go like that:        # TOP = 'imports and scope set'

1. Remove all the comments
2. Replace number, string and regexp literals with markers
4. Remove global Functions and move their translation to the TOP. Also add register code there.
5. Replace inline functions with lvals
6. Remove List and Object literals and replace them with lvals
7. Find and remove var declarations, generate python register code that would go on TOP.

Here we should be left with global code only where 1 line of js code = 1 line of python code.
Routine translating this code should be called glob_translate:
1. Search for outer structures and translate them using glob and inside using exps_translate


exps_translate routine:
1. Remove outer {}
2. Split lines at ;
3. Convert line by line using exp_translate
4. In case of error in 3 try to insert ; according to ECMA rules and repeat 3.

exp_translate routine:
It takes a single line of JS code and returns a SINGLE line of Python code.
Note var is not present here because it was removed in previous stages.
In case of parsing errors it must return a pos of error.
1. Convert all assignment operations to put operations, this may be hard :(
2. Convert all gets and calls to get and callprop.
3. Convert unary operators like typeof, new, !, delete.
   Delete can be handled by replacing last get method with delete.
4. Convert remaining operators that are not handled by python eg: === and ,


lval format PyJsLvalNR
marker PyJs(TYPE_NAME)(NR)

TODO
1. Number literal replacement
2. Array literal replacement
3. Object literal replacement
5. Function replacement
4. Literal replacement translators
"""
from __future__ import print_function

from utils import *

# Maps binary operator tokens to the names of the corresponding Python
# special methods.
OP_METHODS = {
    '*': '__mul__',
    '/': '__div__',
    '%': '__mod__',
    '+': '__add__',
    '-': '__sub__',
    '<<': '__lshift__',
    '>>': '__rshift__',
    '&': '__and__',
    '^': '__xor__',
    '|': '__or__'
}


def dbg(source):
    """Best-effort dump of ``source`` to a hard-coded debug file.

    Any ordinary failure (missing directory, encoding problem, ...) is
    ignored on purpose: this helper is only called right before an error is
    raised and must never mask that error.
    """
    try:
        with open(r'C:\Users\Piotrek\Desktop\dbg.py', 'w') as f:
            f.write(source)
    except Exception:
        # Deliberately swallowed, but KeyboardInterrupt/SystemExit still
        # propagate (a bare ``except:`` would have eaten those too).
        pass


def indent(lines, ind=4):
    """Return ``lines`` with every line prefixed by ``ind`` spaces.

    Trailing spaces of the result are stripped, so text ending with a
    newline does not end with a dangling indent.
    """
    pad = ind * ' '
    return pad + lines.replace('\n', '\n' + pad).rstrip(' ')


def inject_before_lval(source, lval, code):
    """Insert ``code`` on its own line(s) just above the line holding ``lval``.

    ``code`` is indented to match the leading spaces of the line that
    contains ``lval``.

    Raises:
        RuntimeError: if ``lval`` does not occur exactly once in ``source``.
            Before raising, ``source`` is dumped via ``dbg`` and ``lval`` is
            printed.
    """
    occurrences = source.count(lval)
    if occurrences != 1:
        dbg(source)
        print()
        print(lval)
        if occurrences > 1:
            raise RuntimeError('To many lvals (%s)' % lval)
        raise RuntimeError('No lval found "%s"' % lval)
    end = source.index(lval)
    # Index of the newline that precedes the target line (-1 on first line).
    inj = source.rfind('\n', 0, end)
    # Count the leading spaces of the target line.
    ind = inj
    while ind + 1 < len(source) and source[ind + 1] == ' ':
        ind += 1
    ind -= inj
    return source[:inj + 1] + indent(code, ind) + source[inj + 1:]


def bracket_split(source, brackets=('()', '{}', '[]'), strip=False):
    """Split ``source`` into top-level bracketed and non-bracketed fragments.

    DOES NOT RETURN EMPTY STRINGS (can only return empty bracket content if
    strip=True).

    Args:
        source: text to split.
        brackets: iterable of two-character strings (opening + closing).
        strip: when true, bracketed fragments are yielded without their
            outer bracket characters.

    Yields:
        Fragments in source order. An opening bracket that is never closed
        is not split off; it ends up inside the final fragment.
    """
    starts = [e[0] for e in brackets]
    in_bracket = 0  # nesting depth of the bracket kind currently tracked
    n = 0
    last = 0  # start of the not-yet-yielded text
    while n < len(source):
        e = source[n]
        if not in_bracket and e in starts:
            in_bracket = 1
            start = n
            b_start, b_end = brackets[starts.index(e)]
        elif in_bracket:
            # Only the bracket kind that opened the group is counted.
            if e == b_start:
                in_bracket += 1
            elif e == b_end:
                in_bracket -= 1
                if not in_bracket:
                    if source[last:start]:
                        yield source[last:start]
                    last = n + 1
                    # ``strip`` is used as 0/1 to drop the outer brackets.
                    yield source[start + strip:n + 1 - strip]
        n += 1
    if source[last:]:
        yield source[last:]


def pass_bracket(source, start, bracket='()'):
    """Return the bracket group found at ``start`` and the position after it.

    Returns content of brackets with brackets and first pos after brackets
    if source[start] is followed by some optional white space and brackets.
    Otherwise returns ``(None, None)``.
    """
    e = bracket_split(source[start:], [bracket], False)
    try:
        # Builtin next() works on both Python 2 and 3 (``e.next()`` is
        # Python 2 only).
        cand = next(e)
    except StopIteration:
        return None, None
    if not cand.strip():  # white space...
        try:
            res = next(e)
        except StopIteration:
            return None, None
        return res, start + len(cand) + len(res)
    # The fragment must be a real bracket group: it has to start with the
    # opening bracket, not merely end with the closing one (e.g. "a)").
    if cand[0] == bracket[0] and cand[-1] == bracket[1]:
        return cand, start + len(cand)
    return None, None


def startswith_keyword(start, keyword):
    """Return True if ``start`` (ignoring leading whitespace) begins with
    ``keyword`` as a whole word, i.e. the character right after it is not in
    IDENTIFIER_PART."""
    start = start.lstrip()
    if not start.startswith(keyword):
        return False
    if len(keyword) < len(start) and start[len(keyword)] in IDENTIFIER_PART:
        return False
    return True


def endswith_keyword(ending, keyword):
    """Return True if ``ending`` (ignoring trailing whitespace) ends with
    ``keyword`` as a whole word, i.e. the character right before it is not
    in IDENTIFIER_PART."""
    ending = ending.rstrip()
    if not ending.endswith(keyword):
        return False
    if len(keyword) < len(ending):
        if ending[len(ending) - len(keyword) - 1] in IDENTIFIER_PART:
            return False
    return True


def pass_white(source, start):
    """Return the index of the first character at or after ``start`` that is
    not in SPACE (``len(source)`` if there is none)."""
    n = start
    while n < len(source) and source[n] in SPACE:
        n += 1
    return n


def except_token(source, start, token, throw=True):
    """Token can be only a single char. Returns position after token if found.

    White space before the token is skipped. Otherwise raises SyntaxError if
    throw, otherwise returns None.
    """
    start = pass_white(source, start)
    if start < len(source) and source[start] == token:
        return start + 1
    if throw:
        raise SyntaxError('Missing token. Expected %s' % token)
    return None


def except_keyword(source, start, keyword):
    """Returns position after keyword if found else None.

    Note: skips white space. The keyword must be a whole word: it is
    rejected when directly followed by an IDENTIFIER_PART character.
    """
    start = pass_white(source, start)
    kl = len(keyword)  # keyword len
    if kl + start > len(source):
        return None
    if source[start:start + kl] != keyword:
        return None
    if kl + start < len(source) and source[start + kl] in IDENTIFIER_PART:
        return None
    return start + kl


def parse_identifier(source, start, throw=True):
    """Passes white space from start and returns the first identifier.

    Returns:
        ``(identifier, end)`` where ``end`` is the position right after the
        identifier. If the identifier is missing or invalid: raises
        SyntaxError when ``throw`` is true, otherwise returns None.
    """
    start = pass_white(source, start)
    end = start
    if not end < len(source):
        if throw:
            raise SyntaxError('Missing identifier!')
        return None
    if source[end] not in IDENTIFIER_START:
        if throw:
            raise SyntaxError('Invalid identifier start: "%s"' % source[end])
        return None
    end += 1
    while end < len(source) and source[end] in IDENTIFIER_PART:
        end += 1
    if not is_valid_lval(source[start:end]):
        if throw:
            raise SyntaxError(
                'Invalid identifier name: "%s"' % source[start:end])
        return None
    return source[start:end], end


def argsplit(args, sep=','):
    """Split ``args`` at every top-level ``sep`` and return the list of parts.

    Used to split JS args (it is not that simple as it seems because sep can
    be inside brackets). Pass args *without* brackets!

    Used also to parse array and object elements, and more.
    """
    parsed_len = 0  # number of characters of ``args`` consumed so far
    last = 0  # start of the current, not yet emitted, part
    splits = []
    for e in bracket_split(args, brackets=['()', '[]', '{}']):
        # Separators inside a bracketed fragment are not split points.
        if e[0] not in {'(', '[', '{'}:
            for i, char in enumerate(e):
                if char == sep:
                    splits.append(args[last:parsed_len + i])
                    last = parsed_len + i + 1
        parsed_len += len(e)
    splits.append(args[last:])
    return splits


def split_add_ops(text):
    """Specialized function splitting text at add/sub operators.

    Operands are *not* translated. Example result
    ['op1', '+', 'op2', '-', 'op3'].

    A '+' or '-' only counts as a binary operator when something other than
    white space, '+', '-', '/', '*' or '%' has been seen since the previous
    split; otherwise it is left inside the operand.
    """
    n = 0
    # Hide ++ and -- so they are not mistaken for binary operators.
    text = text.replace('++', '##').replace(
        '--', '@@')  # text does not normally contain any of these
    spotted = False  # set to true if noticed anything other than +- or white space
    last = 0
    while n < len(text):
        e = text[n]
        if e == '+' or e == '-':
            if spotted:
                yield text[last:n].replace('##', '++').replace('@@', '--')
                yield e
                last = n + 1
                spotted = False
        elif e == '/' or e == '*' or e == '%':
            spotted = False
        elif e != ' ':
            spotted = True
        n += 1
    yield text[last:n].replace('##', '++').replace('@@', '--')


def split_at_any(text,
                 lis,
                 translate=False,
                 not_before=(),
                 not_after=(),
                 validitate=None):
    """Split ``text`` at any of the separators in ``lis``.

    Yields fragments and separators alternately; longer separators are tried
    first. The caller's ``lis`` is not modified.

    Args:
        text: text to split.
        lis: iterable of separator strings.
        translate: optional callable applied to every yielded fragment
            (separators are yielded untouched).
        not_before: strings; a position is skipped when the text before it
            ends with one of them.
        not_after: strings; see the comment in the loop for how they veto a
            position.
        validitate: optional callable ``(sep, text_before, text_after)``;
            a falsy result skips that separator at that position.
    """
    candidates = sorted(lis, key=len, reverse=True)
    blocked_before = tuple(not_before)
    blocked_after = tuple(not_after)
    last = 0
    n = 0
    text_len = len(text)
    while n < text_len:
        if text.endswith(blocked_before, 0, n):  # Cant end with end before
            n += 1
            continue
        for e in candidates:
            s = len(e)
            if s + n > text_len:
                continue
            if validitate and not validitate(e, text[:n], text[n + s:]):
                continue
            # NOTE: this veto is evaluated before checking that ``e``
            # actually matches at ``n``; the order is kept as it was.
            if text.startswith(blocked_after, n + s):
                n += 1
                break
            if e == text[n:n + s]:
                yield text[last:n] if not translate else translate(
                    text[last:n])
                yield e
                n += s
                last = n
                break
        else:
            n += 1
    yield text[last:n] if not translate else translate(text[last:n])


def split_at_single(text, sep, not_before=(), not_after=()):
    """Works like text.split(sep) but separated fragments cant end with
    not_before or start with not_after.

    Raises:
        ValueError: if ``sep`` is empty (as ``str.split`` does); an empty
            separator would otherwise never advance.
    """
    if not sep:
        raise ValueError('empty separator')
    blocked_before = tuple(not_before)
    blocked_after = tuple(not_after)
    n = 0
    lt, s = len(text), len(sep)
    last = 0
    while n < lt:
        if text.startswith(sep, n):
            if text.endswith(blocked_before, last, n):
                pass
            elif text.startswith(blocked_after, n + s):
                pass
            else:
                yield text[last:n]
                last = n + s
                n += s - 1  # skip the rest of the separator
        n += 1
    yield text[last:]