mylar/lib/js2py/legecy_translators/jsparser.py

"""
The process of translating JS will go like this:        # TOP = 'imports and scope set'

1. Remove all the comments
2. Replace number, string and regexp literals with markers
3. Remove global Functions and move their translation to the TOP. Also add register code there.
4. Replace inline functions with lvals
5. Remove List and Object literals and replace them with lvals
6. Find and remove var declarations, generate python register code that would go on TOP.

Here we should be left with global code only where 1 line of js code = 1 line of python code.
Routine translating this code should be called glob_translate:
1. Search for outer structures and translate them using glob and inside using exps_translate


exps_translate routine:
1. Remove outer {}
2. Split lines at ;
3. Convert line by line using exp_translate
4. In case of error in 3 try to insert ; according to ECMA rules and repeat 3.

exp_translate routine:
It takes a single line of JS code and returns a SINGLE line of Python code.
Note var is not present here because it was removed in previous stages.
In case of parsing errors it must return the position of the error.
1. Convert all assignment operations to put operations, this may be hard :(
2. Convert all gets and calls to get and callprop.
3. Convert unary operators like typeof, new, !, delete.
   Delete can be handled by replacing last get method with delete.
4. Convert remaining operators that are not handled by python eg: === and ,


lval format PyJsLvalNR
marker PyJs(TYPE_NAME)(NR)

TODO
1. Number literal replacement
2. Array literal replacement
3. Object literal replacement
4. Function replacement
5. Literal replacement translators


"""

from utils import *

# Lookup table: operator symbol -> name of the Python special method
# that implements it.
OP_METHODS = {
    '*': '__mul__',
    '/': '__div__',
    '%': '__mod__',
    '+': '__add__',
    '-': '__sub__',
    '<<': '__lshift__',
    '>>': '__rshift__',
    '&': '__and__',
    '^': '__xor__',
    '|': '__or__',
}

def dbg(source, path=r'C:\Users\Piotrek\Desktop\dbg.py'):
    """Best-effort dump of *source* to a debug file.

    source: text to write.
    path:   destination file; defaults to the original hard-coded location.
            The literal is a raw string so its backslashes are never
            interpreted as escape sequences.

    Returns None. Any ordinary error while opening or writing the file is
    deliberately ignored, because this is only a debugging aid.
    KeyboardInterrupt and SystemExit are not swallowed.
    """
    try:
        with open(path, 'w') as f:
            f.write(source)
    except Exception:
        # Debug helper only: a failed dump must not break the caller.
        pass


def indent(lines, ind=4):
    """Prefix every line of *lines* with *ind* spaces.

    Trailing spaces of the final result are removed, so text ending in a
    newline does not gain a dangling run of indentation after it.
    """
    pad = ' ' * ind
    shifted = pad + ('\n' + pad).join(lines.split('\n'))
    return shifted.rstrip(' ')


def inject_before_lval(source, lval, code):
    """Insert *code* directly above the line of *source* that contains *lval*.

    *lval* must occur exactly once in *source*. The injected code is
    indented (via indent()) by the number of spaces that start the line
    holding *lval*, so it lines up with that line. Only spaces are counted.

    Returns the new source text.

    Raises RuntimeError when *lval* is missing or occurs more than once.
    Before raising, the source is dumped with dbg() and the lval is printed
    to stdout to help debugging.
    """
    occurrences = source.count(lval)
    if occurrences != 1:
        dbg(source)
        # print('') / print(lval) emit the same output under both the
        # Python 2 print statement and the Python 3 print function.
        print('')
        print(lval)
        if occurrences > 1:
            raise RuntimeError('To many lvals (%s)' % lval)
        raise RuntimeError('No lval found "%s"' % lval)
    end = source.index(lval)
    # Position of the newline that precedes lval's line (-1 on first line).
    inj = source.rfind('\n', 0, end)
    ind = inj
    # Count the leading spaces of that line; the bound keeps the scan from
    # running off the end of source.
    while ind + 1 < len(source) and source[ind+1] == ' ':
        ind += 1
    ind -= inj
    return source[:inj+1] + indent(code, ind) + source[inj+1:]


def bracket_split(source, brackets=('()','{}','[]'), strip=False):
    """Yield *source* cut into top-level bracket groups and the text between them.

    brackets: two-character strings, opener followed by closer.
    strip:    when true, the outermost opener and closer are dropped from
              each yielded group.

    Only the bracket kind that opened a group is tracked while inside it.
    Text fragments are never yielded empty; a group can be empty only when
    strip is true. Text after the last closed group, including any group
    that was never closed, is yielded as one final fragment.
    """
    openers = [pair[0] for pair in brackets]
    depth = 0
    chunk_from = 0
    for pos, ch in enumerate(source):
        if depth == 0:
            if ch in openers:
                depth = 1
                opened_at = pos
                opener, closer = brackets[openers.index(ch)]
            continue
        if ch == opener:
            depth += 1
        elif ch == closer:
            depth -= 1
            if depth == 0:
                before = source[chunk_from:opened_at]
                if before:
                    yield before
                chunk_from = pos + 1
                yield source[opened_at + strip:pos + 1 - strip]
    rest = source[chunk_from:]
    if rest:
        yield rest

def pass_bracket(source, start, bracket='()'):
    """Read the bracket group that begins at source[start].

    Optional white space between *start* and the opening bracket is skipped.

    Returns a tuple (group, end): *group* is the bracket content including
    the brackets themselves and *end* is the index of the first character
    after the closing bracket. Returns (None, None) when source[start:] does
    not begin with (optional white space and) a complete, closed group.
    """
    chunks = bracket_split(source[start:], [bracket], False)
    try:
        cand = next(chunks)
    except StopIteration:
        return None, None
    if not cand.strip():
        # Only white space so far. bracket_split emits a text fragment
        # ahead of a group only, so the next chunk (if any) is the group.
        try:
            res = next(chunks)
        except StopIteration:
            return None, None
        return res, start + len(cand) + len(res)
    opener, closer = bracket[0], bracket[1]
    # A real group starts with the opener, ends with the closer and is
    # balanced. Checking only the last character would also accept trailing
    # text such as 'abc)' or an unclosed '((a)'.
    if (cand[0] == opener and cand[-1] == closer
            and cand.count(opener) == cand.count(closer)):
        return cand, start + len(cand)
    return None, None


def startswith_keyword(start, keyword):
    """Tell whether *start*, ignoring leading white space, begins with *keyword*.

    The keyword must stand alone: if the character right after it belongs
    to IDENTIFIER_PART (a name not defined in this file) the result is False.
    """
    text = start.lstrip()
    if not text.startswith(keyword):
        return False
    following = text[len(keyword):len(keyword) + 1]
    return not (following and following in IDENTIFIER_PART)

def endswith_keyword(ending, keyword):
    """Tell whether *ending*, ignoring trailing white space, ends with *keyword*.

    The keyword must stand alone: if the character right before it belongs
    to IDENTIFIER_PART (a name not defined in this file) the result is False.
    """
    text = ending.rstrip()
    if not text.endswith(keyword):
        return False
    before = len(text) - len(keyword) - 1
    if before >= 0 and text[before] in IDENTIFIER_PART:
        return False
    return True


def pass_white(source, start):
    """Return the first index at or after *start* whose character is not in SPACE.

    SPACE is not defined in this file. If every remaining character is in
    SPACE the result is len(source); if *start* is already at or past the
    end of *source* it is returned unchanged.
    """
    pos = start
    limit = len(source)
    while pos < limit and source[pos] in SPACE:
        pos += 1
    return pos

def except_token(source, start, token, throw=True):
    """Require the single-character *token* at *start*, after skipping white space.

    Returns the position right after the token when it is present.
    Otherwise raises SyntaxError if *throw* is true, else returns None.
    """
    pos = pass_white(source, start)
    found = pos < len(source) and source[pos] == token
    if found:
        return pos + 1
    if not throw:
        return None
    raise SyntaxError('Missing token. Expected %s'%token)

def except_keyword(source, start, keyword):
    """Require *keyword* at *start*, after skipping white space.

    Returns the position right after the keyword, or None when the keyword
    is absent or is immediately followed by a character in IDENTIFIER_PART
    (i.e. it is only the prefix of a longer name).
    """
    begin = pass_white(source, start)
    stop = begin + len(keyword)
    if stop > len(source) or source[begin:stop] != keyword:
        return None
    if stop < len(source) and source[stop] in IDENTIFIER_PART:
        return None
    return stop


def parse_identifier(source, start, throw=True):
    """Read one identifier from *source*, skipping white space after *start*.

    Returns a tuple (identifier, end) where *end* is the index right after
    the identifier. On failure (nothing left to read, an invalid first
    character, or a name rejected by is_valid_lval) raises SyntaxError if
    *throw* is true, otherwise returns None.
    """
    begin = pass_white(source, start)
    length = len(source)
    if begin >= length:
        message = 'Missing identifier!'
    elif source[begin] not in IDENTIFIER_START:
        message = 'Invalid identifier start: "%s"'%source[begin]
    else:
        # Consume the longest run of identifier characters.
        stop = begin + 1
        while stop < length and source[stop] in IDENTIFIER_PART:
            stop += 1
        name = source[begin:stop]
        if is_valid_lval(name):
            return name, stop
        message = 'Invalid identifier name: "%s"'%name
    if throw:
        raise SyntaxError(message)
    return None


def argsplit(args, sep=','):
    """Split *args* at every top-level occurrence of the single character *sep*.

    Separators inside (), [] or {} groups are ignored, which is why a plain
    str.split is not enough for JS argument lists.

    Pass args *without* the surrounding brackets!

    Also used to parse array and object elements, and more.
    Returns a list of fragments; separators are not included.
    """
    pieces = []
    offset = 0       # index in args where the current chunk starts
    piece_start = 0  # index in args where the pending fragment starts
    for chunk in bracket_split(args, brackets=['()', '[]', '{}']):
        if chunk[0] not in ('(', '[', '{'):
            cuts = [offset + rel for rel, ch in enumerate(chunk) if ch == sep]
            for cut in cuts:
                pieces.append(args[piece_start:cut])
                piece_start = cut + 1
        offset += len(chunk)
    pieces.append(args[piece_start:])
    return pieces

def split_add_ops(text):
    """Yield *text* split at binary + and - operators.

    Operands are *not* translated. Example result:
    ['op1', '+', 'op2', '-', 'op3'].

    A + or - counts as binary only if an operand character has been seen
    since the last split or since the last *, / or %. The sequences ++ and
    -- are masked as ## and @@ while scanning (text is not expected to
    contain those) and restored in the yielded operands.
    """
    def restore(fragment):
        return fragment.replace('##', '++').replace('@@', '--')

    masked = text.replace('++', '##').replace('--', '@@')
    operand_seen = False  # anything other than +, -, *, /, % or a space
    cut = 0
    for pos, ch in enumerate(masked):
        if ch in ('+', '-'):
            if operand_seen:
                yield restore(masked[cut:pos])
                yield ch
                cut = pos + 1
                operand_seen = False
        elif ch in ('/', '*', '%'):
            operand_seen = False
        elif ch != ' ':
            operand_seen = True
    yield restore(masked[cut:])


def split_at_any(text, lis, translate=False, not_before=[], not_after=[], validitate=None):
    """Split *text* at accepted occurrences of any separator in *lis*.

    Generator yielding fragment, separator, fragment, ... It always starts
    and ends with a fragment, which may be empty.

    text:       string to split.
    lis:        separators; longer ones are tried first. Any sequence is
                accepted and it is NOT modified.
    translate:  optional callable applied to every fragment (never to the
                separators); fragments are yielded unchanged when falsy.
    not_before: a position is skipped when the text before it ends with
                any of these strings.
    not_after:  while a candidate separator is being tried at a position,
                the whole position is skipped if the text following that
                candidate's span starts with any of these strings. This is
                checked before the candidate is compared with the text.
    validitate: optional callable (separator, text_before, text_after);
                a falsy result rejects that candidate at that position.
    """
    # Sort a copy: sorting in place would reorder the caller's list.
    seps = sorted(lis, key=len, reverse=True)
    last = 0
    n = 0
    text_len = len(text)
    while n < text_len:
        # endswith/startswith with offsets avoid copying a slice of text
        # at every position.
        if any(text.endswith(e, 0, n) for e in not_before):
            n += 1
            continue
        for e in seps:
            s = len(e)
            if s + n > text_len:
                continue
            if validitate and not validitate(e, text[:n], text[n+s:]):
                continue
            if any(text.startswith(stop, n + s) for stop in not_after):
                n += 1
                break
            if e == text[n:n+s]:
                yield text[last:n] if not translate else translate(text[last:n])
                yield e
                n += s
                last = n
                break
        else:
            n += 1
    yield text[last:n] if not translate else translate(text[last:n])

def split_at_single(text, sep, not_before=[], not_after=[]):
    """Generator working like text.split(sep) with two extra restrictions.

    An occurrence of *sep* is not used as a split point when the fragment
    collected so far ends with any string in *not_before*, or when the text
    right after the separator starts with any string in *not_after*.

    Yields the fragments without the separators; the last fragment is
    always yielded, even when empty.

    Raises ValueError (on first iteration) for an empty *sep*, as str.split
    does. Without this check the cursor never advances and the generator
    never terminates.
    """
    lt, s = len(text), len(sep)
    if not s:
        raise ValueError('empty separator')
    n = 0
    last = 0
    while n < lt:
        if text.startswith(sep, n):
            # Offsets instead of slices: same result, no copies of text.
            blocked = (any(text.endswith(e, last, n) for e in not_before)
                       or any(text.startswith(e, n + s) for e in not_after))
            if not blocked:
                yield text[last:n]
                last = n + s
                n += s - 1
        n += 1
    yield text[last:]