mirror of https://github.com/evilhero/mylar
298 lines
11 KiB
Python
298 lines
11 KiB
Python
|
# The MIT License
|
||
|
#
|
||
|
# Copyright 2014, 2015 Piotr Dabkowski
|
||
|
#
|
||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||
|
# a copy of this software and associated documentation files (the 'Software'),
|
||
|
# to deal in the Software without restriction, including without limitation the rights
|
||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||
|
# the Software, and to permit persons to whom the Software is furnished to do so, subject
|
||
|
# to the following conditions:
|
||
|
#
|
||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||
|
# substantial portions of the Software.
|
||
|
#
|
||
|
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
||
|
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||
|
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
||
|
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
|
||
|
from __future__ import unicode_literals
|
||
|
|
||
|
import sys
|
||
|
import unicodedata
|
||
|
import six
|
||
|
from collections import defaultdict
|
||
|
|
||
|
if six.PY3:
    # Python 3 no longer provides these Python 2 builtins; alias them to
    # their Python 3 counterparts so the rest of the module can use a
    # single spelling on both major versions.
    unichr, xrange, unicode = chr, range, str
|
||
|
|
||
|
# Numeric codes for the lexical token kinds, numbered from 1 in the order
# in which the kinds are listed below.
token = {
    kind: code
    for code, kind in enumerate(
        (
            'BooleanLiteral',
            'EOF',
            'Identifier',
            'Keyword',
            'NullLiteral',
            'NumericLiteral',
            'Punctuator',
            'StringLiteral',
            'RegularExpression',
            'Template',
        ),
        1
    )
}

# Reverse lookup: numeric token code -> token kind name.
TokenName = dict((code, kind) for kind, code in token.items())
|
||
|
|
||
|
# Flat list of token values, assembled from three groups.
# NOTE(review): judging by the name, these are presumably the tokens that
# may precede a function expression -- confirm against the tokenizer.
FnExprTokens = (
    # brackets and keywords
    ['(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
     'return', 'case', 'delete', 'throw', 'void']
    # assignment operators (plus the comma)
    + ['=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=',
       '&=', '|=', '^=', ',']
    # binary/unary operators
    + ['+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
       '|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
       '<=', '<', '>', '!=', '!==']
)
|
||
|
|
||
|
# Set of syntax-tree node type names (one whitespace-separated word each).
syntax = set("""
    AssignmentExpression AssignmentPattern ArrayExpression ArrayPattern
    ArrowFunctionExpression BlockStatement BinaryExpression BreakStatement
    CallExpression CatchClause ClassBody ClassDeclaration ClassExpression
    ConditionalExpression ContinueStatement DoWhileStatement
    DebuggerStatement EmptyStatement ExportAllDeclaration
    ExportDefaultDeclaration ExportNamedDeclaration ExportSpecifier
    ExpressionStatement ForStatement ForInStatement FunctionDeclaration
    FunctionExpression Identifier IfStatement ImportDeclaration
    ImportDefaultSpecifier ImportNamespaceSpecifier ImportSpecifier
    Literal LabeledStatement LogicalExpression MemberExpression
    MethodDefinition NewExpression ObjectExpression ObjectPattern
    Program Property RestElement ReturnStatement SequenceExpression
    SpreadElement Super SwitchCase SwitchStatement
    TaggedTemplateExpression TemplateElement TemplateLiteral
    ThisExpression ThrowStatement TryStatement UnaryExpression
    UpdateExpression VariableDeclaration VariableDeclarator
    WhileStatement WithStatement
""".split())
|
||
|
|
||
|
|
||
|
# Error messages should be identical to V8.
|
||
|
# Message key -> message text. Entries containing %s are format templates.
messages = {
    # unexpected tokens
    'UnexpectedToken': 'Unexpected token %s',
    'UnexpectedNumber': 'Unexpected number',
    'UnexpectedString': 'Unexpected string',
    'UnexpectedIdentifier': 'Unexpected identifier',
    'UnexpectedReserved': 'Unexpected reserved word',
    'UnexpectedTemplate': 'Unexpected quasi %s',
    'UnexpectedEOS': 'Unexpected end of input',

    # statements, expressions and regular expressions
    'NewlineAfterThrow': 'Illegal newline after throw',
    'InvalidRegExp': 'Invalid regular expression',
    'UnterminatedRegExp': 'Invalid regular expression: missing /',
    'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
    'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
    'MultipleDefaultsInSwitch':
        'More than one default clause in switch statement',
    'NoCatchOrFinally': 'Missing catch or finally after try',
    'UnknownLabel': "Undefined label '%s'",
    'Redeclaration': "%s '%s' has already been declared",
    'IllegalContinue': 'Illegal continue statement',
    'IllegalBreak': 'Illegal break statement',
    'IllegalReturn': 'Illegal return statement',

    # strict mode
    'StrictModeWith': 'Strict mode code may not include a with statement',
    'StrictCatchVariable':
        'Catch variable may not be eval or arguments in strict mode',
    'StrictVarName':
        'Variable name may not be eval or arguments in strict mode',
    'StrictParamName':
        'Parameter name eval or arguments is not allowed in strict mode',
    'StrictParamDupe':
        'Strict mode function may not have duplicate parameter names',
    'StrictFunctionName':
        'Function name may not be eval or arguments in strict mode',
    'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
    'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
    'StrictLHSAssignment':
        'Assignment to eval or arguments is not allowed in strict mode',
    'StrictLHSPostfix':
        'Postfix increment/decrement may not have eval or arguments '
        'operand in strict mode',
    'StrictLHSPrefix':
        'Prefix increment/decrement may not have eval or arguments '
        'operand in strict mode',
    'StrictReservedWord': 'Use of future reserved word in strict mode',

    # templates, parameters, object literals and classes
    'TemplateOctalLiteral':
        'Octal literals are not allowed in template strings.',
    'ParameterAfterRestParameter':
        'Rest parameter must be last formal parameter',
    'DefaultRestParameter': 'Unexpected token =',
    'ObjectPatternAsRestParameter': 'Unexpected token {',
    'DuplicateProtoProperty':
        'Duplicate __proto__ fields are not allowed in object literals',
    'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
    'DuplicateConstructor': 'A class may only have one constructor',
    'StaticPrototype':
        'Classes may not have static property named prototype',

    # import / export
    'MissingFromClause': 'Unexpected token',
    'NoAsAfterImportNamespace': 'Unexpected token',
    'InvalidModuleSpecifier': 'Unexpected token',
    'IllegalImportDeclaration': 'Unexpected token',
    'IllegalExportDeclaration': 'Unexpected token',
}
|
||
|
|
||
|
# Binary operator -> integer precedence level. Operators sharing a level are
# grouped together; note that level 10 is not used.
# NOTE(review): a higher number presumably means tighter binding -- confirm
# against the expression parser that consumes this table.
PRECEDENCE = {
    operator: level
    for level, operators in (
        (1, ('||',)),
        (2, ('&&',)),
        (3, ('|',)),
        (4, ('^',)),
        (5, ('&',)),
        (6, ('==', '!=', '===', '!==')),
        (7, ('<', '>', '<=', '>=', 'instanceof', 'in')),
        (8, ('<<', '>>', '>>>')),
        (9, ('+', '-')),
        (11, ('*', '/', '%')),
    )
    for operator in operators
}
|
||
|
|
||
|
class Token:
    """Empty namespace; gets one attribute per entry of ``token``."""


class Syntax:
    """Empty namespace; gets one attribute per name in ``syntax``."""


class Messages:
    """Empty namespace; gets one attribute per entry of ``messages``."""


class PlaceHolders:
    """Holder for marker strings."""

    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'
|
||
|
|
||
|
# Mirror the lookup tables onto the namespace classes so entries can be read
# as attributes (e.g. Token.EOF, Syntax.Program, Messages.UnexpectedToken).

# Token.<kind> -> numeric token code
for k, v in token.items():
    setattr(Token, k, v)

# Syntax.<NodeType> -> the node type name itself
for e in syntax:
    setattr(Syntax, e, e)

# Messages.<key> -> message text
for k, v in messages.items():
    setattr(Messages, k, v)
|
||
|
|
||
|
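# Technique for listing every character in a given Unicode category (used for
# U_CATEGORIES below), see:
# http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category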
|
||
|
# Single-character constants; the code point of each is given alongside.
BOM = u'\ufeff'    # U+FEFF
ZWJ = u'\u200d'    # U+200D
ZWNJ = u'\u200c'   # U+200C
TAB = u'\t'        # U+0009
VT = u'\x0b'       # U+000B
FF = u'\x0c'       # U+000C
SP = u' '          # U+0020
NBSP = u'\xa0'     # U+00A0
LF = u'\n'         # U+000A
CR = u'\r'         # U+000D
LS = u'\u2028'     # U+2028
PS = u'\u2029'     # U+2029
|
||
|
|
||
|
# Bucket every character from code point 0 through sys.maxunicode by its
# Unicode general category as reported by unicodedata.category().
U_CATEGORIES = defaultdict(list)
for c in map(unichr, range(sys.maxunicode + 1)):
    U_CATEGORIES[unicodedata.category(c)].append(c)

# Character classes assembled from the category buckets above.
UNICODE_LETTER = set().union(
    *(U_CATEGORIES[category]
      for category in ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')))
UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn']) | set(U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])

# Characters allowed at the start of an identifier. The backslash is
# included so that a unicode escape sequence can begin an identifier.
IDENTIFIER_START = UNICODE_LETTER | {'$', '_', '\\'}

# Characters allowed after the first character of an identifier.
IDENTIFIER_PART = (
    IDENTIFIER_START
    | UNICODE_COMBINING_MARK
    | UNICODE_DIGIT
    | UNICODE_CONNECTOR_PUNCTUATION
    | {ZWJ, ZWNJ}
)
|
||
|
|
||
|
# Integer code points treated as white space: individual code points plus
# the contiguous run U+2000..U+200A.
WHITE_SPACE = {
    0x0009, 0x000B, 0x000C, 0x0020, 0x00A0,
    0x1680, 0x180E, 0x202F, 0x205F, 0x3000, 0xFEFF,
} | set(range(0x2000, 0x200A + 1))

# Integer code points treated as line terminators.
LINE_TERMINATORS = {0x000A, 0x000D, 0x2028, 0x2029}
|
||
|
|
||
|
def isIdentifierStart(ch):
    """Return True if *ch* is a member of IDENTIFIER_START.

    *ch* may be a unicode character or an integer code point; anything that
    is not a unicode string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_START
|
||
|
|
||
|
def isIdentifierPart(ch):
    """Return True if *ch* is a member of IDENTIFIER_PART.

    *ch* may be a unicode character or an integer code point; anything that
    is not a unicode string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_PART
|
||
|
|
||
|
def isWhiteSpace(ch):
    """Return True if *ch* is one of the WHITE_SPACE code points.

    *ch* may be a unicode character (converted with ``ord``) or an integer
    code point (used as given).
    """
    if isinstance(ch, unicode):
        ch = ord(ch)
    return ch in WHITE_SPACE
|
||
|
|
||
|
def isLineTerminator(ch):
    """Return True if *ch* is one of the LINE_TERMINATORS code points.

    *ch* may be a unicode character (converted with ``ord``) or an integer
    code point (used as given).
    """
    if isinstance(ch, unicode):
        ch = ord(ch)
    return ch in LINE_TERMINATORS
|
||
|
|
||
|
# Digit character sets for each numeric base.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = set('0123456789abcdefABCDEF')

# Hex digit character -> numeric value, for lower- and upper-case digits.
HEX_CONV = {
    digit: value for value, digit in enumerate('0123456789abcdef')
}
# Loop variable names are kept as-is because they are module-level names.
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i
|
||
|
|
||
|
|
||
|
def isDecimalDigit(ch):
    """Return True if *ch* is a member of DEC ('0' through '9').

    *ch* may be a unicode character or an integer code point; anything that
    is not a unicode string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC
|
||
|
|
||
|
def isHexDigit(ch):
    """Return True if *ch* is a member of HEX (0-9, a-f, A-F).

    *ch* may be a unicode character or an integer code point; anything that
    is not a unicode string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in HEX
|
||
|
|
||
|
def isOctalDigit(ch):
    """Return True if *ch* is a member of OCTAL ('0' through '7').

    *ch* may be a unicode character or an integer code point; anything that
    is not a unicode string is converted with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL
|
||
|
|
||
|
def isFutureReservedWord(w):
    """Return True if *w* is 'enum', 'export', 'import' or 'super'."""
    return w in {
        'enum',
        'export',
        'import',
        'super',
    }
|
||
|
|
||
|
def isStrictModeReservedWord(w):
    """Return True if *w* is one of the strict-mode reserved words below."""
    return w in {
        'implements',
        'interface',
        'let',
        'package',
        'private',
        'protected',
        'public',
        'static',
        'yield',
    }
|
||
|
|
||
|
def isRestrictedWord(w):
    """Return True if *w* is 'eval' or 'arguments'."""
    return w in {
        'arguments',
        'eval',
    }
|
||
|
|
||
|
def isKeyword(w):
    """Return True if *w* is in the keyword set below.

    Notes on the membership of the set:
    - 'const' is specialized as Keyword in V8.
    - 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
    - Some others are from future reserved words.
    - NOTE(review): 'pyimport' is not a JavaScript keyword; presumably a
      project-specific extension -- confirm before changing.
    """
    return w in {
        'break', 'case', 'catch', 'class', 'const', 'continue',
        'debugger', 'default', 'delete', 'do', 'else', 'enum',
        'export', 'extends', 'finally', 'for', 'function', 'if',
        'import', 'in', 'instanceof', 'let', 'new', 'pyimport',
        'return', 'super', 'switch', 'this', 'throw', 'try',
        'typeof', 'var', 'void', 'while', 'with', 'yield',
    }
|
||
|
|
||
|
|
||
|
class JsSyntaxError(Exception):
    """Exception subclass that adds no behavior of its own.

    NOTE(review): presumably raised for invalid JavaScript input, going by
    the name -- confirm at the raise sites.
    """
|
||
|
|
||
|
if __name__ == '__main__':
    # Smoke checks run only when the module is executed directly: each
    # predicate must accept the given sample, passed either as a character
    # or as an integer code point.
    for predicate, sample in (
        (isLineTerminator, '\n'),
        (isLineTerminator, 0x0A),
        (isIdentifierStart, '$'),
        (isIdentifierStart, 100),
        (isWhiteSpace, ' '),
    ):
        assert predicate(sample)
|