bazarr/libs/pyjsparser/pyjsparserdata.py

408 lines
12 KiB
Python

# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals
import sys
import unicodedata
from collections import defaultdict
# Python 2/3 compatibility: on Python 3 the Python 2 builtin names used
# throughout this module are bound to their Python 3 counterparts.
PY3 = sys.version_info[0] >= 3
if PY3:
    unichr, xrange, unicode = chr, range, str
# Token kind name -> numeric code.
token = {
    'BooleanLiteral': 1, 'EOF': 2,
    'Identifier': 3, 'Keyword': 4,
    'NullLiteral': 5, 'NumericLiteral': 6,
    'Punctuator': 7, 'StringLiteral': 8,
    'RegularExpression': 9, 'Template': 10,
}
# Reverse lookup: numeric code -> token kind name.
TokenName = {code: kind for kind, code in token.items()}
# Ordered list of token values (punctuators and keywords) consulted by code
# outside this module.
# NOTE(review): the name suggests "tokens after which a function is an
# expression" - confirm against the tokenizer that uses it.
FnExprTokens = [
    # opening brackets
    '(', '{', '[',
    # keywords
    'in', 'typeof', 'instanceof', 'new', 'return', 'case', 'delete',
    'throw', 'void',
    # assignment operators
    '=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=', '&=', '|=', '^=',
    ',',
    # binary/unary operators
    '+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&', '|', '^',
    '!', '~', '&&', '||', '?', ':', '===', '==', '>=', '<=', '<', '>',
    '!=', '!==',
]
# Every AST node type name known to this module. The grouping below is purely
# visual (by name suffix); the result is one flat set of strings.
syntax = {
    # *Expression / *Pattern
    'AssignmentExpression', 'AssignmentPattern', 'ArrayExpression',
    'ArrayPattern', 'ArrowFunctionExpression', 'BinaryExpression',
    'CallExpression', 'ClassExpression', 'ConditionalExpression',
    'FunctionExpression', 'LogicalExpression', 'MemberExpression',
    'NewExpression', 'ObjectExpression', 'ObjectPattern',
    'SequenceExpression', 'TaggedTemplateExpression', 'ThisExpression',
    'UnaryExpression', 'UpdateExpression',
    # *Statement
    'BlockStatement', 'BreakStatement', 'ContinueStatement',
    'DoWhileStatement', 'DebuggerStatement', 'EmptyStatement',
    'ExpressionStatement', 'ForStatement', 'ForInStatement', 'IfStatement',
    'LabeledStatement', 'ReturnStatement', 'SwitchStatement',
    'ThrowStatement', 'TryStatement', 'WhileStatement', 'WithStatement',
    # *Declaration
    'ClassDeclaration', 'ExportAllDeclaration', 'ExportDefaultDeclaration',
    'ExportNamedDeclaration', 'FunctionDeclaration', 'ImportDeclaration',
    'VariableDeclaration',
    # everything else
    'CatchClause', 'ClassBody', 'ExportSpecifier', 'Identifier',
    'ImportDefaultSpecifier', 'ImportNamespaceSpecifier', 'ImportSpecifier',
    'Literal', 'MethodDefinition', 'Program', 'Property', 'RestElement',
    'SpreadElement', 'Super', 'SwitchCase', 'TemplateElement',
    'TemplateLiteral', 'VariableDeclarator',
}
# Node type names flagged as supported. Nothing in this module reads the set,
# so what "supported" refers to is decided by its consumer.
# NOTE(review): presumably the node types a downstream translator can handle -
# confirm with the code that imports it.
supported_syntax = {
    # *Expression
    'AssignmentExpression', 'ArrayExpression', 'BinaryExpression',
    'CallExpression', 'ConditionalExpression', 'FunctionExpression',
    'LogicalExpression', 'MemberExpression', 'NewExpression',
    'ObjectExpression', 'SequenceExpression', 'ThisExpression',
    'UnaryExpression', 'UpdateExpression',
    # *Statement
    'BlockStatement', 'BreakStatement', 'ContinueStatement',
    'DoWhileStatement', 'DebuggerStatement', 'EmptyStatement',
    'ExpressionStatement', 'ForStatement', 'ForInStatement', 'IfStatement',
    'LabeledStatement', 'ReturnStatement', 'SwitchStatement',
    'ThrowStatement', 'TryStatement', 'WhileStatement', 'WithStatement',
    # *Declaration
    'FunctionDeclaration', 'VariableDeclaration',
    # everything else
    'CatchClause', 'Identifier', 'Literal', 'MethodDefinition', 'Program',
    'Property', 'SwitchCase', 'VariableDeclarator',
}
# Error messages should be identical to V8.
# Maps a symbolic error name to its message text. Entries containing ``%s``
# are format templates; no substitution is performed in this table.
messages = {
    # --- unexpected input -------------------------------------------------
    'UnexpectedToken': 'Unexpected token %s',
    'UnexpectedNumber': 'Unexpected number',
    'UnexpectedString': 'Unexpected string',
    'UnexpectedIdentifier': 'Unexpected identifier',
    'UnexpectedReserved': 'Unexpected reserved word',
    'UnexpectedTemplate': 'Unexpected quasi %s',
    'UnexpectedEOS': 'Unexpected end of input',
    # --- general syntax errors --------------------------------------------
    'NewlineAfterThrow': 'Illegal newline after throw',
    'InvalidRegExp': 'Invalid regular expression',
    'UnterminatedRegExp': 'Invalid regular expression: missing /',
    'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
    'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
    'MultipleDefaultsInSwitch':
        'More than one default clause in switch statement',
    'NoCatchOrFinally': 'Missing catch or finally after try',
    'UnknownLabel': "Undefined label '%s'",
    'Redeclaration': "%s '%s' has already been declared",
    'IllegalContinue': 'Illegal continue statement',
    'IllegalBreak': 'Illegal break statement',
    'IllegalReturn': 'Illegal return statement',
    # --- strict mode ------------------------------------------------------
    'StrictModeWith': 'Strict mode code may not include a with statement',
    'StrictCatchVariable':
        'Catch variable may not be eval or arguments in strict mode',
    'StrictVarName':
        'Variable name may not be eval or arguments in strict mode',
    'StrictParamName':
        'Parameter name eval or arguments is not allowed in strict mode',
    'StrictParamDupe':
        'Strict mode function may not have duplicate parameter names',
    'StrictFunctionName':
        'Function name may not be eval or arguments in strict mode',
    'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
    'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
    'StrictLHSAssignment':
        'Assignment to eval or arguments is not allowed in strict mode',
    'StrictLHSPostfix': ('Postfix increment/decrement may not have eval or '
                         'arguments operand in strict mode'),
    'StrictLHSPrefix': ('Prefix increment/decrement may not have eval or '
                        'arguments operand in strict mode'),
    'StrictReservedWord': 'Use of future reserved word in strict mode',
    # --- templates, parameters, object literals, classes ------------------
    'TemplateOctalLiteral':
        'Octal literals are not allowed in template strings.',
    'ParameterAfterRestParameter':
        'Rest parameter must be last formal parameter',
    'DefaultRestParameter': 'Unexpected token =',
    'ObjectPatternAsRestParameter': 'Unexpected token {',
    'DuplicateProtoProperty':
        'Duplicate __proto__ fields are not allowed in object literals',
    'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
    'DuplicateConstructor': 'A class may only have one constructor',
    'StaticPrototype': 'Classes may not have static property named prototype',
    # --- modules (all share the same generic text) ------------------------
    'MissingFromClause': 'Unexpected token',
    'NoAsAfterImportNamespace': 'Unexpected token',
    'InvalidModuleSpecifier': 'Unexpected token',
    'IllegalImportDeclaration': 'Unexpected token',
    'IllegalExportDeclaration': 'Unexpected token',
}
# Binary operators grouped by precedence level, lowest level first.
# Level 10 is not used by any operator in this table.
# NOTE(review): presumably a larger level binds tighter - confirm against the
# expression parser that reads PRECEDENCE.
_PRECEDENCE_TIERS = (
    (1, ('||',)),
    (2, ('&&',)),
    (3, ('|',)),
    (4, ('^',)),
    (5, ('&',)),
    (6, ('==', '!=', '===', '!==')),
    (7, ('<', '>', '<=', '>=', 'instanceof', 'in')),
    (8, ('<<', '>>', '>>>')),
    (9, ('+', '-')),
    (11, ('*', '/', '%')),
)
# Flat lookup table: operator text -> precedence level.
PRECEDENCE = {
    operator: level
    for level, operators in _PRECEDENCE_TIERS
    for operator in operators
}
class Token:
    """Attribute view of ``token``: ``Token.<kind>`` is the numeric code."""


# Copy each (kind name, code) pair of ``token`` onto the Token class.
for k, v in token.items():
    setattr(Token, k, v)


class Syntax:
    """Attribute view of ``syntax``: ``Syntax.<name>`` is the string ``<name>``."""


# Each node type name becomes an attribute whose value is the name itself.
for e in syntax:
    setattr(Syntax, e, e)


class Messages:
    """Attribute view of ``messages``: ``Messages.<key>`` is the message text."""


# Copy each (error name, message text) pair of ``messages`` onto Messages.
for k, v in messages.items():
    setattr(Messages, k, v)


class PlaceHolders:
    """Holder for the ``ArrowParameterPlaceHolder`` marker string."""

    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'
# Reference on enumerating characters by Unicode category:
# http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category

# Named single characters (one-character strings).
BOM = u'\uFEFF'
ZWJ = u'\u200D'
ZWNJ = u'\u200C'
TAB = u'\u0009'
VT = u'\u000B'
FF = u'\u000C'
SP = u'\u0020'
NBSP = u'\u00A0'
LF = u'\u000A'
CR = u'\u000D'
LS = u'\u2028'
PS = u'\u2029'

# Unicode general-category codes, as returned by unicodedata.category().
LETTER_CATEGORIES = {'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'}
COMBINING_MARK_CATEGORIES = {'Mn', 'Mc'}
DIGIT_CATEGORIES = {'Nd'}
CONNECTOR_PUNCTUATION_CATEGORIES = {'Pc'}

# An identifier may start with any letter category. This is an independent
# copy, so mutating one set does not affect the other.
# NOTE(review): \uXXXX escape sequences are not expressible as a category;
# presumably that is why the backslash appears in EXTRA_IDENTIFIER_START_CHARS
# below - confirm against the scanner.
IDENTIFIER_START_CATEGORIES = set(LETTER_CATEGORIES)
# Later identifier characters may also be combining marks, digits and
# connector punctuation.
IDENTIFIER_PART_CATEGORIES = (
    IDENTIFIER_START_CATEGORIES
    | COMBINING_MARK_CATEGORIES
    | DIGIT_CATEGORIES
    | CONNECTOR_PUNCTUATION_CATEGORIES
)

# Individual characters accepted in addition to the category tests.
EXTRA_IDENTIFIER_START_CHARS = {'$', '_', '\\'}
EXTRA_IDENTIFIER_PART_CHARS = EXTRA_IDENTIFIER_START_CHARS | {ZWJ, ZWNJ}

# Code points (integers, not strings) treated as white space.
WHITE_SPACE = {
    0x09, 0x0B, 0x0C, 0x20, 0xA0,
    0x1680, 0x180E,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
    0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    0x202F, 0x205F, 0x3000,
    0xFEFF,
}
# Code points (integers) of LF, CR, LS and PS.
LINE_TERMINATORS = {0x0A, 0x0D, 0x2028, 0x2029}
def isIdentifierStart(ch):
    """Return True if ``ch`` is accepted as the first character of an identifier.

    ``ch`` may be a one-character text string or an integer code point; any
    non-text value is converted with ``unichr``. The character is accepted
    when its Unicode general category is in IDENTIFIER_START_CATEGORIES or
    when it is one of EXTRA_IDENTIFIER_START_CHARS.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    if unicodedata.category(ch) in IDENTIFIER_START_CATEGORIES:
        return True
    return ch in EXTRA_IDENTIFIER_START_CHARS
def isIdentifierPart(ch):
    """Return True if ``ch`` is accepted after the first identifier character.

    ``ch`` may be a one-character text string or an integer code point; any
    non-text value is converted with ``unichr``. The character is accepted
    when its Unicode general category is in IDENTIFIER_PART_CATEGORIES or
    when it is one of EXTRA_IDENTIFIER_PART_CHARS.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    if unicodedata.category(ch) in IDENTIFIER_PART_CATEGORIES:
        return True
    return ch in EXTRA_IDENTIFIER_PART_CHARS
def isValidIdentifier(name):
    """Return True if ``name`` is a non-empty, non-keyword identifier.

    The first character must satisfy ``isIdentifierStart`` and every later
    character must satisfy ``isIdentifierPart``. Empty names and names for
    which ``isKeyword`` is true are rejected up front.
    """
    if not name:
        return False
    if isKeyword(name):
        return False
    head, tail = name[0], name[1:]
    return isIdentifierStart(head) and all(isIdentifierPart(c) for c in tail)
def isWhiteSpace(ch):
    """Return True if ``ch`` is one of the WHITE_SPACE code points.

    A text character is converted to its code point with ``ord``; any other
    value is looked up in WHITE_SPACE as given.
    """
    if isinstance(ch, unicode):
        code_point = ord(ch)
    else:
        code_point = ch
    return code_point in WHITE_SPACE
def isLineTerminator(ch):
    """Return True if ``ch`` is one of the LINE_TERMINATORS code points.

    A text character is converted to its code point with ``ord``; any other
    value is looked up in LINE_TERMINATORS as given.
    """
    if isinstance(ch, unicode):
        code_point = ord(ch)
    else:
        code_point = ch
    return code_point in LINE_TERMINATORS
# Digit alphabets as sets of one-character strings.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = set('0123456789abcdefABCDEF')
# Hex digit character -> numeric value 0..15; lowercase digits first ...
HEX_CONV = dict(zip('0123456789abcdef', xrange(16)))
# ... then 'A'..'F' mapped to 10..15.
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i
def isDecimalDigit(ch):
    """Return True if ``ch`` is a member of DEC ('0'..'9').

    Non-text values are converted to a character with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC
def isHexDigit(ch):
    """Return True if ``ch`` is a member of HEX ('0'..'9', 'a'..'f', 'A'..'F').

    Non-text values are converted to a character with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in HEX
def isOctalDigit(ch):
    """Return True if ``ch`` is a member of OCTAL ('0'..'7').

    Non-text values are converted to a character with ``unichr`` first.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL
def isFutureReservedWord(w):
    """Return True if ``w`` is 'enum', 'export', 'import' or 'super'."""
    # A tuple (not a set) is used for the lookup, so ``w`` need not be hashable.
    future_reserved = ('enum', 'export', 'import', 'super')
    return w in future_reserved
# Words tested by isStrictModeReservedWord().
RESERVED_WORD = {
    'implements', 'interface', 'package',
    'private', 'protected', 'public',
    'static', 'yield', 'let',
}


def isStrictModeReservedWord(w):
    """Return True if ``w`` is a member of RESERVED_WORD."""
    return w in RESERVED_WORD
def isRestrictedWord(w):
    """Return True if ``w`` is 'eval' or 'arguments'."""
    # Tuple lookup, so ``w`` need not be hashable.
    restricted = ('eval', 'arguments')
    return w in restricted
# Words reported as keywords by isKeyword(), laid out by length.
# NOTE(review): 'pyimport' is not a JavaScript keyword; presumably an
# extension of the embedding project - confirm.
KEYWORDS = {
    'if', 'in', 'do',
    'var', 'for', 'new', 'try', 'let',
    'this', 'else', 'case', 'void', 'with', 'enum',
    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
    'return', 'typeof', 'delete', 'switch', 'export', 'import',
    'default', 'finally', 'extends',
    'function', 'continue', 'debugger',
    'instanceof', 'pyimport',
}


def isKeyword(w):
    """Return True if ``w`` is a member of KEYWORDS.

    Notes carried over from the original implementation:
    'const' is specialized as Keyword in V8; 'yield' and 'let' are for
    compatibility with SpiderMonkey and ES.next; some others are from
    future reserved words.
    """
    return w in KEYWORDS
class JsSyntaxError(Exception):
    """Exception type for JavaScript syntax errors.

    Adds no behaviour to ``Exception``; nothing in this module raises it.
    """
if __name__ == '__main__':
    # Minimal self-check, executed only when the module is run as a script.
    # Each predicate is exercised with both a text character and, where
    # given, an integer code point.
    assert isLineTerminator('\n') and isLineTerminator(0x0A)
    assert isIdentifierStart('$') and isIdentifierStart(100)
    assert isWhiteSpace(' ')