mirror of
https://github.com/morpheus65535/bazarr
synced 2025-01-02 13:04:50 +00:00
272 lines
10 KiB
Python
272 lines
10 KiB
Python
"""
|
||
pygments.lexers.tnt
|
||
~~~~~~~~~~~~~~~~~~~
|
||
|
||
Lexer for Typographic Number Theory.
|
||
|
||
:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
|
||
:license: BSD, see LICENSE for details.
|
||
"""
|
||
|
||
import re
|
||
|
||
from pygments.lexer import Lexer
|
||
from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
|
||
Punctuation, Error
|
||
|
||
__all__ = ['TNTLexer']
|
||
|
||
|
||
class TNTLexer(Lexer):
|
||
"""
|
||
Lexer for Typographic Number Theory, as described in the book
|
||
Gödel, Escher, Bach, by Douglas R. Hofstadter,
|
||
or as summarized here:
|
||
https://github.com/Kenny2github/language-tnt/blob/master/README.md#summary-of-tnt
|
||
|
||
.. versionadded:: 2.7
|
||
"""
|
||
|
||
name = 'Typographic Number Theory'
|
||
aliases = ['tnt']
|
||
filenames = ['*.tnt']
|
||
|
||
cur = []
|
||
|
||
LOGIC = set('⊃→]&∧^|∨Vv')
|
||
OPERATORS = set('+.⋅*')
|
||
VARIABLES = set('abcde')
|
||
PRIMES = set("'′")
|
||
NEGATORS = set('~!')
|
||
QUANTIFIERS = set('AE∀∃')
|
||
NUMBERS = set('0123456789')
|
||
WHITESPACE = set('\t \v\n')
|
||
|
||
RULES = re.compile('''(?xi)
|
||
joining | separation | double-tilde | fantasy\\ rule
|
||
| carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
|
||
| contrapositive | De\\ Morgan | switcheroo
|
||
| specification | generalization | interchange
|
||
| existence | symmetry | transitivity
|
||
| add\\ S | drop\\ S | induction
|
||
| axiom\\ ([1-5]) | premise | push | pop
|
||
''')
|
||
LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
|
||
COMMENT = re.compile(r'\[[^\n\]]+\]')
|
||
|
||
def __init__(self, *args, **kwargs):
|
||
Lexer.__init__(self, *args, **kwargs)
|
||
self.cur = []
|
||
|
||
def whitespace(self, start, text, required=False):
|
||
"""Tokenize whitespace."""
|
||
end = start
|
||
try:
|
||
while text[end] in self.WHITESPACE:
|
||
end += 1
|
||
except IndexError:
|
||
end = len(text)
|
||
if required and end == start:
|
||
raise AssertionError
|
||
if end != start:
|
||
self.cur.append((start, Text, text[start:end]))
|
||
return end
|
||
|
||
def variable(self, start, text):
|
||
"""Tokenize a variable."""
|
||
if text[start] not in self.VARIABLES:
|
||
raise AssertionError
|
||
end = start+1
|
||
while text[end] in self.PRIMES:
|
||
end += 1
|
||
self.cur.append((start, Name.Variable, text[start:end]))
|
||
return end
|
||
|
||
def term(self, start, text):
|
||
"""Tokenize a term."""
|
||
if text[start] == 'S': # S...S(...) or S...0
|
||
end = start+1
|
||
while text[end] == 'S':
|
||
end += 1
|
||
self.cur.append((start, Number.Integer, text[start:end]))
|
||
return self.term(end, text)
|
||
if text[start] == '0': # the singleton 0
|
||
self.cur.append((start, Number.Integer, text[start]))
|
||
return start+1
|
||
if text[start] in self.VARIABLES: # a''...
|
||
return self.variable(start, text)
|
||
if text[start] == '(': # (...+...)
|
||
self.cur.append((start, Punctuation, text[start]))
|
||
start = self.term(start+1, text)
|
||
if text[start] not in self.OPERATORS:
|
||
raise AssertionError
|
||
self.cur.append((start, Operator, text[start]))
|
||
start = self.term(start+1, text)
|
||
if text[start] != ')':
|
||
raise AssertionError
|
||
self.cur.append((start, Punctuation, text[start]))
|
||
return start+1
|
||
raise AssertionError # no matches
|
||
|
||
def formula(self, start, text):
|
||
"""Tokenize a formula."""
|
||
if text[start] in self.NEGATORS: # ~<...>
|
||
end = start+1
|
||
while text[end] in self.NEGATORS:
|
||
end += 1
|
||
self.cur.append((start, Operator, text[start:end]))
|
||
return self.formula(end, text)
|
||
if text[start] in self.QUANTIFIERS: # Aa:<...>
|
||
self.cur.append((start, Keyword.Declaration, text[start]))
|
||
start = self.variable(start+1, text)
|
||
if text[start] != ':':
|
||
raise AssertionError
|
||
self.cur.append((start, Punctuation, text[start]))
|
||
return self.formula(start+1, text)
|
||
if text[start] == '<': # <...&...>
|
||
self.cur.append((start, Punctuation, text[start]))
|
||
start = self.formula(start+1, text)
|
||
if text[start] not in self.LOGIC:
|
||
raise AssertionError
|
||
self.cur.append((start, Operator, text[start]))
|
||
start = self.formula(start+1, text)
|
||
if text[start] != '>':
|
||
raise AssertionError
|
||
self.cur.append((start, Punctuation, text[start]))
|
||
return start+1
|
||
# ...=...
|
||
start = self.term(start, text)
|
||
if text[start] != '=':
|
||
raise AssertionError
|
||
self.cur.append((start, Operator, text[start]))
|
||
start = self.term(start+1, text)
|
||
return start
|
||
|
||
def rule(self, start, text):
|
||
"""Tokenize a rule."""
|
||
match = self.RULES.match(text, start)
|
||
if match is None:
|
||
raise AssertionError
|
||
groups = sorted(match.regs[1:]) # exclude whole match
|
||
for group in groups:
|
||
if group[0] >= 0: # this group matched
|
||
self.cur.append((start, Keyword, text[start:group[0]]))
|
||
self.cur.append((group[0], Number.Integer,
|
||
text[group[0]:group[1]]))
|
||
if group[1] != match.end():
|
||
self.cur.append((group[1], Keyword,
|
||
text[group[1]:match.end()]))
|
||
break
|
||
else:
|
||
self.cur.append((start, Keyword, text[start:match.end()]))
|
||
return match.end()
|
||
|
||
def lineno(self, start, text):
|
||
"""Tokenize a line referral."""
|
||
end = start
|
||
while text[end] not in self.NUMBERS:
|
||
end += 1
|
||
self.cur.append((start, Punctuation, text[start]))
|
||
self.cur.append((start+1, Text, text[start+1:end]))
|
||
start = end
|
||
match = self.LINENOS.match(text, start)
|
||
if match is None:
|
||
raise AssertionError
|
||
if text[match.end()] != ')':
|
||
raise AssertionError
|
||
self.cur.append((match.start(), Number.Integer, match.group(0)))
|
||
self.cur.append((match.end(), Punctuation, text[match.end()]))
|
||
return match.end() + 1
|
||
|
||
def error_till_line_end(self, start, text):
|
||
"""Mark everything from ``start`` to the end of the line as Error."""
|
||
end = start
|
||
try:
|
||
while text[end] != '\n': # there's whitespace in rules
|
||
end += 1
|
||
except IndexError:
|
||
end = len(text)
|
||
if end != start:
|
||
self.cur.append((start, Error, text[start:end]))
|
||
end = self.whitespace(end, text)
|
||
return end
|
||
|
||
def get_tokens_unprocessed(self, text):
|
||
"""Returns a list of TNT tokens."""
|
||
self.cur = []
|
||
start = end = self.whitespace(0, text)
|
||
while start <= end < len(text):
|
||
try:
|
||
# try line number
|
||
while text[end] in self.NUMBERS:
|
||
end += 1
|
||
if end != start: # actual number present
|
||
self.cur.append((start, Number.Integer, text[start:end]))
|
||
# whitespace is required after a line number
|
||
orig = len(self.cur)
|
||
try:
|
||
start = end = self.whitespace(end, text, True)
|
||
except AssertionError:
|
||
del self.cur[orig:]
|
||
start = end = self.error_till_line_end(end, text)
|
||
continue
|
||
# at this point it could be a comment
|
||
match = self.COMMENT.match(text, start)
|
||
if match is not None:
|
||
self.cur.append((start, Comment, text[start:match.end()]))
|
||
start = end = match.end()
|
||
# anything after the closing bracket is invalid
|
||
start = end = self.error_till_line_end(start, text)
|
||
# do not attempt to process the rest
|
||
continue
|
||
del match
|
||
if text[start] in '[]': # fantasy push or pop
|
||
self.cur.append((start, Keyword, text[start]))
|
||
start += 1
|
||
end += 1
|
||
else:
|
||
# one formula, possibly containing subformulae
|
||
orig = len(self.cur)
|
||
try:
|
||
start = end = self.formula(start, text)
|
||
except (AssertionError, RecursionError): # not well-formed
|
||
del self.cur[orig:]
|
||
while text[end] not in self.WHITESPACE:
|
||
end += 1
|
||
self.cur.append((start, Error, text[start:end]))
|
||
start = end
|
||
# skip whitespace after formula
|
||
orig = len(self.cur)
|
||
try:
|
||
start = end = self.whitespace(end, text, True)
|
||
except AssertionError:
|
||
del self.cur[orig:]
|
||
start = end = self.error_till_line_end(start, text)
|
||
continue
|
||
# rule proving this formula a theorem
|
||
orig = len(self.cur)
|
||
try:
|
||
start = end = self.rule(start, text)
|
||
except AssertionError:
|
||
del self.cur[orig:]
|
||
start = end = self.error_till_line_end(start, text)
|
||
continue
|
||
# skip whitespace after rule
|
||
start = end = self.whitespace(end, text)
|
||
# line marker
|
||
if text[start] == '(':
|
||
orig = len(self.cur)
|
||
try:
|
||
start = end = self.lineno(start, text)
|
||
except AssertionError:
|
||
del self.cur[orig:]
|
||
start = end = self.error_till_line_end(start, text)
|
||
continue
|
||
start = end = self.whitespace(start, text)
|
||
except IndexError:
|
||
try:
|
||
del self.cur[orig:]
|
||
except NameError:
|
||
pass # if orig was never defined, fine
|
||
self.error_till_line_end(start, text)
|
||
return self.cur
|