bazarr/libs/pygments/lexers/tnt.py

263 lines
9.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
pygments.lexers.tnt
~~~~~~~~~~~~~~~~~~~
Lexer for Typographic Number Theory.
:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
from pygments.lexer import Lexer
from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
Punctuation, Error
__all__ = ['TNTLexer']
class TNTLexer(Lexer):
"""
Lexer for Typographic Number Theory, as described in the book
Gödel, Escher, Bach, by Douglas R. Hofstadter,
or as summarized here:
https://github.com/Kenny2github/language-tnt/blob/master/README.md#summary-of-tnt
.. versionadded:: 2.7
"""
name = 'Typographic Number Theory'
aliases = ['tnt']
filenames = ['*.tnt']
cur = []
LOGIC = set('⊃→]&∧^|Vv')
OPERATORS = set('+.⋅*')
VARIABLES = set('abcde')
PRIMES = set("'")
NEGATORS = set('~!')
QUANTIFIERS = set('AE∀∃')
NUMBERS = set('0123456789')
WHITESPACE = set('\t \v\n')
RULES = re.compile('''(?xi)
joining | separation | double-tilde | fantasy\\ rule
| carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
| contrapositive | De\\ Morgan | switcheroo
| specification | generalization | interchange
| existence | symmetry | transitivity
| add\\ S | drop\\ S | induction
| axiom\\ ([1-5]) | premise | push | pop
''')
LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
COMMENT = re.compile(r'\[[^\n\]]+\]')
def __init__(self, *args, **kwargs):
Lexer.__init__(self, *args, **kwargs)
self.cur = []
def whitespace(self, start, text, required=False):
"""Tokenize whitespace."""
end = start
try:
while text[end] in self.WHITESPACE:
end += 1
except IndexError:
end = len(text)
if required:
assert end != start
if end != start:
self.cur.append((start, Text, text[start:end]))
return end
def variable(self, start, text):
"""Tokenize a variable."""
assert text[start] in self.VARIABLES
end = start+1
while text[end] in self.PRIMES:
end += 1
self.cur.append((start, Name.Variable, text[start:end]))
return end
def term(self, start, text):
"""Tokenize a term."""
if text[start] == 'S': # S...S(...) or S...0
end = start+1
while text[end] == 'S':
end += 1
self.cur.append((start, Number.Integer, text[start:end]))
return self.term(end, text)
if text[start] == '0': # the singleton 0
self.cur.append((start, Number.Integer, text[start]))
return start+1
if text[start] in self.VARIABLES: # a''...
return self.variable(start, text)
if text[start] == '(': # (...+...)
self.cur.append((start, Punctuation, text[start]))
start = self.term(start+1, text)
assert text[start] in self.OPERATORS
self.cur.append((start, Operator, text[start]))
start = self.term(start+1, text)
assert text[start] == ')'
self.cur.append((start, Punctuation, text[start]))
return start+1
raise AssertionError # no matches
def formula(self, start, text):
"""Tokenize a formula."""
if text[start] in self.NEGATORS: # ~<...>
end = start+1
while text[end] in self.NEGATORS:
end += 1
self.cur.append((start, Operator, text[start:end]))
return self.formula(end, text)
if text[start] in self.QUANTIFIERS: # Aa:<...>
self.cur.append((start, Keyword.Declaration, text[start]))
start = self.variable(start+1, text)
assert text[start] == ':'
self.cur.append((start, Punctuation, text[start]))
return self.formula(start+1, text)
if text[start] == '<': # <...&...>
self.cur.append((start, Punctuation, text[start]))
start = self.formula(start+1, text)
assert text[start] in self.LOGIC
self.cur.append((start, Operator, text[start]))
start = self.formula(start+1, text)
assert text[start] == '>'
self.cur.append((start, Punctuation, text[start]))
return start+1
# ...=...
start = self.term(start, text)
assert text[start] == '='
self.cur.append((start, Operator, text[start]))
start = self.term(start+1, text)
return start
def rule(self, start, text):
"""Tokenize a rule."""
match = self.RULES.match(text, start)
assert match is not None
groups = sorted(match.regs[1:]) # exclude whole match
for group in groups:
if group[0] >= 0: # this group matched
self.cur.append((start, Keyword, text[start:group[0]]))
self.cur.append((group[0], Number.Integer,
text[group[0]:group[1]]))
if group[1] != match.end():
self.cur.append((group[1], Keyword,
text[group[1]:match.end()]))
break
else:
self.cur.append((start, Keyword, text[start:match.end()]))
return match.end()
def lineno(self, start, text):
"""Tokenize a line referral."""
end = start
while text[end] not in self.NUMBERS:
end += 1
self.cur.append((start, Punctuation, text[start]))
self.cur.append((start+1, Text, text[start+1:end]))
start = end
match = self.LINENOS.match(text, start)
assert match is not None
assert text[match.end()] == ')'
self.cur.append((match.start(), Number.Integer, match.group(0)))
self.cur.append((match.end(), Punctuation, text[match.end()]))
return match.end() + 1
def error_till_line_end(self, start, text):
"""Mark everything from ``start`` to the end of the line as Error."""
end = start
try:
while text[end] != '\n': # there's whitespace in rules
end += 1
except IndexError:
end = len(text)
if end != start:
self.cur.append((start, Error, text[start:end]))
end = self.whitespace(end, text)
return end
def get_tokens_unprocessed(self, text):
"""Returns a list of TNT tokens."""
self.cur = []
start = end = self.whitespace(0, text)
while start <= end < len(text):
try:
# try line number
while text[end] in self.NUMBERS:
end += 1
if end != start: # actual number present
self.cur.append((start, Number.Integer, text[start:end]))
# whitespace is required after a line number
orig = len(self.cur)
try:
start = end = self.whitespace(end, text, True)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(end, text)
continue
# at this point it could be a comment
match = self.COMMENT.match(text, start)
if match is not None:
self.cur.append((start, Comment, text[start:match.end()]))
start = end = match.end()
# anything after the closing bracket is invalid
start = end = self.error_till_line_end(start, text)
# do not attempt to process the rest
continue
del match
if text[start] in '[]': # fantasy push or pop
self.cur.append((start, Keyword, text[start]))
start += 1
end += 1
else:
# one formula, possibly containing subformulae
orig = len(self.cur)
try:
start = end = self.formula(start, text)
except AssertionError: # not well-formed
del self.cur[orig:]
while text[end] not in self.WHITESPACE:
end += 1
self.cur.append((start, Error, text[start:end]))
start = end
# skip whitespace after formula
orig = len(self.cur)
try:
start = end = self.whitespace(end, text, True)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
# rule proving this formula a theorem
orig = len(self.cur)
try:
start = end = self.rule(start, text)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
# skip whitespace after rule
start = end = self.whitespace(end, text)
# line marker
if text[start] == '(':
orig = len(self.cur)
try:
start = end = self.lineno(start, text)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
start = end = self.whitespace(start, text)
except IndexError:
try:
del self.cur[orig:]
except NameError:
pass # if orig was never defined, fine
self.error_till_line_end(start, text)
return self.cur