# bazarr/libs/pygments/lexers/tnt.py

"""
    pygments.lexers.tnt
    ~~~~~~~~~~~~~~~~~~~

    Lexer for Typographic Number Theory.

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer
from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
    Punctuation, Error

__all__ = ['TNTLexer']


class TNTLexer(Lexer):
    """
    Lexer for Typographic Number Theory, as described in the book
    Gödel, Escher, Bach, by Douglas R. Hofstadter.

    The lexer is hand-written: each helper method consumes one syntactic
    element starting at a given index, appends the resulting tokens to
    ``self.cur`` and returns the index just past what it consumed.  Helpers
    signal malformed input by raising ``AssertionError``.

    .. versionadded:: 2.7
    """

    name = 'Typographic Number Theory'
    url = 'https://github.com/Kenny2github/language-tnt'
    aliases = ['tnt']
    filenames = ['*.tnt']

    # Tokens collected so far, as (index, tokentype, value) tuples.  This
    # class-level list is replaced by a per-instance list in __init__ and by
    # a fresh list at the start of every get_tokens_unprocessed() call.
    cur = []

    # Characters accepted as the connective between the two subformulas of a
    # compound ``<...>`` formula.
    LOGIC = set('⊃→]&∧^|∨Vv')
    # Characters accepted between the two terms of a parenthesised term.
    OPERATORS = set('+.⋅*')
    # Letters that may start a variable.
    VARIABLES = set('abcde')
    # Marks that may follow a variable letter, any number of times.
    PRIMES = set("'′")
    # Prefix characters that negate the formula following them.
    NEGATORS = set('~!')
    # Characters that introduce a quantified formula (``Aa:...``).
    QUANTIFIERS = set('AE∀∃')
    # Decimal digits, used for line numbers and line referrals.
    NUMBERS = set('0123456789')
    # Characters treated as whitespace; note that this includes the newline.
    WHITESPACE = set('\t \v\n')

    # Names of the rules that may justify a statement.  The pattern is
    # verbose and case-insensitive; the two capture groups hold the number
    # of a "carry over ... N" rule and of an "axiom N" rule respectively.
    RULES = re.compile('''(?xi)
        joining | separation | double-tilde | fantasy\\ rule
        | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
        | contrapositive | De\\ Morgan | switcheroo
        | specification | generalization | interchange
        | existence | symmetry | transitivity
        | add\\ S | drop\\ S | induction
        | axiom\\ ([1-5]) | premise | push | pop
    ''')
    # One or more numbers separated by ",", ", ", " and " or ", and ".
    LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
    # A non-empty, single-line comment enclosed in square brackets.
    COMMENT = re.compile(r'\[[^\n\]]+\]')

    def __init__(self, *args, **kwargs):
        """Set up the base lexer and give this instance its own token list."""
        super().__init__(*args, **kwargs)
        # Shadow the class-level list so instances never share tokens.
        self.cur = []

    def whitespace(self, start, text, required=False):
        """Consume a run of whitespace beginning at ``start``.

        A ``Text`` token is recorded for the run when it is not empty.

        :param start: index at which to begin scanning.
        :param text: the full source text.
        :param required: when true, an empty run is an error.
        :return: index of the first character after the run.
        :raises AssertionError: if ``required`` is set and nothing was
            consumed.
        """
        pos = start
        try:
            while text[pos] in self.WHITESPACE:
                pos += 1
        except IndexError:
            # Ran off the end of the input: the run extends to the very end.
            pos = len(text)
        consumed = pos != start
        if required and not consumed:
            raise AssertionError
        if consumed:
            self.cur.append((start, Text, text[start:pos]))
        return pos

    def variable(self, start, text):
        """Consume a variable: one variable letter plus any trailing primes.

        :param start: index of the variable letter.
        :param text: the full source text.
        :return: index of the first character after the variable.
        :raises AssertionError: if ``text[start]`` is not a variable letter.
        :raises IndexError: if the input ends before a non-prime character
            is seen.
        """
        letter = text[start]
        if letter not in self.VARIABLES:
            raise AssertionError
        pos = start + 1
        # Swallow every prime that directly follows the letter.
        while text[pos] in self.PRIMES:
            pos += 1
        self.cur.append((start, Name.Variable, text[start:pos]))
        return pos

    def term(self, start, text):
        """Consume a term beginning at ``start``.

        A term is one of: a run of ``S`` followed by another term, the
        numeral ``0``, a variable, or ``(term operator term)``.

        :param start: index of the first character of the term.
        :param text: the full source text.
        :return: index of the first character after the term.
        :raises AssertionError: if the text at ``start`` is not a term.
        :raises IndexError: if the input ends in the middle of the term.
        """
        first = text[start]
        if first == 'S':
            # Successor prefix: group all consecutive S's into one token and
            # then parse the term they apply to.
            pos = start + 1
            while text[pos] == 'S':
                pos += 1
            self.cur.append((start, Number.Integer, text[start:pos]))
            return self.term(pos, text)
        if first == '0':
            self.cur.append((start, Number.Integer, first))
            return start + 1
        if first in self.VARIABLES:
            return self.variable(start, text)
        if first != '(':
            raise AssertionError  # nothing a term can start with
        # Parenthesised binary operation: ( term operator term )
        self.cur.append((start, Punctuation, first))
        operator_at = self.term(start + 1, text)
        operator = text[operator_at]
        if operator not in self.OPERATORS:
            raise AssertionError
        self.cur.append((operator_at, Operator, operator))
        close_at = self.term(operator_at + 1, text)
        if text[close_at] != ')':
            raise AssertionError
        self.cur.append((close_at, Punctuation, text[close_at]))
        return close_at + 1

    def formula(self, start, text):
        """Consume a formula beginning at ``start``.

        A formula is one of: a run of negators followed by a formula, a
        quantifier with a variable and a colon followed by a formula, a
        compound ``<formula connective formula>``, or an equation
        ``term=term``.

        :param start: index of the first character of the formula.
        :param text: the full source text.
        :return: index of the first character after the formula.
        :raises AssertionError: if the text at ``start`` is not a formula.
        :raises IndexError: if the input ends in the middle of the formula.
        """
        lead = text[start]
        if lead in self.NEGATORS:
            # All adjacent negators become a single Operator token.
            pos = start + 1
            while text[pos] in self.NEGATORS:
                pos += 1
            self.cur.append((start, Operator, text[start:pos]))
            return self.formula(pos, text)
        if lead in self.QUANTIFIERS:
            # quantifier, bound variable, colon, then the quantified formula
            self.cur.append((start, Keyword.Declaration, lead))
            colon_at = self.variable(start + 1, text)
            if text[colon_at] != ':':
                raise AssertionError
            self.cur.append((colon_at, Punctuation, text[colon_at]))
            return self.formula(colon_at + 1, text)
        if lead == '<':
            # compound: < formula connective formula >
            self.cur.append((start, Punctuation, lead))
            connective_at = self.formula(start + 1, text)
            connective = text[connective_at]
            if connective not in self.LOGIC:
                raise AssertionError
            self.cur.append((connective_at, Operator, connective))
            close_at = self.formula(connective_at + 1, text)
            if text[close_at] != '>':
                raise AssertionError
            self.cur.append((close_at, Punctuation, text[close_at]))
            return close_at + 1
        # Anything else has to be an equation between two terms.
        equals_at = self.term(start, text)
        if text[equals_at] != '=':
            raise AssertionError
        self.cur.append((equals_at, Operator, text[equals_at]))
        return self.term(equals_at + 1, text)

    def rule(self, start, text):
        """Consume a rule name beginning at ``start``.

        The rule is emitted as a ``Keyword`` token.  When the rule carries a
        number (one of the capture groups of ``RULES`` matched), the number
        is emitted as ``Number.Integer`` and the text around it as
        ``Keyword``.

        :param start: index of the first character of the rule.
        :param text: the full source text.
        :return: index of the first character after the rule.
        :raises AssertionError: if no known rule starts at ``start``.
        """
        found = self.RULES.match(text, start)
        if found is None:
            raise AssertionError
        stop = found.end()
        # regs[0] is the whole match; the rest are the capture groups, which
        # report (-1, -1) when they did not take part in the match.
        spans = sorted(found.regs[1:])
        numbered = next((span for span in spans if span[0] >= 0), None)
        if numbered is None:
            self.cur.append((start, Keyword, text[start:stop]))
            return stop
        num_from, num_to = numbered
        self.cur.append((start, Keyword, text[start:num_from]))
        self.cur.append((num_from, Number.Integer, text[num_from:num_to]))
        if num_to != stop:
            self.cur.append((num_to, Keyword, text[num_to:stop]))
        return stop

    def lineno(self, start, text):
        """Tokenize a line referral such as ``(lines 1, 2 and 3)``.

        The character at ``start`` is emitted as ``Punctuation``, everything
        up to the first digit as ``Text``, the whole list of numbers as one
        ``Number.Integer`` token and the closing parenthesis as
        ``Punctuation``.

        :param start: index of the opening parenthesis.
        :param text: the full source text.
        :return: index of the first character after the closing parenthesis.
        :raises AssertionError: if no number is found before the referral is
            closed, the line ends or the input ends, or if the list of
            numbers is not directly followed by ``)``.
        """
        size = len(text)
        end = start
        # Look for the first digit, but only inside this referral: running
        # into ')' or a newline first means there is no number to refer to.
        # Bounding the scan also turns truncated input into the
        # AssertionError callers expect instead of an IndexError.
        while end < size and text[end] not in self.NUMBERS:
            if text[end] in ')\n':
                raise AssertionError
            end += 1
        if end >= size:
            raise AssertionError
        self.cur.append((start, Punctuation, text[start]))
        # The Text token is emitted even when empty so that the number of
        # tokens produced for a referral stays constant.
        self.cur.append((start+1, Text, text[start+1:end]))
        match = self.LINENOS.match(text, end)
        if match is None:
            raise AssertionError
        close = match.end()
        if close >= size or text[close] != ')':
            raise AssertionError
        self.cur.append((match.start(), Number.Integer, match.group(0)))
        self.cur.append((close, Punctuation, text[close]))
        return close + 1

    def error_till_line_end(self, start, text):
        """Flag the rest of the current line as an error.

        Everything from ``start`` up to (not including) the next newline, or
        up to the end of the input when there is none, is recorded as one
        ``Error`` token.  Whitespace following it, including the newline
        itself, is then consumed as usual.

        :param start: index at which the invalid text begins.
        :param text: the full source text.
        :return: index of the first character after the trailing whitespace.
        """
        # Rules contain spaces, so only a newline ends the erroneous span.
        newline_at = text.find('\n', start)
        if newline_at < 0:
            newline_at = len(text)
        if newline_at != start:
            self.cur.append((start, Error, text[start:newline_at]))
        return self.whitespace(newline_at, text)

    def get_tokens_unprocessed(self, text):
        """Tokenize ``text`` and return the list of TNT tokens.

        The input is processed one statement at a time.  A statement is an
        optional line number followed by either a bracketed comment, or by a
        formula (or a fantasy push/pop bracket), whitespace, a rule and an
        optional parenthesised line referral.  Text that cannot be tokenized
        is emitted as ``Error`` tokens instead of raising.

        :param text: the source text to tokenize.
        :return: ``self.cur``, a list of ``(index, tokentype, value)``
            tuples.
        """
        self.cur = []
        size = len(text)
        start = end = self.whitespace(0, text)
        while start <= end < size:
            # Rollback marker for the step in progress.  It is reset for
            # every statement so that recovering from an IndexError can
            # never discard tokens of an earlier, already finished step.
            orig = len(self.cur)
            try:
                # try line number
                while end < size and text[end] in self.NUMBERS:
                    end += 1
                if end != start:  # actual number present
                    self.cur.append((start, Number.Integer, text[start:end]))
                    # whitespace is required after a line number
                    orig = len(self.cur)
                    try:
                        start = end = self.whitespace(end, text, True)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(end, text)
                        continue
                    if start >= size:
                        # The input ends right after the line number; keep
                        # what was tokenized rather than index past the end.
                        break
                # at this point it could be a comment
                match = self.COMMENT.match(text, start)
                if match is not None:
                    self.cur.append((start, Comment, text[start:match.end()]))
                    # anything after the closing bracket is invalid;
                    # do not attempt to process the rest of the line
                    start = end = self.error_till_line_end(match.end(), text)
                    continue
                if text[start] in '[]':  # fantasy push or pop
                    self.cur.append((start, Keyword, text[start]))
                    start += 1
                    end += 1
                else:
                    # one formula, possibly containing subformulae
                    orig = len(self.cur)
                    try:
                        start = end = self.formula(start, text)
                    except (AssertionError, RecursionError):  # not well-formed
                        del self.cur[orig:]
                        # the malformed formula runs up to the next
                        # whitespace, or to the end of the input
                        while end < size and text[end] not in self.WHITESPACE:
                            end += 1
                        self.cur.append((start, Error, text[start:end]))
                        start = end
                # skip whitespace after formula
                orig = len(self.cur)
                try:
                    start = end = self.whitespace(end, text, True)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # rule proving this formula a theorem
                orig = len(self.cur)
                try:
                    start = end = self.rule(start, text)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # skip whitespace after rule
                start = end = self.whitespace(end, text)
                # line marker; the input may legitimately end after the rule
                if start < size and text[start] == '(':
                    orig = len(self.cur)
                    try:
                        start = end = self.lineno(start, text)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(start, text)
                        continue
                    start = end = self.whitespace(start, text)
            except IndexError:
                # The input ended in the middle of a formula or line
                # referral.  Drop the partial tokens of that step and flag
                # the text from its beginning as an error.  The returned
                # position must be taken over: otherwise the same span would
                # be tokenized again, or the loop would never terminate.
                del self.cur[orig:]
                start = end = self.error_till_line_end(start, text)
        return self.cur
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								"""
 								    pygments.lexers.tnt
 								    ~~~~~~~~~~~~~~~~~~~
 								    Lexer for Typographic Number Theory.
-												Updated vendored dependencies.

											
										
										
											2022-11-07 18:06:49 +00:00
+								    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								    :license: BSD, see LICENSE for details.
 								"""
 								import re
 								from pygments.lexer import Lexer
 								from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								    Punctuation, Error
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
 								__all__ = ['TNTLexer']
 								class TNTLexer(Lexer):
 								    """
 								    Lexer for Typographic Number Theory, as described in the book
-												Updated vendored dependencies.

											
										
										
											2022-11-07 18:06:49 +00:00
+								    Gödel, Escher, Bach, by Douglas R. Hofstadter
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								    .. versionadded:: 2.7
 								    """
 								    name = 'Typographic Number Theory'
-												Updated vendored dependencies.

											
										
										
											2022-11-07 18:06:49 +00:00
+								    url = 'https://github.com/Kenny2github/language-tnt'
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								    aliases = ['tnt']
 								    filenames = ['*.tnt']
 								    cur = []
 								    LOGIC = set('⊃→]&∧^|∨Vv')
 								    OPERATORS = set('+.⋅*')
 								    VARIABLES = set('abcde')
 								    PRIMES = set("'′")
 								    NEGATORS = set('~!')
 								    QUANTIFIERS = set('AE∀∃')
 								    NUMBERS = set('0123456789')
 								    WHITESPACE = set('\t \v\n')
 								    RULES = re.compile('''(?xi)
 								        joining | separation | double-tilde | fantasy\\ rule
 								        | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
 								        | contrapositive | De\\ Morgan | switcheroo
 								        | specification | generalization | interchange
 								        | existence | symmetry | transitivity
 								        | add\\ S | drop\\ S | induction
 								        | axiom\\ ([1-5]) | premise | push | pop
 								    ''')
 								    LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
 								    COMMENT = re.compile(r'\[[^\n\]]+\]')
 								    def __init__(self, *args, **kwargs):
 								        Lexer.__init__(self, *args, **kwargs)
 								        self.cur = []
 								    def whitespace(self, start, text, required=False):
 								        """Tokenize whitespace."""
 								        end = start
 								        try:
 								            while text[end] in self.WHITESPACE:
 								                end += 1
 								        except IndexError:
 								            end = len(text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								        if required and end == start:
 								            raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								        if end != start:
 								            self.cur.append((start, Text, text[start:end]))
 								        return end
 								    def variable(self, start, text):
 								        """Tokenize a variable."""
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								        if text[start] not in self.VARIABLES:
 								            raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								        end = start+1
 								        while text[end] in self.PRIMES:
 								            end += 1
 								        self.cur.append((start, Name.Variable, text[start:end]))
 								        return end
 								    def term(self, start, text):
 								        """Tokenize a term."""
 								        if text[start] == 'S':  # S...S(...) or S...0
 								            end = start+1
 								            while text[end] == 'S':
 								                end += 1
 								            self.cur.append((start, Number.Integer, text[start:end]))
 								            return self.term(end, text)
 								        if text[start] == '0':  # the singleton 0
 								            self.cur.append((start, Number.Integer, text[start]))
 								            return start+1
 								        if text[start] in self.VARIABLES:  # a''...
 								            return self.variable(start, text)
 								        if text[start] == '(':  # (...+...)
 								            self.cur.append((start, Punctuation, text[start]))
 								            start = self.term(start+1, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								            if text[start] not in self.OPERATORS:
 								                raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								            self.cur.append((start, Operator, text[start]))
 								            start = self.term(start+1, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								            if text[start] != ')':
 								                raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								            self.cur.append((start, Punctuation, text[start]))
 								            return start+1
 								        raise AssertionError  # no matches
 								    def formula(self, start, text):
 								        """Tokenize a formula."""
 								        if text[start] in self.NEGATORS:  # ~<...>
 								            end = start+1
 								            while text[end] in self.NEGATORS:
 								                end += 1
 								            self.cur.append((start, Operator, text[start:end]))
 								            return self.formula(end, text)
 								        if text[start] in self.QUANTIFIERS:  # Aa:<...>
 								            self.cur.append((start, Keyword.Declaration, text[start]))
 								            start = self.variable(start+1, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								            if text[start] != ':':
 								                raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								            self.cur.append((start, Punctuation, text[start]))
 								            return self.formula(start+1, text)
 								        if text[start] == '<':  # <...&...>
 								            self.cur.append((start, Punctuation, text[start]))
 								            start = self.formula(start+1, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								            if text[start] not in self.LOGIC:
 								                raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								            self.cur.append((start, Operator, text[start]))
 								            start = self.formula(start+1, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								            if text[start] != '>':
 								                raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								            self.cur.append((start, Punctuation, text[start]))
 								            return start+1
 								        # ...=...
 								        start = self.term(start, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								        if text[start] != '=':
 								            raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								        self.cur.append((start, Operator, text[start]))
 								        start = self.term(start+1, text)
 								        return start
 								    def rule(self, start, text):
 								        """Tokenize a rule."""
 								        match = self.RULES.match(text, start)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								        if match is None:
 								            raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								        groups = sorted(match.regs[1:])  # exclude whole match
 								        for group in groups:
 								            if group[0] >= 0:  # this group matched
 								                self.cur.append((start, Keyword, text[start:group[0]]))
 								                self.cur.append((group[0], Number.Integer,
 								                                 text[group[0]:group[1]]))
 								                if group[1] != match.end():
 								                    self.cur.append((group[1], Keyword,
 								                                     text[group[1]:match.end()]))
 								                break
 								        else:
 								            self.cur.append((start, Keyword, text[start:match.end()]))
 								        return match.end()
 								    def lineno(self, start, text):
 								        """Tokenize a line referral."""
 								        end = start
 								        while text[end] not in self.NUMBERS:
 								            end += 1
 								        self.cur.append((start, Punctuation, text[start]))
 								        self.cur.append((start+1, Text, text[start+1:end]))
 								        start = end
 								        match = self.LINENOS.match(text, start)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								        if match is None:
 								            raise AssertionError
 								        if text[match.end()] != ')':
 								            raise AssertionError
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								        self.cur.append((match.start(), Number.Integer, match.group(0)))
 								        self.cur.append((match.end(), Punctuation, text[match.end()]))
 								        return match.end() + 1
 								    def error_till_line_end(self, start, text):
 								        """Mark everything from ``start`` to the end of the line as Error."""
 								        end = start
 								        try:
 								            while text[end] != '\n':  # there's whitespace in rules
 								                end += 1
 								        except IndexError:
 								            end = len(text)
 								        if end != start:
 								            self.cur.append((start, Error, text[start:end]))
 								        end = self.whitespace(end, text)
 								        return end
 								    def get_tokens_unprocessed(self, text):
 								        """Returns a list of TNT tokens."""
 								        self.cur = []
 								        start = end = self.whitespace(0, text)
 								        while start <= end < len(text):
 								            try:
 								                # try line number
 								                while text[end] in self.NUMBERS:
 								                    end += 1
 								                if end != start:  # actual number present
 								                    self.cur.append((start, Number.Integer, text[start:end]))
 								                    # whitespace is required after a line number
 								                    orig = len(self.cur)
 								                    try:
 								                        start = end = self.whitespace(end, text, True)
 								                    except AssertionError:
 								                        del self.cur[orig:]
 								                        start = end = self.error_till_line_end(end, text)
 								                        continue
 								                # at this point it could be a comment
 								                match = self.COMMENT.match(text, start)
 								                if match is not None:
 								                    self.cur.append((start, Comment, text[start:match.end()]))
 								                    start = end = match.end()
 								                    # anything after the closing bracket is invalid
 								                    start = end = self.error_till_line_end(start, text)
 								                    # do not attempt to process the rest
 								                    continue
 								                del match
 								                if text[start] in '[]':  # fantasy push or pop
 								                    self.cur.append((start, Keyword, text[start]))
 								                    start += 1
 								                    end += 1
 								                else:
 								                    # one formula, possibly containing subformulae
 								                    orig = len(self.cur)
 								                    try:
 								                        start = end = self.formula(start, text)
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								                    except (AssertionError, RecursionError):  # not well-formed
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								                        del self.cur[orig:]
 								                        while text[end] not in self.WHITESPACE:
 								                            end += 1
 								                        self.cur.append((start, Error, text[start:end]))
 								                        start = end
 								                # skip whitespace after formula
 								                orig = len(self.cur)
 								                try:
 								                    start = end = self.whitespace(end, text, True)
 								                except AssertionError:
 								                    del self.cur[orig:]
 								                    start = end = self.error_till_line_end(start, text)
 								                    continue
 								                # rule proving this formula a theorem
 								                orig = len(self.cur)
 								                try:
 								                    start = end = self.rule(start, text)
 								                except AssertionError:
 								                    del self.cur[orig:]
 								                    start = end = self.error_till_line_end(start, text)
 								                    continue
 								                # skip whitespace after rule
 								                start = end = self.whitespace(end, text)
 								                # line marker
 								                if text[start] == '(':
 								                    orig = len(self.cur)
 								                    try:
 								                        start = end = self.lineno(start, text)
 								                    except AssertionError:
 								                        del self.cur[orig:]
 								                        start = end = self.error_till_line_end(start, text)
 								                        continue
 								                    start = end = self.whitespace(start, text)
 								            except IndexError:
 								                try:
 								                    del self.cur[orig:]
 								                except NameError:
-												Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies.

											
										
										
											2022-01-24 04:07:52 +00:00
+								                    pass  # if orig was never defined, fine
-												Update ffsubsync and srt module

* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
											
										
										
											2021-04-13 04:02:29 +00:00
+								                self.error_till_line_end(start, text)
 								        return self.cur