bazarr/libs/pygments/lexers/cddl.py

"""
    pygments.lexers.cddl
    ~~~~~~~~~~~~~~~~~~~~

    Lexer for the Concise data definition language (CDDL), a notational
    convention to express CBOR and JSON data structures.

    More information:
    https://datatracker.ietf.org/doc/rfc8610/

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

__all__ = ['CddlLexer']

from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import (
    Comment,
    Error,
    Keyword,
    Name,
    Number,
    Operator,
    Punctuation,
    String,
    Text,
    Whitespace,
)


class CddlLexer(RegexLexer):
    """
    Lexer for CDDL definitions.

    .. versionadded:: 2.8
    """
    name = "CDDL"
    aliases = ["cddl"]
    filenames = ["*.cddl"]
    mimetypes = ["text/x-cddl"]

    _prelude_types = [
        "any",
        "b64legacy",
        "b64url",
        "bigfloat",
        "bigint",
        "bignint",
        "biguint",
        "bool",
        "bstr",
        "bytes",
        "cbor-any",
        "decfrac",
        "eb16",
        "eb64legacy",
        "eb64url",
        "encoded-cbor",
        "false",
        "float",
        "float16",
        "float16-32",
        "float32",
        "float32-64",
        "float64",
        "int",
        "integer",
        "mime-message",
        "nil",
        "nint",
        "null",
        "number",
        "regexp",
        "tdate",
        "text",
        "time",
        "true",
        "tstr",
        "uint",
        "undefined",
        "unsigned",
        "uri",
    ]

    _controls = [
        ".and",
        ".bits",
        ".cbor",
        ".cborseq",
        ".default",
        ".eq",
        ".ge",
        ".gt",
        ".le",
        ".lt",
        ".ne",
        ".regexp",
        ".size",
        ".within",
    ]

    _re_id = (
        r"[$@A-Z_a-z]"
        r"(?:[\-\.]+(?=[$@0-9A-Z_a-z])|[$@0-9A-Z_a-z])*"

    )

    # While the spec reads more like "an int must not start with 0" we use a
    # lookahead here that says "after a 0 there must be no digit". This makes the
    # '0' the invalid character in '01', which looks nicer when highlighted.
    _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))"
    _re_int = r"-?" + _re_uint

    flags = re.UNICODE | re.MULTILINE

    tokens = {
        "commentsandwhitespace": [(r"\s+", Whitespace), (r";.+$", Comment.Single)],
        "root": [
            include("commentsandwhitespace"),
            # tag types
            (r"#(\d\.{uint})?".format(uint=_re_uint), Keyword.Type),  # type or any
            # occurence
            (
                r"({uint})?(\*)({uint})?".format(uint=_re_uint),
                bygroups(Number, Operator, Number),
            ),
            (r"\?|\+", Operator),  # occurrence
            (r"\^", Operator),  # cuts
            (r"(\.\.\.|\.\.)", Operator),  # rangeop
            (words(_controls, suffix=r"\b"), Operator.Word),  # ctlops
            # into choice op
            (r"&(?=\s*({groupname}|\())".format(groupname=_re_id), Operator),
            (r"~(?=\s*{})".format(_re_id), Operator),  # unwrap op
            (r"//|/(?!/)", Operator),  # double und single slash
            (r"=>|/==|/=|=", Operator),
            (r"[\[\]{}\(\),<>:]", Punctuation),
            # Bytestrings
            (r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),
            (r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),
            (r"'", String.Single, "bstr"),
            # Barewords as member keys (must be matched before values, types, typenames,
            # groupnames).
            # Token type is String as barewords are always interpreted as such.
            (
                r"({bareword})(\s*)(:)".format(bareword=_re_id),
                bygroups(String, Whitespace, Punctuation),
            ),
            # predefined types
            (
                words(_prelude_types, prefix=r"(?![\-_$@])\b", suffix=r"\b(?![\-_$@])"),
                Name.Builtin,
            ),
            # user-defined groupnames, typenames
            (_re_id, Name.Class),
            # values
            (r"0b[01]+", Number.Bin),
            (r"0o[0-7]+", Number.Oct),
            (r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+)?p[+-]?\d+", Number.Hex),  # hexfloat
            (r"0x[0-9a-fA-F]+", Number.Hex),  # hex
            # Float
            (
                r"{int}(?=(\.\d|e[+-]?\d))(?:\.\d+)?(?:e[+-]?\d+)?".format(int=_re_int),
                Number.Float,
            ),
            # Int
            (_re_int, Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
        ],
        "bstrb64url": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-zA-Z\-_=]+", String.Single),
            (r".", Error),
            # (r";.+$", Token.Other),
        ],
        "bstrh": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-fA-F]+", String.Single),
            (r".", Error),
        ],
        "bstr": [
            (r"'", String.Single, "#pop"),
            (r"\\.", String.Escape),
            (r"[^'\\]+", String.Single),
        ],
    }
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`"""`
			`pygments.lexers.cddl`
			`~~~~~~~~~~~~~~~~~~~~`

			`Lexer for the Concise data definition language (CDDL), a notational`
			`convention to express CBOR and JSON data structures.`

			`More information:`
			`https://datatracker.ietf.org/doc/rfc8610/`

			`:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.`
			`:license: BSD, see LICENSE for details.`
			`"""`

			`import re`

			`__all__ = ['CddlLexer']`

			`from pygments.lexer import RegexLexer, bygroups, include, words`
			`from pygments.token import (`
			`Comment,`
			`Error,`
			`Keyword,`
			`Name,`
			`Number,`
			`Operator,`
			`Punctuation,`
			`String,`
			`Text,`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`Whitespace,`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`)`


			`class CddlLexer(RegexLexer):`
			`"""`
			`Lexer for CDDL definitions.`

			`.. versionadded:: 2.8`
			`"""`
			`name = "CDDL"`
			`aliases = ["cddl"]`
			`filenames = ["*.cddl"]`
			`mimetypes = ["text/x-cddl"]`

			`_prelude_types = [`
			`"any",`
			`"b64legacy",`
			`"b64url",`
			`"bigfloat",`
			`"bigint",`
			`"bignint",`
			`"biguint",`
			`"bool",`
			`"bstr",`
			`"bytes",`
			`"cbor-any",`
			`"decfrac",`
			`"eb16",`
			`"eb64legacy",`
			`"eb64url",`
			`"encoded-cbor",`
			`"false",`
			`"float",`
			`"float16",`
			`"float16-32",`
			`"float32",`
			`"float32-64",`
			`"float64",`
			`"int",`
			`"integer",`
			`"mime-message",`
			`"nil",`
			`"nint",`
			`"null",`
			`"number",`
			`"regexp",`
			`"tdate",`
			`"text",`
			`"time",`
			`"true",`
			`"tstr",`
			`"uint",`
			`"undefined",`
			`"unsigned",`
			`"uri",`
			`]`

			`_controls = [`
			`".and",`
			`".bits",`
			`".cbor",`
			`".cborseq",`
			`".default",`
			`".eq",`
			`".ge",`
			`".gt",`
			`".le",`
			`".lt",`
			`".ne",`
			`".regexp",`
			`".size",`
			`".within",`
			`]`

			`_re_id = (`
			`r"[$@A-Z_a-z]"`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`r"(?:[\-\.]+(?=[$@0-9A-Z_a-z])\|[$@0-9A-Z_a-z])*"`

Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`)`

			`# While the spec reads more like "an int must not start with 0" we use a`
			`# lookahead here that says "after a 0 there must be no digit". This makes the`
			`# '0' the invalid character in '01', which looks nicer when highlighted.`
			`_re_uint = r"(?:0b[01]+\|0x[0-9a-fA-F]+\|[1-9]\d*\|0(?!\d))"`
			`_re_int = r"-?" + _re_uint`

			`flags = re.UNICODE \| re.MULTILINE`

			`tokens = {`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`"commentsandwhitespace": [(r"\s+", Whitespace), (r";.+$", Comment.Single)],`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`"root": [`
			`include("commentsandwhitespace"),`
			`# tag types`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`(r"#(\d\.{uint})?".format(uint=_re_uint), Keyword.Type), # type or any`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`# occurence`
			`(`
			`r"({uint})?(\*)({uint})?".format(uint=_re_uint),`
			`bygroups(Number, Operator, Number),`
			`),`
			`(r"\?\|\+", Operator), # occurrence`
			`(r"\^", Operator), # cuts`
			`(r"(\.\.\.\|\.\.)", Operator), # rangeop`
			`(words(_controls, suffix=r"\b"), Operator.Word), # ctlops`
			`# into choice op`
			`(r"&(?=\s*({groupname}\|\())".format(groupname=_re_id), Operator),`
			`(r"~(?=\s*{})".format(_re_id), Operator), # unwrap op`
			`(r"//\|/(?!/)", Operator), # double und single slash`
			`(r"=>\|/==\|/=\|=", Operator),`
			`(r"[\[\]{}\(\),<>:]", Punctuation),`
			`# Bytestrings`
			`(r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),`
			`(r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),`
			`(r"'", String.Single, "bstr"),`
			`# Barewords as member keys (must be matched before values, types, typenames,`
			`# groupnames).`
			`# Token type is String as barewords are always interpreted as such.`
			`(`
			`r"({bareword})(\s*)(:)".format(bareword=_re_id),`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`bygroups(String, Whitespace, Punctuation),`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`),`
			`# predefined types`
			`(`
			`words(_prelude_types, prefix=r"(?![\-_$@])\b", suffix=r"\b(?![\-_$@])"),`
			`Name.Builtin,`
			`),`
			`# user-defined groupnames, typenames`
			`(_re_id, Name.Class),`
			`# values`
			`(r"0b[01]+", Number.Bin),`
			`(r"0o[0-7]+", Number.Oct),`
			`(r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+)?p[+-]?\d+", Number.Hex), # hexfloat`
			`(r"0x[0-9a-fA-F]+", Number.Hex), # hex`
			`# Float`
			`(`
			`r"{int}(?=(\.\d\|e[+-]?\d))(?:\.\d+)?(?:e[+-]?\d+)?".format(int=_re_int),`
			`Number.Float,`
			`),`
			`# Int`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`(_re_int, Number.Integer),`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`(r'"(\\\\\|\\"\|[^"])*"', String.Double),`
			`],`
			`"bstrb64url": [`
			`(r"'", String.Single, "#pop"),`
			`include("commentsandwhitespace"),`
			`(r"\\.", String.Escape),`
			`(r"[0-9a-zA-Z\-_=]+", String.Single),`
			`(r".", Error),`
			`# (r";.+$", Token.Other),`
			`],`
			`"bstrh": [`
			`(r"'", String.Single, "#pop"),`
			`include("commentsandwhitespace"),`
			`(r"\\.", String.Escape),`
			`(r"[0-9a-fA-F]+", String.Single),`
			`(r".", Error),`
			`],`
			`"bstr": [`
			`(r"'", String.Single, "#pop"),`
			`(r"\\.", String.Escape),`
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. 2022-01-24 04:07:52 +00:00			`(r"[^'\\]+", String.Single),`
Update ffsubsync and srt module * Update ffsubsync to 0.4.11 * Update srt to 3.4.1 2021-04-13 04:02:29 +00:00			`],`
			`}`