mirror of
https://github.com/morpheus65535/bazarr
synced 2025-01-02 21:15:41 +00:00
344 lines
11 KiB
Text
344 lines
11 KiB
Text
{"tests": [
|
|
|
|
{"description":"< in attribute name",
|
|
"input":"<z/0 <>",
|
|
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
|
|
|
|
{"description":"< in attribute value",
|
|
"input":"<z x=<>",
|
|
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
|
|
|
|
{"description":"= in unquoted attribute value",
|
|
"input":"<z z=z=z>",
|
|
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
|
|
|
|
{"description":"= attribute",
|
|
"input":"<z =>",
|
|
"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
|
|
|
|
{"description":"== attribute",
|
|
"input":"<z ==>",
|
|
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
|
|
|
|
{"description":"=== attribute",
|
|
"input":"<z ===>",
|
|
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
|
|
|
|
{"description":"==== attribute",
|
|
"input":"<z ====>",
|
|
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
|
|
|
|
{"description":"\" after ampersand in double-quoted attribute value",
|
|
"input":"<z z=\"&\">",
|
|
"output":[["StartTag", "z", {"z": "&"}]]},
|
|
|
|
{"description":"' after ampersand in double-quoted attribute value",
|
|
"input":"<z z=\"&'\">",
|
|
"output":[["StartTag", "z", {"z": "&'"}]]},
|
|
|
|
{"description":"' after ampersand in single-quoted attribute value",
|
|
"input":"<z z='&'>",
|
|
"output":[["StartTag", "z", {"z": "&"}]]},
|
|
|
|
{"description":"\" after ampersand in single-quoted attribute value",
|
|
"input":"<z z='&\"'>",
|
|
"output":[["StartTag", "z", {"z": "&\""}]]},
|
|
|
|
{"description":"Text after bogus character reference",
|
|
"input":"<z z='&xlink_xmlns;'>bar<z>",
|
|
"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
|
|
|
|
{"description":"Text after hex character reference",
|
|
"input":"<z z='&#x0020; foo'>bar<z>",
|
|
"output":[["StartTag","z",{"z":"  foo"}],["Character","bar"],["StartTag","z",{}]]},
|
|
|
|
{"description":"Attribute name starting with \"",
|
|
"input":"<foo \"='bar'>",
|
|
"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
|
|
|
|
{"description":"Attribute name starting with '",
|
|
"input":"<foo '='bar'>",
|
|
"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
|
|
|
|
{"description":"Attribute name containing \"",
|
|
"input":"<foo a\"b='bar'>",
|
|
"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
|
|
|
|
{"description":"Attribute name containing '",
|
|
"input":"<foo a'b='bar'>",
|
|
"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
|
|
|
|
{"description":"Unquoted attribute value containing '",
|
|
"input":"<foo a=b'c>",
|
|
"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
|
|
|
|
{"description":"Unquoted attribute value containing \"",
|
|
"input":"<foo a=b\"c>",
|
|
"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
|
|
|
|
{"description":"Double-quoted attribute value not followed by whitespace",
|
|
"input":"<foo a=\"b\"c>",
|
|
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
|
|
|
{"description":"Single-quoted attribute value not followed by whitespace",
|
|
"input":"<foo a='b'c>",
|
|
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
|
|
|
{"description":"Quoted attribute followed by permitted /",
|
|
"input":"<br a='b'/>",
|
|
"output":[["StartTag","br",{"a":"b"},true]]},
|
|
|
|
{"description":"Quoted attribute followed by non-permitted /",
|
|
"input":"<bar a='b'/>",
|
|
"output":[["StartTag","bar",{"a":"b"},true]]},
|
|
|
|
{"description":"CR EOF after doctype name",
|
|
"input":"<!doctype html \r",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"CR EOF in tag name",
|
|
"input":"<z\r",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"Slash EOF in tag name",
|
|
"input":"<z/",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"Zero hex numeric entity",
|
|
"input":"&#x0000;",
|
|
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Zero decimal numeric entity",
|
|
"input":"&#0000;",
|
|
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Zero-prefixed hex numeric entity",
|
|
"input":"&#x000041;",
|
|
"output":[["Character", "A"]]},
|
|
|
|
{"description":"Zero-prefixed decimal numeric entity",
|
|
"input":"&#000065;",
|
|
"output":[["Character", "A"]]},
|
|
|
|
{"description":"Empty hex numeric entities",
|
|
"input":"&#x &#X ",
|
|
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
|
|
|
|
{"description":"Empty decimal numeric entities",
|
|
"input":"&# &#; ",
|
|
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
|
|
|
|
{"description":"Non-BMP numeric entity",
|
|
"input":"&#x10000;",
|
|
"output":[["Character", "\uD800\uDC00"]]},
|
|
|
|
{"description":"Maximum non-BMP numeric entity",
|
|
"input":"&#X10FFFF;",
|
|
"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
|
|
|
|
{"description":"Above maximum numeric entity",
|
|
"input":"&#x110000;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"32-bit hex numeric entity",
|
|
"input":"&#x80000041;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"33-bit hex numeric entity",
|
|
"input":"&#x100000041;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"33-bit decimal numeric entity",
|
|
"input":"&#4294967361;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"65-bit hex numeric entity",
|
|
"input":"&#x10000000000000041;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"65-bit decimal numeric entity",
|
|
"input":"&#18446744073709551681;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Surrogate code point edge cases",
|
|
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
|
|
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
|
|
|
|
{"description":"Uppercase start tag name",
|
|
"input":"<X>",
|
|
"output":[["StartTag", "x", {}]]},
|
|
|
|
{"description":"Uppercase end tag name",
|
|
"input":"</X>",
|
|
"output":[["EndTag", "x"]]},
|
|
|
|
{"description":"Uppercase attribute name",
|
|
"input":"<x X>",
|
|
"output":[["StartTag", "x", { "x":"" }]]},
|
|
|
|
{"description":"Tag/attribute name case edge values",
|
|
"input":"<x@AZ[`az{ @AZ[`az{>",
|
|
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
|
|
|
|
{"description":"Duplicate different-case attributes",
|
|
"input":"<x x=1 x=2 X=3>",
|
|
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
|
|
|
|
{"description":"Uppercase close tag attributes",
|
|
"input":"</x X>",
|
|
"output":["ParseError", ["EndTag", "x"]]},
|
|
|
|
{"description":"Duplicate close tag attributes",
|
|
"input":"</x x x>",
|
|
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
|
|
|
|
{"description":"Permitted slash",
|
|
"input":"<br/>",
|
|
"output":[["StartTag","br",{},true]]},
|
|
|
|
{"description":"Non-permitted slash",
|
|
"input":"<xr/>",
|
|
"output":[["StartTag","xr",{},true]]},
|
|
|
|
{"description":"Permitted slash but in close tag",
|
|
"input":"</br/>",
|
|
"output":["ParseError", ["EndTag", "br"]]},
|
|
|
|
{"description":"Doctype public case-sensitivity (1)",
|
|
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
|
|
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
|
|
|
|
{"description":"Doctype public case-sensitivity (2)",
|
|
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
|
|
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
|
|
|
|
{"description":"Doctype system case-sensitivity (1)",
|
|
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
|
|
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
|
|
|
|
{"description":"Doctype system case-sensitivity (2)",
|
|
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
|
|
"output":[["DOCTYPE", "html", null, "xYz", true]]},
|
|
|
|
{"description":"U+0000 in lookahead region after non-matching character",
|
|
"input":"<!doc>\u0000",
|
|
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"U+0000 in lookahead region",
|
|
"input":"<!doc\u0000",
|
|
"output":["ParseError", ["Comment", "doc\uFFFD"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"U+0080 in lookahead region",
|
|
"input":"<!doc\u0080",
|
|
"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"U+FDD1 in lookahead region",
|
|
"input":"<!doc\uFDD1",
|
|
"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"U+1FFFF in lookahead region",
|
|
"input":"<!doc\uD83F\uDFFF",
|
|
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"CR followed by non-LF",
|
|
"input":"\r?",
|
|
"output":[["Character", "\n?"]]},
|
|
|
|
{"description":"CR at EOF",
|
|
"input":"\r",
|
|
"output":[["Character", "\n"]]},
|
|
|
|
{"description":"LF at EOF",
|
|
"input":"\n",
|
|
"output":[["Character", "\n"]]},
|
|
|
|
{"description":"CR LF",
|
|
"input":"\r\n",
|
|
"output":[["Character", "\n"]]},
|
|
|
|
{"description":"CR CR",
|
|
"input":"\r\r",
|
|
"output":[["Character", "\n\n"]]},
|
|
|
|
{"description":"LF LF",
|
|
"input":"\n\n",
|
|
"output":[["Character", "\n\n"]]},
|
|
|
|
{"description":"LF CR",
|
|
"input":"\n\r",
|
|
"output":[["Character", "\n\n"]]},
|
|
|
|
{"description":"text CR CR CR text",
|
|
"input":"text\r\r\rtext",
|
|
"output":[["Character", "text\n\n\ntext"]]},
|
|
|
|
{"description":"Doctype publik",
|
|
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype publi",
|
|
"input":"<!DOCTYPE html PUBLI",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype sistem",
|
|
"input":"<!DOCTYPE html SISTEM \"AbC\">",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype sys",
|
|
"input":"<!DOCTYPE html SYS",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype html x>text",
|
|
"input":"<!DOCTYPE html x>text",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
|
|
|
|
{"description":"Grave accent in unquoted attribute",
|
|
"input":"<a a=aa`>",
|
|
"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
|
|
|
|
{"description":"EOF in tag name state ",
|
|
"input":"<a",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in tag name state",
|
|
"input":"<a",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in before attribute name state",
|
|
"input":"<a ",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in attribute name state",
|
|
"input":"<a a",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in after attribute name state",
|
|
"input":"<a a ",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in before attribute value state",
|
|
"input":"<a a =",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in attribute value (double quoted) state",
|
|
"input":"<a a =\"a",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in attribute value (single quoted) state",
|
|
"input":"<a a ='a",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in attribute value (unquoted) state",
|
|
"input":"<a a =a",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"EOF in after attribute value state",
|
|
"input":"<a a ='a'",
|
|
"output":["ParseError"]}
|
|
|
|
]}
|