diff --git a/.gitmodules b/.gitmodules index 817760bb4..af1b28b46 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,3 +50,7 @@ [submodule "third-party/small"] path = third-party/small url = https://github.com/transmission/small.git +[submodule "third-party/rapidjson"] + path = third-party/rapidjson + url = https://github.com/transmission/rapidjson.git + fetchRecurseSubmodules = false diff --git a/CMakeLists.txt b/CMakeLists.txt index aa2879551..fa9f8594b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,6 +207,7 @@ endif() set(CMAKE_FOLDER "third-party") find_package(FastFloat) find_package(Fmt) +find_package(RapidJSON) find_package(Small) find_package(UtfCpp) find_package(WideInteger) @@ -507,7 +508,6 @@ target_compile_definitions(miniupnpc::libminiupnpc SYSTEM_MINIUPNP $<$:MINIUPNPC_API_VERSION=${MINIUPNPC_API_VERSION}>) # API version macro was only added in 1.7 -add_subdirectory(third-party/jsonsl) add_subdirectory(third-party/wildmat) tr_add_external_auto_library(DHT dht dht diff --git a/Transmission.xcodeproj/project.pbxproj b/Transmission.xcodeproj/project.pbxproj index 1fb391ef0..677efb7c9 100644 --- a/Transmission.xcodeproj/project.pbxproj +++ b/Transmission.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 51; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ @@ -350,11 +350,8 @@ C1639A7D1A55F57200E42033 /* cencode.h in Headers */ = {isa = PBXBuildFile; fileRef = C1639A7B1A55F57200E42033 /* cencode.h */; }; C17740D5273A002C00E455D2 /* web-utils.cc in Sources */ = {isa = PBXBuildFile; fileRef = C17740D3273A002C00E455D2 /* web-utils.cc */; }; C17740D6273A002C00E455D2 /* web-utils.h in Headers */ = {isa = PBXBuildFile; fileRef = C17740D4273A002C00E455D2 /* web-utils.h */; }; - C1846BA0294F7A6300A98F30 /* jsonsl.h in Headers */ = {isa = PBXBuildFile; fileRef = C1846B86294F780700A98F30 /* jsonsl.h */; }; - C1846BA1294F7A6300A98F30 /* jsonsl.c in Sources */ = {isa = PBXBuildFile; fileRef = C1846B85294F780700A98F30 /* jsonsl.c */; }; C1846BA2294F7A6800A98F30 /* wildmat.c in Sources */ = {isa = PBXBuildFile; fileRef = C1846B88294F781800A98F30 /* wildmat.c */; }; C1846BA3294F7A6800A98F30 /* wildmat.h in Headers */ = {isa = PBXBuildFile; fileRef = C1846B87294F781800A98F30 /* wildmat.h */; }; - C1846BA8294F7B5700A98F30 /* libjsonsl.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C1846B91294F796A00A98F30 /* libjsonsl.a */; }; C1846BA9294F7B5A00A98F30 /* libwildmat.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C1846B9E294F7A3400A98F30 /* libwildmat.a */; }; C1BF7BA81F2A3CB7008E88A7 /* upnpdev.c in Sources */ = {isa = PBXBuildFile; fileRef = C1BF7BA71F2A3CB7008E88A7 /* upnpdev.c */; }; C1BF7BAA1F2A3CCE008E88A7 /* upnpdev.h in Headers */ = {isa = PBXBuildFile; fileRef = C1BF7BA91F2A3CCE008E88A7 /* upnpdev.h */; }; @@ -548,13 +545,6 @@ remoteGlobalIDString = C1639A6E1A55F4D600E42033; remoteInfo = b64; }; - C1846BA4294F7B1100A98F30 /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = 29B97313FDCFA39411CA2CEA /* Project object */; - proxyType = 1; - remoteGlobalIDString = C1846B90294F796A00A98F30; - remoteInfo = jsonsl; - }; C1846BA6294F7B1400A98F30 /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; containerPortal = 29B97313FDCFA39411CA2CEA /* Project object */; @@ -1163,11 +1153,8 @@ C1639A7B1A55F57200E42033 /* cencode.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = cencode.h; path = include/b64/cencode.h; sourceTree = ""; }; C17740D3273A002C00E455D2 /* web-utils.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "web-utils.cc"; sourceTree = ""; }; C17740D4273A002C00E455D2 /* web-utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "web-utils.h"; sourceTree = ""; }; - C1846B85294F780700A98F30 /* jsonsl.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = jsonsl.c; sourceTree = ""; }; - C1846B86294F780700A98F30 /* jsonsl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = jsonsl.h; sourceTree = ""; }; C1846B87294F781800A98F30 /* wildmat.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = wildmat.h; sourceTree = ""; }; C1846B88294F781800A98F30 /* wildmat.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = wildmat.c; sourceTree = ""; }; - C1846B91294F796A00A98F30 /* libjsonsl.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libjsonsl.a; sourceTree = BUILT_PRODUCTS_DIR; }; C1846B9E294F7A3400A98F30 /* libwildmat.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libwildmat.a; sourceTree = BUILT_PRODUCTS_DIR; }; C1BF7BA71F2A3CB7008E88A7 /* upnpdev.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = upnpdev.c; sourceTree = ""; }; C1BF7BA91F2A3CCE008E88A7 /* upnpdev.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = upnpdev.h; sourceTree = ""; }; @@ -1345,7 +1332,6 @@ buildActionMask = 2147483647; files = ( C1846BA9294F7B5A00A98F30 /* libwildmat.a in Frameworks */, - C1846BA8294F7B5700A98F30 /* libjsonsl.a in Frameworks */, C3D9062F27B7F7E200EF2386 /* libpsl.a in Frameworks */, C3CEBBFC2794A12200683BE0 /* libdeflate.a in Frameworks */, C1639A741A55F4E000E42033 /* libb64.a in Frameworks */, @@ -1388,13 +1374,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - C1846B8F294F796A00A98F30 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; C1846B99294F7A3400A98F30 /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -1583,7 +1562,6 @@ C1639A6F1A55F4D600E42033 /* libb64.a */, C3CEBBA927949CA000683BE0 /* libdeflate.a */, C3D9062127B7E3C900EF2386 /* libpsl.a */, - C1846B91294F796A00A98F30 /* libjsonsl.a */, C1846B9E294F7A3400A98F30 /* libwildmat.a */, ); name = Products; @@ -1607,7 +1585,6 @@ 3C7A11880D0B2E6700B5701F /* libnatpmp */, C3D9061627B7E12F00EF2386 /* libpsl */, C1639A751A55F52800E42033 /* b64 */, - C1846B81294F774D00A98F30 /* jsonsl */, C1846B82294F777000A98F30 /* wildmat */, 4DDBB71509E16B3F00284745 /* Libraries */, A2F35BBA15C5A0A100EBF632 /* Frameworks */, @@ -2085,16 +2062,6 @@ path = "third-party/libb64"; sourceTree = ""; }; - C1846B81294F774D00A98F30 /* jsonsl */ = { - isa = PBXGroup; - children = ( - C1846B85294F780700A98F30 /* jsonsl.c */, - C1846B86294F780700A98F30 /* jsonsl.h */, - ); - name = jsonsl; - path = "third-party/jsonsl"; - sourceTree = ""; - }; C1846B82294F777000A98F30 /* wildmat */ = { isa = PBXGroup; children = ( @@ -2402,14 +2369,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - C1846B8D294F796A00A98F30 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - C1846BA0294F7A6300A98F30 /* jsonsl.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; C1846B97294F7A3400A98F30 /* Headers */ = { isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; @@ -2493,7 +2452,6 @@ ); dependencies = ( C1846BA7294F7B1400A98F30 /* PBXTargetDependency */, - C1846BA5294F7B1100A98F30 /* PBXTargetDependency */, C33E46A22794B3CC0090F2AA /* PBXTargetDependency */, A226FDB10D0CDF6E005A7F71 /* PBXTargetDependency */, BE1183760CE161040002D0F3 /* PBXTargetDependency */, @@ -2688,23 +2646,6 @@ productReference = C1639A6F1A55F4D600E42033 /* libb64.a */; productType = "com.apple.product-type.library.static"; }; - C1846B90294F796A00A98F30 /* jsonsl */ = { - isa = PBXNativeTarget; - buildConfigurationList = C1846B92294F796C00A98F30 /* Build configuration list for PBXNativeTarget "jsonsl" */; - buildPhases = ( - C1846B8D294F796A00A98F30 /* Headers */, - C1846B8E294F796A00A98F30 /* Sources */, - C1846B8F294F796A00A98F30 /* Frameworks */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = jsonsl; - productName = jsonsl; - productReference = C1846B91294F796A00A98F30 /* libjsonsl.a */; - productType = "com.apple.product-type.library.static"; - }; C1846B96294F7A3400A98F30 /* wildmat */ = { isa = PBXNativeTarget; buildConfigurationList = C1846B9A294F7A3400A98F30 /* Build configuration list for PBXNativeTarget "wildmat" */; @@ -2718,7 +2659,7 @@ dependencies = ( ); name = wildmat; - productName = jsonsl; + productName = wildmat; productReference = C1846B9E294F7A3400A98F30 /* libwildmat.a */; productType = "com.apple.product-type.library.static"; }; @@ -2827,9 +2768,6 @@ C1639A6E1A55F4D600E42033 = { CreatedOnToolsVersion = 6.1.1; }; - C1846B90294F796A00A98F30 = { - CreatedOnToolsVersion = 14.1; - }; C3D9062027B7E3C900EF2386 = { CreatedOnToolsVersion = 13.0; }; @@ -2874,7 +2812,6 @@ C1639A6E1A55F4D600E42033 /* b64 */, C3CEBB9F27949CA000683BE0 /* deflate */, C3D9062027B7E3C900EF2386 /* psl */, - C1846B90294F796A00A98F30 /* jsonsl */, C1846B96294F7A3400A98F30 /* wildmat */, ); }; @@ -3328,14 +3265,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - C1846B8E294F796A00A98F30 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - C1846BA1294F7A6300A98F30 /* jsonsl.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; C1846B98294F7A3400A98F30 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -3458,11 +3387,6 @@ target = C1639A6E1A55F4D600E42033 /* b64 */; targetProxy = C165AB8C1A55FAA900D37711 /* PBXContainerItemProxy */; }; - C1846BA5294F7B1100A98F30 /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = C1846B90294F796A00A98F30 /* jsonsl */; - targetProxy = C1846BA4294F7B1100A98F30 /* PBXContainerItemProxy */; - }; C1846BA7294F7B1400A98F30 /* PBXTargetDependency */ = { isa = PBXTargetDependency; target = C1846B96294F7A3400A98F30 /* wildmat */; @@ -3757,7 +3681,7 @@ "third-party/libpsl/include", "third-party/libutp/include", "third-party/utfcpp/source", - "third-party/jsonsl", + "third-party/rapidjson/include", "third-party/wildmat", ); OTHER_CFLAGS = ( @@ -3766,6 +3690,7 @@ "-D__TRANSMISSION__", "-DWIDE_INTEGER_DISABLE_FLOAT_INTEROP", "-DWIDE_INTEGER_DISABLE_IOSTREAM", + "-DRAPIDJSON_HAS_STDSTRING=1", "-DHAVE_FLOCK", "-DHAVE_STRLCPY", ); @@ -3776,7 +3701,7 @@ "third-party/fast_float/include", "third-party/fmt/include", "third-party/small/include", - "third-party/jsonsl", + "third-party/rapidjson/include", "third-party/libb64/include", "third-party/libdeflate", "third-party/libevent/include", @@ -4016,7 +3941,7 @@ "third-party/libpsl/include", "third-party/libutp/include", "third-party/utfcpp/source", - "third-party/jsonsl", + "third-party/rapidjson/include", "third-party/wildmat", ); OTHER_CFLAGS = ( @@ -4025,6 +3950,7 @@ "-D__TRANSMISSION__", "-DWIDE_INTEGER_DISABLE_FLOAT_INTEROP", "-DWIDE_INTEGER_DISABLE_IOSTREAM", + "-DRAPIDJSON_HAS_STDSTRING=1", "-DHAVE_FLOCK", "-DHAVE_STRLCPY", ); @@ -4035,7 +3961,7 @@ "third-party/fast_float/include", "third-party/fmt/include", "third-party/small/include", - "third-party/jsonsl", + "third-party/rapidjson/include", "third-party/libb64/include", "third-party/libdeflate", "third-party/libevent/include", @@ -4347,7 +4273,7 @@ "third-party/libpsl/include", "third-party/libutp/include", "third-party/utfcpp/source", - "third-party/jsonsl", + "third-party/rapidjson/include", "third-party/wildmat", ); OTHER_CFLAGS = ( @@ -4356,6 +4282,7 @@ "-D__TRANSMISSION__", "-DWIDE_INTEGER_DISABLE_FLOAT_INTEROP", "-DWIDE_INTEGER_DISABLE_IOSTREAM", + "-DRAPIDJSON_HAS_STDSTRING=1", "-DHAVE_FLOCK", "-DHAVE_STRLCPY", ); @@ -4366,7 +4293,7 @@ "third-party/fast_float/include", "third-party/fmt/include", "third-party/small/include", - "third-party/jsonsl", + "third-party/rapidjson/include", "third-party/libb64/include", "third-party/libdeflate", "third-party/libevent/include", @@ -4695,36 +4622,6 @@ }; name = Release; }; - C1846B93294F796C00A98F30 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CLANG_ENABLE_OBJC_ARC = NO; - GCC_WARN_64_TO_32_BIT_CONVERSION = NO; - GENERATE_MASTER_OBJECT_FILE = YES; - PRODUCT_NAME = jsonsl; - }; - name = Debug; - }; - C1846B94294F796C00A98F30 /* Release - Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CLANG_ENABLE_OBJC_ARC = NO; - GCC_WARN_64_TO_32_BIT_CONVERSION = NO; - GENERATE_MASTER_OBJECT_FILE = YES; - PRODUCT_NAME = jsonsl; - }; - name = "Release - Debug"; - }; - C1846B95294F796C00A98F30 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CLANG_ENABLE_OBJC_ARC = NO; - GCC_WARN_64_TO_32_BIT_CONVERSION = NO; - GENERATE_MASTER_OBJECT_FILE = YES; - PRODUCT_NAME = jsonsl; - }; - name = Release; - }; C1846B9B294F7A3400A98F30 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -5167,16 +5064,6 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Debug; }; - C1846B92294F796C00A98F30 /* Build configuration list for PBXNativeTarget "jsonsl" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C1846B93294F796C00A98F30 /* Debug */, - C1846B94294F796C00A98F30 /* Release - Debug */, - C1846B95294F796C00A98F30 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; C1846B9A294F7A3400A98F30 /* Build configuration list for PBXNativeTarget "wildmat" */ = { isa = XCConfigurationList; buildConfigurations = ( diff --git a/cmake/FindRapidJSON.cmake b/cmake/FindRapidJSON.cmake new file mode 100644 index 000000000..c33070dd5 --- /dev/null +++ b/cmake/FindRapidJSON.cmake @@ -0,0 +1,9 @@ +add_library(RapidJSON INTERFACE IMPORTED) + +target_include_directories(RapidJSON + INTERFACE + ${CMAKE_CURRENT_LIST_DIR}/../third-party/rapidjson/include) + +target_compile_definitions(RapidJSON + INTERFACE + RAPIDJSON_HAS_STDSTRING=1) diff --git a/libtransmission/CMakeLists.txt b/libtransmission/CMakeLists.txt index 82d3efebd..6ef3a9b92 100644 --- a/libtransmission/CMakeLists.txt +++ b/libtransmission/CMakeLists.txt @@ -287,7 +287,7 @@ target_link_libraries(${TR_NAME} ${LIBM_LIBRARY} ${LIBQUOTA_LIBRARY} ${TR_NETWORK_LIBRARIES} - jsonsl + RapidJSON utf8::cpp wildmat WideInteger::WideInteger diff --git a/libtransmission/variant-json.cc b/libtransmission/variant-json.cc index 6a835264b..25b76f70a 100644 --- a/libtransmission/variant-json.cc +++ b/libtransmission/variant-json.cc @@ -5,435 +5,256 @@ #include #include -#include #include /* EILSEQ, EINVAL */ -#include /* fabs() */ #include // std::byte #include // uint16_t -#include -#include -#include -#include // std::back_inserter +#include +#include #include #include +#include #include - -#define UTF_CPP_CPLUSPLUS 201703L -#include +#include #include -#include -#include +// RapidJSON SIMD optimisations +#ifdef __SSE2__ +#define RAPIDJSON_SSE2 +#endif +#ifdef __SSE4_2__ +#define RAPIDJSON_SSE42 +#endif +#ifdef __ARM_NEON +#define RAPIDJSON_NEON +#endif +#include +#include +#include +#include +#include +#include +#include +#include #define LIBTRANSMISSION_VARIANT_MODULE #include "libtransmission/error.h" #include "libtransmission/quark.h" #include "libtransmission/tr-assert.h" -#include "libtransmission/tr-buffer.h" #include "libtransmission/utils.h" #include "libtransmission/variant.h" -using namespace std::literals; - namespace { -[[nodiscard]] constexpr size_t variant_size(tr_variant const& var) noexcept -{ - switch (var.index()) - { - case tr_variant::MapIndex: - return std::size(*var.get_if()); - - case tr_variant::VectorIndex: - return std::size(*var.get_if()); - - default: - return {}; - } -} - namespace parse_helpers { -/* arbitrary value... this is much deeper than our code goes */ -auto constexpr MaxDepth = size_t{ 64 }; - -struct json_wrapper_data +struct json_to_variant_handler : public rapidjson::BaseReaderHandler<> { - bool has_content; - size_t size; - std::string_view key; - std::string keybuf; - std::string strbuf; - tr_error* error; - std::deque stack; - tr_variant* top; - bool inplace = false; + static_assert(std::is_same_v); + + explicit json_to_variant_handler(tr_variant* const top) + { + stack_.emplace(top); + } + + bool Null() + { + tr_variantInitQuark(get_leaf(), TR_KEY_NONE); + return true; + } + + bool Bool(bool const val) + { + tr_variantInitBool(get_leaf(), val); + return true; + } + + bool Int(int const val) + { + return Int64(val); + } + + bool Uint(unsigned const val) + { + return Uint64(val); + } + + bool Int64(int64_t const val) + { + tr_variantInitInt(get_leaf(), val); + return true; + } + + bool Uint64(uint64_t const val) + { + return Int64(val); + } + + bool Double(double const val) + { + tr_variantInitReal(get_leaf(), val); + return true; + } + + bool String(Ch const* const str, rapidjson::SizeType const len, bool const copy) + { + if (copy) + { + tr_variantInitStr(get_leaf(), { str, len }); + } + else + { + tr_variantInitStrView(get_leaf(), { str, len }); + } + return true; + } + + bool StartObject() + { + tr_variantInitDict(push_stack(), prealloc_guess()); + return true; + } + + bool Key(Ch const* const str, rapidjson::SizeType const len, bool const copy) + { + if (copy) + { + key_buf_.assign(str, len); + cur_key_ = key_buf_; + } + else + { + cur_key_ = std::string_view{ str, len }; + } + return true; + } + + bool EndObject(rapidjson::SizeType const len) + { + pop_stack(len); + return true; + } + + bool StartArray() + { + tr_variantInitList(push_stack(), prealloc_guess()); + return true; + } + + bool EndArray(rapidjson::SizeType const len) + { + pop_stack(len); + return true; + } + +private: + [[nodiscard]] size_t prealloc_guess() const noexcept + { + auto const depth = std::size(stack_); + return depth < MaxDepth ? prealloc_guess_[depth] : 0; + } + + tr_variant* push_stack() noexcept + { + return stack_.emplace(get_leaf()); + } + + void pop_stack(rapidjson::SizeType const len) noexcept + { +#ifdef TR_ENABLE_ASSERTS + if (auto* top = stack_.top(); top->holds_alternative()) + { + TR_ASSERT(std::size(*top->get_if()) == len); + } + else if (top->holds_alternative()) + { + TR_ASSERT(std::size(*top->get_if()) == len); + } +#endif + + auto const depth = std::size(stack_); + stack_.pop(); + TR_ASSERT(!std::empty(stack_)); + if (depth < MaxDepth) + { + prealloc_guess_[depth] = len; + } + } + + tr_variant* get_leaf() + { + auto* const parent = stack_.top(); + TR_ASSERT(parent != nullptr); + + if (parent->holds_alternative()) + { + return tr_variantListAdd(parent); + } + if (parent->holds_alternative()) + { + TR_ASSERT(!std::empty(cur_key_)); + auto tmp = std::string_view{}; + std::swap(cur_key_, tmp); + return tr_variantDictAdd(parent, tr_quark_new(tmp)); + } + + return parent; + } + + /* arbitrary value... this is much deeper than our code goes */ + static auto constexpr MaxDepth = size_t{ 64 }; /* A very common pattern is for a container's children to be similar, * e.g. they may all be objects with the same set of keys. So when * a container is popped off the stack, remember its size to use as * a preallocation heuristic for the next container at that depth. */ - std::array preallocGuess; + std::array prealloc_guess_{}; + + std::string key_buf_; + std::string_view cur_key_; + std::stack stack_; }; - -tr_variant* get_node(struct jsonsl_st* jsn) -{ - auto* data = static_cast(jsn->data); - - auto* parent = std::empty(data->stack) ? nullptr : data->stack.back(); - - tr_variant* node = nullptr; - if (parent == nullptr) - { - node = data->top; - } - else if (parent->holds_alternative()) - { - node = tr_variantListAdd(parent); - } - else if (parent->holds_alternative() && !std::empty(data->key)) - { - node = tr_variantDictAdd(parent, tr_quark_new(data->key)); - data->key = ""sv; - } - - return node; -} - -void error_handler(jsonsl_t jsn, jsonsl_error_t error, jsonsl_state_st* /*state*/, jsonsl_char_t const* buf) -{ - auto* data = static_cast(jsn->data); - - tr_error_set( - &data->error, - EILSEQ, - fmt::format( - _("Couldn't parse JSON at position {position} '{text}': {error} ({error_code})"), - fmt::arg("position", jsn->pos), - fmt::arg("text", std::string_view{ buf, std::min(size_t{ 16U }, data->size - jsn->pos) }), - fmt::arg("error", jsonsl_strerror(error)), - fmt::arg("error_code", static_cast(error)))); -} - -int error_callback(jsonsl_t jsn, jsonsl_error_t error, struct jsonsl_state_st* state, jsonsl_char_t* at) -{ - error_handler(jsn, error, state, at); - return 0; /* bail */ -} - -void action_callback_PUSH(jsonsl_t jsn, jsonsl_action_t /*action*/, struct jsonsl_state_st* state, jsonsl_char_t const* /*buf*/) -{ - auto* const data = static_cast(jsn->data); - - if ((state->type == JSONSL_T_LIST) || (state->type == JSONSL_T_OBJECT)) - { - data->has_content = true; - tr_variant* node = get_node(jsn); - data->stack.push_back(node); - - size_t const depth = std::size(data->stack); - size_t const n = depth < MaxDepth ? data->preallocGuess[depth] : 0; - if (state->type == JSONSL_T_LIST) - { - tr_variantInitList(node, n); - } - else - { - tr_variantInitDict(node, n); - } - } -} - -/* like sscanf(in+2, "%4x", &val) but less slow */ -[[nodiscard]] constexpr bool decode_hex_string(char const* in, std::uint16_t& setme) -{ - TR_ASSERT(in != nullptr); - - unsigned int val = 0; - char const* const end = in + 6; - - TR_ASSERT(in[0] == '\\'); - TR_ASSERT(in[1] == 'u'); - in += 2; - - do - { - val <<= 4; - - if ('0' <= *in && *in <= '9') - { - val += *in - '0'; - } - else if ('a' <= *in && *in <= 'f') - { - val += *in - 'a' + 10U; - } - else if ('A' <= *in && *in <= 'F') - { - val += *in - 'A' + 10U; - } - else - { - return false; - } - } while (++in != end); - - setme = val; - return true; -} - -template -void decode_single_uchar(char const*& in, char const* const in_end, Iter& buf16_out_it) -{ - static auto constexpr EscapedUcharLength = 6U; - if (in_end - in >= EscapedUcharLength && decode_hex_string(in, *buf16_out_it)) - { - in += EscapedUcharLength; - ++buf16_out_it; - } -} - -[[nodiscard]] bool decode_escaped_uchar_sequence(char const*& in, char const* const in_end, std::string& buf) -{ - auto buf16 = std::array{}; - auto buf16_out_it = std::begin(buf16); - - decode_single_uchar(in, in_end, buf16_out_it); - if (in[0] == '\\' && in[1] == 'u') - { - decode_single_uchar(in, in_end, buf16_out_it); - } - - if (buf16_out_it == std::begin(buf16)) - { - return false; - } - - try - { - utf8::utf16to8(std::begin(buf16), buf16_out_it, std::back_inserter(buf)); - } - catch (utf8::exception const&) // invalid codepoint - { - buf.push_back('?'); - } - - return true; -} - -[[nodiscard]] std::string_view extract_escaped_string(char const* in, size_t in_len, std::string& buf) -{ - char const* const in_end = in + in_len; - - buf.clear(); - - while (in < in_end) - { - bool unescaped = false; - - if (*in == '\\' && in_end - in >= 2) - { - switch (in[1]) - { - case 'b': - buf.push_back('\b'); - in += 2; - unescaped = true; - break; - - case 'f': - buf.push_back('\f'); - in += 2; - unescaped = true; - break; - - case 'n': - buf.push_back('\n'); - in += 2; - unescaped = true; - break; - - case 'r': - buf.push_back('\r'); - in += 2; - unescaped = true; - break; - - case 't': - buf.push_back('\t'); - in += 2; - unescaped = true; - break; - - case '/': - buf.push_back('/'); - in += 2; - unescaped = true; - break; - - case '"': - buf.push_back('"'); - in += 2; - unescaped = true; - break; - - case '\\': - buf.push_back('\\'); - in += 2; - unescaped = true; - break; - - case 'u': - if (decode_escaped_uchar_sequence(in, in_end, buf)) - { - unescaped = true; - break; - } - } - } - - if (!unescaped) - { - buf.push_back(*in); - ++in; - } - } - - return buf; -} - -[[nodiscard]] std::pair extract_string(jsonsl_t jsn, struct jsonsl_state_st* state, std::string& buf) -{ - // figure out where the string is - char const* in_begin = jsn->base + state->pos_begin; - if (*in_begin == '"') - { - in_begin++; - } - - char const* const in_end = jsn->base + state->pos_cur; - size_t const in_len = in_end - in_begin; - if (memchr(in_begin, '\\', in_len) == nullptr) - { - /* it's not escaped */ - return std::make_pair(std::string_view{ in_begin, in_len }, true); - } - - return std::make_pair(extract_escaped_string(in_begin, in_len, buf), false); -} - -void action_callback_POP(jsonsl_t jsn, jsonsl_action_t /*action*/, struct jsonsl_state_st* state, jsonsl_char_t const* /*buf*/) -{ - auto* data = static_cast(jsn->data); - - if (state->type == JSONSL_T_STRING) - { - auto const [str, inplace] = extract_string(jsn, state, data->strbuf); - if (inplace && data->inplace) - { - tr_variantInitStrView(get_node(jsn), str); - } - else - { - tr_variantInitStr(get_node(jsn), str); - } - data->has_content = true; - } - else if (state->type == JSONSL_T_HKEY) - { - data->has_content = true; - auto const [key, inplace] = extract_string(jsn, state, data->keybuf); - data->key = key; - } - else if (state->type == JSONSL_T_LIST || state->type == JSONSL_T_OBJECT) - { - auto const depth = std::size(data->stack); - auto const* const v = data->stack.back(); - data->stack.pop_back(); - if (depth < MaxDepth) - { - data->preallocGuess[depth] = variant_size(*v); - } - } - else if (state->type == JSONSL_T_SPECIAL) - { - if ((state->special_flags & JSONSL_SPECIALf_NUMNOINT) != 0) - { - auto sv = std::string_view{ jsn->base + state->pos_begin, jsn->pos - state->pos_begin }; - tr_variantInitReal(get_node(jsn), tr_num_parse(sv).value_or(0.0)); - } - else if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) != 0) - { - char const* begin = jsn->base + state->pos_begin; - data->has_content = true; - tr_variantInitInt(get_node(jsn), std::strtoll(begin, nullptr, 10)); - } - else if ((state->special_flags & JSONSL_SPECIALf_BOOLEAN) != 0) - { - bool const b = (state->special_flags & JSONSL_SPECIALf_TRUE) != 0; - data->has_content = true; - tr_variantInitBool(get_node(jsn), b); - } - else if ((state->special_flags & JSONSL_SPECIALf_NULL) != 0) - { - data->has_content = true; - tr_variantInitQuark(get_node(jsn), TR_KEY_NONE); - } - } -} - } // namespace parse_helpers } // namespace std::optional tr_variant_serde::parse_json(std::string_view input) { - using namespace parse_helpers; + auto* const begin = std::data(input); + TR_ASSERT(begin != nullptr); // RapidJSON will dereference a nullptr if this is false + auto const size = std::size(input); auto top = tr_variant{}; - auto data = json_wrapper_data{}; - data.error = nullptr; - data.size = std::size(input); - data.has_content = false; - data.key = ""sv; - data.inplace = parse_inplace_; - data.preallocGuess = {}; - data.stack = {}; - data.top = ⊤ + auto handler = parse_helpers::json_to_variant_handler{ &top }; + auto ms = rapidjson::MemoryStream{ begin, size }; + auto eis = rapidjson::AutoUTFInputStream{ ms }; + auto reader = rapidjson::GenericReader, rapidjson::UTF8>{}; + reader.Parse(eis, handler); - auto jsn = jsonsl_new(MaxDepth); - jsn->action_callback_PUSH = action_callback_PUSH; - jsn->action_callback_POP = action_callback_POP; - jsn->error_callback = error_callback; - jsn->data = &data; - jsonsl_enable_all_callbacks(jsn); - - // parse it - jsonsl_feed(jsn, static_cast(std::data(input)), std::size(input)); - - // EINVAL if there was no content - if (data.error == nullptr && !data.has_content) - { - tr_error_set(&data.error, EINVAL, "No content"); - } - - end_ = std::data(input) + jsn->pos; - - if (data.error != nullptr) - { - tr_error_propagate(&error_, &data.error); - } - - // cleanup - jsonsl_destroy(jsn); - - if (error_ == nullptr) + if (!reader.HasParseError()) { return std::optional{ std::move(top) }; } + if (auto err_code = reader.GetParseErrorCode(); err_code == rapidjson::kParseErrorDocumentEmpty) + { + tr_error_set(&error_, EINVAL, "No content"); + } + else + { + auto const err_offset = reader.GetErrorOffset(); + tr_error_set( + &error_, + EILSEQ, + fmt::format( + _("Couldn't parse JSON at position {position} '{text}': {error} ({error_code})"), + fmt::arg("position", err_offset), + fmt::arg("text", std::string_view{ begin + err_offset, std::min(size_t{ 16U }, size - err_offset) }), + fmt::arg("error", rapidjson::GetParseError_En(err_code)), + fmt::arg("error_code", static_cast>(err_code)))); + } + return {}; } @@ -443,251 +264,105 @@ namespace { namespace to_string_helpers { -struct ParentState +// implements RapidJSON's Stream concept, so that the library can output +// directly to a std::string, and we can avoid some copying by copy elision +// http://rapidjson.org/md_doc_stream.html +struct string_output_stream { - bool is_map = false; - bool is_list = false; - size_t child_index; - size_t child_count; + using Ch = char; + + explicit string_output_stream(std::string& str) + : str_ref_(str) + { + } + + [[nodiscard]] static Ch Peek() + { + TR_ASSERT(false); + return 0; + } + + [[nodiscard]] static Ch Take() + { + TR_ASSERT(false); + return 0; + } + + static size_t Tell() + { + TR_ASSERT(false); + return 0U; + } + + static Ch* PutBegin() + { + TR_ASSERT(false); + return nullptr; + } + + void Put(Ch const c) + { + str_ref_ += c; + } + + static void Flush() + { + } + + static size_t PutEnd(Ch* /*begin*/) + { + TR_ASSERT(false); + return 0U; + } + +private: + std::string& str_ref_; }; -struct JsonWalk -{ - explicit JsonWalk(bool do_indent) - : doIndent{ do_indent } - { - } - - std::deque parents; - libtransmission::StackBuffer<1024U * 8U, std::byte> out; - bool doIndent; -}; - -void jsonIndent(struct JsonWalk* data) -{ - static auto buf = std::array{}; - - if (buf.front() == '\0') - { - memset(std::data(buf), ' ', std::size(buf)); - buf[0] = '\n'; - } - - if (data->doIndent) - { - data->out.add(std::data(buf), std::size(data->parents) * 4 + 1); - } -} - -void jsonChildFunc(struct JsonWalk* data) -{ - if (std::empty(data->parents)) - { - return; - } - - auto& parent_state = data->parents.back(); - - if (parent_state.is_map) - { - int const i = parent_state.child_index; - ++parent_state.child_index; - - if (i % 2 == 0) - { - data->out.add(data->doIndent ? ": "sv : ":"sv); - } - else - { - bool const is_last = parent_state.child_index == parent_state.child_count; - if (!is_last) - { - data->out.push_back(','); - jsonIndent(data); - } - } - } - else if (parent_state.is_list) - { - ++parent_state.child_index; - if (bool const is_last = parent_state.child_index == parent_state.child_count; !is_last) - { - data->out.push_back(','); - jsonIndent(data); - } - } -} - -void jsonPushParent(struct JsonWalk* data, tr_variant const& v) -{ - auto const is_dict = v.holds_alternative(); - auto const is_list = v.holds_alternative(); - auto const n_children = variant_size(v) * (is_dict ? 2U : 1U); - data->parents.push_back({ is_dict, is_list, 0, n_children }); -} - -void jsonPopParent(struct JsonWalk* data) -{ - data->parents.pop_back(); -} +using writer_var_t = std::variant, rapidjson::PrettyWriter>; void jsonIntFunc(tr_variant const& /*var*/, int64_t const val, void* vdata) { - auto buf = std::array{}; - auto const* const out = fmt::format_to(std::data(buf), FMT_COMPILE("{:d}"), val); - auto* const data = static_cast(vdata); - data->out.add(std::data(buf), static_cast(out - std::data(buf))); - jsonChildFunc(data); + std::visit([val](auto&& writer) { writer.Int64(val); }, *static_cast(vdata)); } void jsonBoolFunc(tr_variant const& /*var*/, bool const val, void* vdata) { - auto* data = static_cast(vdata); - data->out.add(val ? "true"sv : "false"sv); - jsonChildFunc(data); + std::visit([val](auto&& writer) { writer.Bool(val); }, *static_cast(vdata)); } void jsonRealFunc(tr_variant const& /*var*/, double const val, void* vdata) { - auto* const data = static_cast(vdata); - - auto const [buf, buflen] = data->out.reserve_space(64); - auto* walk = reinterpret_cast(buf); - auto const* const begin = walk; - - if (fabs(val - (int)val) < 0.00001) - { - walk = fmt::format_to(walk, FMT_COMPILE("{:.0f}"), val); - } - else - { - walk = fmt::format_to(walk, FMT_COMPILE("{:.4f}"), val); - } - - data->out.commit_space(walk - begin); - - jsonChildFunc(data); + std::visit([val](auto&& writer) { writer.Double(val); }, *static_cast(vdata)); } -// https://datatracker.ietf.org/doc/html/rfc8259#section-7 void jsonStringFunc(tr_variant const& /*var*/, std::string_view sv, void* vdata) { - auto* const data = static_cast(vdata); - - auto const utf8_str = tr_strv_convert_utf8(sv); - auto utf8_sv = std::string_view{ utf8_str }; - - auto& out = data->out; - auto const [buf, buflen] = out.reserve_space(std::size(utf8_sv) * 6 + 2); - auto* walk = reinterpret_cast(buf); - auto const* const begin = walk; - auto const* const end = begin + buflen; - - *walk++ = '"'; - - for (; !std::empty(utf8_sv); utf8_sv.remove_prefix(1)) - { - switch (utf8_sv.front()) - { - case '"': - *walk++ = '\\'; - *walk++ = '"'; - break; - - case '\\': - *walk++ = '\\'; - *walk++ = '\\'; - break; - - case '\b': - *walk++ = '\\'; - *walk++ = 'b'; - break; - - case '\f': - *walk++ = '\\'; - *walk++ = 'f'; - break; - - case '\n': - *walk++ = '\\'; - *walk++ = 'n'; - break; - - case '\r': - *walk++ = '\\'; - *walk++ = 'r'; - break; - - case '\t': - *walk++ = '\\'; - *walk++ = 't'; - break; - - default: - if (utf8_sv.front() >= '\u0000' && utf8_sv.front() <= '\u001f') - { - walk = fmt::format_to_n(walk, end - walk - 1, "\\u{:04x}", utf8_sv.front()).out; - } - else - { - *walk++ = utf8_sv.front(); - } - break; - } - } - - *walk++ = '"'; - TR_ASSERT(walk <= end); - out.commit_space(walk - begin); - - jsonChildFunc(data); + std::visit([sv](auto&& writer) { writer.String(std::data(sv), std::size(sv)); }, *static_cast(vdata)); } -void jsonDictBeginFunc(tr_variant const& var, void* vdata) +void jsonDictBeginFunc(tr_variant const& /*var*/, void* vdata) { - auto* const data = static_cast(vdata); - - jsonPushParent(data, var); - data->out.push_back('{'); - - if (variant_size(var) != 0U) - { - jsonIndent(data); - } + std::visit([](auto&& writer) { writer.StartObject(); }, *static_cast(vdata)); } -void jsonListBeginFunc(tr_variant const& var, void* vdata) +void jsonListBeginFunc(tr_variant const& /*var*/, void* vdata) { - auto* const data = static_cast(vdata); - - jsonPushParent(data, var); - data->out.push_back('['); - - if (variant_size(var) != 0U) - { - jsonIndent(data); - } + std::visit([](auto&& writer) { writer.StartArray(); }, *static_cast(vdata)); } void jsonContainerEndFunc(tr_variant const& var, void* vdata) { - auto* const data = static_cast(vdata); - - jsonPopParent(data); - - jsonIndent(data); + auto& writer_var = *static_cast(vdata); if (var.holds_alternative()) { - data->out.push_back('}'); + std::visit([](auto&& writer) { writer.EndObject(); }, writer_var); } else /* list */ { - data->out.push_back(']'); + std::visit([](auto&& writer) { writer.EndArray(); }, writer_var); } - - jsonChildFunc(data); } } // namespace to_string_helpers @@ -707,13 +382,19 @@ std::string tr_variant_serde::to_json_string(tr_variant const& var) const jsonContainerEndFunc, // }; - auto data = JsonWalk{ !compact_ }; - walk(var, Funcs, &data, true); - - auto& buf = data.out; - if (!compact_ && !std::empty(buf)) + auto out = std::string{}; + out.reserve(rapidjson::StringBuffer::kDefaultCapacity); + auto stream = string_output_stream{ out }; + auto writer = writer_var_t{}; + if (compact_) { - buf.push_back('\n'); + writer.emplace<0>(stream); } - return buf.to_string(); + else + { + writer.emplace<1>(stream); + } + walk(var, Funcs, &writer, true); + + return out; } diff --git a/tests/libtransmission/variant-test.cc b/tests/libtransmission/variant-test.cc index 4adde30ce..225f8b5b1 100644 --- a/tests/libtransmission/variant-test.cc +++ b/tests/libtransmission/variant-test.cc @@ -26,17 +26,7 @@ using namespace std::literals; -class VariantTest : public ::testing::Test -{ -protected: - static std::string stripWhitespace(std::string const& in) - { - auto s = in; - s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), ::isspace)); - s.erase(std::find_if_not(s.rbegin(), s.rend(), ::isspace).base(), s.end()); - return s; - } -}; +using VariantTest = ::testing::Test; #ifndef _WIN32 #define STACK_SMASH_DEPTH (1 * 1000 * 1000) @@ -363,7 +353,7 @@ TEST_F(VariantTest, bencToJson) for (auto const& test : Tests) { auto top = benc_serde.parse(test.benc).value_or(tr_variant{}); - EXPECT_EQ(test.expected, stripWhitespace(json_serde.to_string(top))); + EXPECT_EQ(test.expected, json_serde.to_string(top)); } } diff --git a/third-party/jsonsl/CMakeLists.txt b/third-party/jsonsl/CMakeLists.txt deleted file mode 100644 index c3924a409..000000000 --- a/third-party/jsonsl/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_library(jsonsl STATIC - jsonsl.c - jsonsl.h) - -target_include_directories(jsonsl - PUBLIC - ${CMAKE_CURRENT_LIST_DIR}) diff --git a/third-party/jsonsl/jsonsl.c b/third-party/jsonsl/jsonsl.c deleted file mode 100644 index 84860e705..000000000 --- a/third-party/jsonsl/jsonsl.c +++ /dev/null @@ -1,1668 +0,0 @@ -/* https://github.com/mnunberg/jsonsl */ - -/* Copyright © 2012-2015 Mark Nunberg. - * - * See included LICENSE file for license details. - */ - -#include "jsonsl.h" -#include -#include - -#ifdef JSONSL_USE_METRICS -#define XMETRICS \ - X(STRINGY_INSIGNIFICANT) \ - X(STRINGY_SLOWPATH) \ - X(ALLOWED_WHITESPACE) \ - X(QUOTE_FASTPATH) \ - X(SPECIAL_FASTPATH) \ - X(SPECIAL_WSPOP) \ - X(SPECIAL_SLOWPATH) \ - X(GENERIC) \ - X(STRUCTURAL_TOKEN) \ - X(SPECIAL_SWITCHFIRST) \ - X(STRINGY_CATCH) \ - X(NUMBER_FASTPATH) \ - X(ESCAPES) \ - X(TOTAL) \ - -struct jsonsl_metrics_st { -#define X(m) \ - unsigned long metric_##m; - XMETRICS -#undef X -}; - -static struct jsonsl_metrics_st GlobalMetrics = { 0 }; -static unsigned long GenericCounter[0x100] = { 0 }; -static unsigned long StringyCatchCounter[0x100] = { 0 }; - -#define INCR_METRIC(m) \ - GlobalMetrics.metric_##m++; - -#define INCR_GENERIC(c) \ - INCR_METRIC(GENERIC); \ - GenericCounter[c]++; \ - -#define INCR_STRINGY_CATCH(c) \ - INCR_METRIC(STRINGY_CATCH); \ - StringyCatchCounter[c]++; - -JSONSL_API -void jsonsl_dump_global_metrics(void) -{ - int ii; - printf("JSONSL Metrics:\n"); -#define X(m) \ - printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \ - (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100); - XMETRICS -#undef X - printf("Generic Characters:\n"); - for (ii = 0; ii < 0xff; ii++) { - if (GenericCounter[ii]) { - printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]); - } - } - printf("Weird string loop\n"); - for (ii = 0; ii < 0xff; ii++) { - if (StringyCatchCounter[ii]) { - printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]); - } - } -} - -#else -#define INCR_METRIC(m) -#define INCR_GENERIC(c) -#define INCR_STRINGY_CATCH(c) -JSONSL_API -void jsonsl_dump_global_metrics(void) { } -#endif /* JSONSL_USE_METRICS */ - -#define CASE_DIGITS \ -case '1': \ -case '2': \ -case '3': \ -case '4': \ -case '5': \ -case '6': \ -case '7': \ -case '8': \ -case '9': \ -case '0': - -static unsigned extract_special(unsigned); -static int is_special_end(unsigned); -static int is_allowed_whitespace(unsigned); -static int is_allowed_escape(unsigned); -static int is_simple_char(unsigned); -static char get_escape_equiv(unsigned); - -JSONSL_API -jsonsl_t jsonsl_new(int nlevels) -{ - unsigned int ii; - struct jsonsl_st * jsn; - - if (nlevels < 2) { - return NULL; - } - - jsn = (struct jsonsl_st *) - calloc(1, sizeof (*jsn) + - ( (nlevels-1) * sizeof (struct jsonsl_state_st) ) - ); - - jsn->levels_max = (unsigned int) nlevels; - jsn->max_callback_level = UINT_MAX; - jsonsl_reset(jsn); - for (ii = 0; ii < jsn->levels_max; ii++) { - jsn->stack[ii].level = ii; - } - return jsn; -} - -JSONSL_API -void jsonsl_reset(jsonsl_t jsn) -{ - jsn->tok_last = 0; - jsn->can_insert = 1; - jsn->pos = 0; - jsn->level = 0; - jsn->stopfl = 0; - jsn->in_escape = 0; - jsn->expecting = 0; -} - -JSONSL_API -void jsonsl_destroy(jsonsl_t jsn) -{ - if (jsn) { - free(jsn); - } -} - - -#define FASTPARSE_EXHAUSTED 1 -#define FASTPARSE_BREAK 0 - -/* - * This function is meant to accelerate string parsing, reducing the main loop's - * check if we are indeed a string. - * - * @param jsn the parser - * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position) - * @param[in,out] nbytes_p A pointer to the current size of the buffer - * @return true if all bytes have been exhausted (and thus the main loop can - * return), false if a special character was examined which requires greater - * examination. - */ -static int -jsonsl__str_fastparse(jsonsl_t jsn, - const jsonsl_uchar_t **bytes_p, size_t *nbytes_p) -{ - const jsonsl_uchar_t *bytes = *bytes_p; - const jsonsl_uchar_t *end; - for (end = bytes + *nbytes_p; bytes != end; bytes++) { - if ( -#ifdef JSONSL_USE_WCHAR - *bytes >= 0x100 || -#endif /* JSONSL_USE_WCHAR */ - (is_simple_char(*bytes))) { - INCR_METRIC(TOTAL); - INCR_METRIC(STRINGY_INSIGNIFICANT); - } else { - /* Once we're done here, re-calculate the position variables */ - jsn->pos += (bytes - *bytes_p); - *nbytes_p -= (bytes - *bytes_p); - *bytes_p = bytes; - return FASTPARSE_BREAK; - } - } - - /* Once we're done here, re-calculate the position variables */ - jsn->pos += (bytes - *bytes_p); - return FASTPARSE_EXHAUSTED; -} - -/* Functions exactly like str_fastparse, except it also accepts a 'state' - * argument, since the number's value is updated in the state. */ -static int -jsonsl__num_fastparse(jsonsl_t jsn, - const jsonsl_uchar_t **bytes_p, size_t *nbytes_p, - struct jsonsl_state_st *state) -{ - int exhausted = 1; - size_t nbytes = *nbytes_p; - const jsonsl_uchar_t *bytes = *bytes_p; - - for (; nbytes; nbytes--, bytes++) { - jsonsl_uchar_t c = *bytes; - if (isdigit(c)) { - INCR_METRIC(TOTAL); - INCR_METRIC(NUMBER_FASTPATH); - state->nelem = (state->nelem * 10) + (c - 0x30); - } else { - exhausted = 0; - break; - } - } - jsn->pos += (*nbytes_p - nbytes); - if (exhausted) { - return FASTPARSE_EXHAUSTED; - } - *nbytes_p = nbytes; - *bytes_p = bytes; - return FASTPARSE_BREAK; -} - -JSONSL_API -void -jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) -{ - -#define INVOKE_ERROR(eb) \ - if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \ - goto GT_AGAIN; \ - } \ - return; - -#define STACK_PUSH \ - if (jsn->level >= (levels_max-1)) { \ - jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \ - return; \ - } \ - state = jsn->stack + (++jsn->level); \ - state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \ - state->pos_begin = jsn->pos; - -#define STACK_POP_NOPOS \ - state->pos_cur = jsn->pos; \ - state = jsn->stack + (--jsn->level); - - -#define STACK_POP \ - STACK_POP_NOPOS; \ - state->pos_cur = jsn->pos; - -#define CALLBACK_AND_POP_NOPOS(T) \ - state->pos_cur = jsn->pos; \ - DO_CALLBACK(T, POP); \ - state->nescapes = 0; \ - state = jsn->stack + (--jsn->level); - -#define CALLBACK_AND_POP(T) \ - CALLBACK_AND_POP_NOPOS(T); \ - state->pos_cur = jsn->pos; - -#define SPECIAL_POP \ - CALLBACK_AND_POP(SPECIAL); \ - jsn->expecting = 0; \ - jsn->tok_last = 0; \ - -#define CUR_CHAR (*(jsonsl_uchar_t*)c) - -#define DO_CALLBACK(T, action) \ - if (jsn->call_##T && \ - jsn->max_callback_level > state->level && \ - state->ignore_callback == 0) { \ - \ - if (jsn->action_callback_##action) { \ - jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \ - } else if (jsn->action_callback) { \ - jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \ - } \ - if (jsn->stopfl) { return; } \ - } - - /** - * Verifies that we are able to insert the (non-string) item into a hash. - */ -#define ENSURE_HVAL \ - if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \ - INVOKE_ERROR(HKEY_EXPECTED); \ - } - -#define VERIFY_SPECIAL(lit) \ - if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \ - INVOKE_ERROR(SPECIAL_EXPECTED); \ - } - -#define VERIFY_SPECIAL_CI(lit) \ - if (tolower(CUR_CHAR) != (lit)[jsn->pos - state->pos_begin]) { \ - INVOKE_ERROR(SPECIAL_EXPECTED); \ - } - -#define STATE_SPECIAL_LENGTH \ - (state)->nescapes - -#define IS_NORMAL_NUMBER \ - ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \ - (state)->special_flags == JSONSL_SPECIALf_SIGNED) - -#define STATE_NUM_LAST jsn->tok_last - -#define CONTINUE_NEXT_CHAR() continue - - const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes; - size_t levels_max = jsn->levels_max; - struct jsonsl_state_st *state = jsn->stack + jsn->level; - jsn->base = bytes; - - for (; nbytes; nbytes--, jsn->pos++, c++) { - unsigned state_type; - INCR_METRIC(TOTAL); - - GT_AGAIN: - state_type = state->type; - /* Most common type is typically a string: */ - if (state_type & JSONSL_Tf_STRINGY) { - /* Special escape handling for some stuff */ - if (jsn->in_escape) { - jsn->in_escape = 0; - if (!is_allowed_escape(CUR_CHAR)) { - INVOKE_ERROR(ESCAPE_INVALID); - } else if (CUR_CHAR == 'u') { - DO_CALLBACK(UESCAPE, UESCAPE); - if (jsn->return_UESCAPE) { - return; - } - } - CONTINUE_NEXT_CHAR(); - } - - if (jsonsl__str_fastparse(jsn, &c, &nbytes) == - FASTPARSE_EXHAUSTED) { - /* No need to readjust variables as we've exhausted the iterator */ - return; - } else { - if (CUR_CHAR == '"') { - goto GT_QUOTE; - } else if (CUR_CHAR == '\\') { - goto GT_ESCAPE; - } else { - INVOKE_ERROR(WEIRD_WHITESPACE); - } - } - INCR_METRIC(STRINGY_SLOWPATH); - - } else if (state_type == JSONSL_T_SPECIAL) { - /* Fast track for signed/unsigned */ - if (IS_NORMAL_NUMBER) { - if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) == - FASTPARSE_EXHAUSTED) { - return; - } else { - goto GT_SPECIAL_NUMERIC; - } - } else if (state->special_flags == JSONSL_SPECIALf_DASH) { -#ifdef JSONSL_PARSE_NAN - if (CUR_CHAR == 'I' || CUR_CHAR == 'i') { - /* parsing -Infinity? */ - state->special_flags = JSONSL_SPECIALf_NEG_INF; - CONTINUE_NEXT_CHAR(); - } -#endif - - if (!isdigit(CUR_CHAR)) { - INVOKE_ERROR(INVALID_NUMBER); - } - - if (CUR_CHAR == '0') { - state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED; - } else if (isdigit(CUR_CHAR)) { - state->special_flags = JSONSL_SPECIALf_SIGNED; - state->nelem = CUR_CHAR - 0x30; - } else { - INVOKE_ERROR(INVALID_NUMBER); - } - CONTINUE_NEXT_CHAR(); - - } else if (state->special_flags == JSONSL_SPECIALf_ZERO) { - if (isdigit(CUR_CHAR)) { - /* Following a zero! */ - INVOKE_ERROR(INVALID_NUMBER); - } - /* Unset the 'zero' flag: */ - if (state->special_flags & JSONSL_SPECIALf_SIGNED) { - state->special_flags = JSONSL_SPECIALf_SIGNED; - } else { - state->special_flags = JSONSL_SPECIALf_UNSIGNED; - } - goto GT_SPECIAL_NUMERIC; - } - - if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) && - !(state->special_flags & JSONSL_SPECIALf_INF)) { - GT_SPECIAL_NUMERIC: - switch (CUR_CHAR) { - CASE_DIGITS - STATE_NUM_LAST = '1'; - CONTINUE_NEXT_CHAR(); - - case '.': - if (state->special_flags & JSONSL_SPECIALf_FLOAT) { - INVOKE_ERROR(INVALID_NUMBER); - } - state->special_flags |= JSONSL_SPECIALf_FLOAT; - STATE_NUM_LAST = '.'; - CONTINUE_NEXT_CHAR(); - - case 'e': - case 'E': - if (state->special_flags & JSONSL_SPECIALf_EXPONENT) { - INVOKE_ERROR(INVALID_NUMBER); - } - state->special_flags |= JSONSL_SPECIALf_EXPONENT; - STATE_NUM_LAST = 'e'; - CONTINUE_NEXT_CHAR(); - - case '-': - case '+': - if (STATE_NUM_LAST != 'e') { - INVOKE_ERROR(INVALID_NUMBER); - } - STATE_NUM_LAST = '-'; - CONTINUE_NEXT_CHAR(); - - default: - if (is_special_end(CUR_CHAR)) { - goto GT_SPECIAL_POP; - } - INVOKE_ERROR(INVALID_NUMBER); - break; - } - } - /* else if (!NUMERIC) */ - if (!is_special_end(CUR_CHAR)) { - STATE_SPECIAL_LENGTH++; - - /* Verify TRUE, FALSE, NULL */ - if (state->special_flags == JSONSL_SPECIALf_TRUE) { - VERIFY_SPECIAL("true"); - } else if (state->special_flags == JSONSL_SPECIALf_FALSE) { - VERIFY_SPECIAL("false"); - } else if (state->special_flags == JSONSL_SPECIALf_NULL) { - VERIFY_SPECIAL("null"); -#ifdef JSONSL_PARSE_NAN - } else if (state->special_flags == JSONSL_SPECIALf_POS_INF) { - VERIFY_SPECIAL_CI("infinity"); - } else if (state->special_flags == JSONSL_SPECIALf_NEG_INF) { - VERIFY_SPECIAL_CI("-infinity"); - } else if (state->special_flags == JSONSL_SPECIALf_NAN) { - VERIFY_SPECIAL_CI("nan"); - } else if (state->special_flags & JSONSL_SPECIALf_NULL || - state->special_flags & JSONSL_SPECIALf_NAN) { - /* previous char was "n", are we parsing null or nan? */ - if (CUR_CHAR != 'u') { - state->special_flags &= ~JSONSL_SPECIALf_NULL; - } - - if (tolower(CUR_CHAR) != 'a') { - state->special_flags &= ~JSONSL_SPECIALf_NAN; - } -#endif - } - INCR_METRIC(SPECIAL_FASTPATH); - CONTINUE_NEXT_CHAR(); - } - - GT_SPECIAL_POP: - jsn->can_insert = 0; - if (IS_NORMAL_NUMBER) { - /* Nothing */ - } else if (state->special_flags == JSONSL_SPECIALf_ZERO || - state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) { - /* 0 is unsigned! */ - state->special_flags = JSONSL_SPECIALf_UNSIGNED; - } else if (state->special_flags == JSONSL_SPECIALf_DASH) { - /* Still in dash! */ - INVOKE_ERROR(INVALID_NUMBER); - } else if (state->special_flags & JSONSL_SPECIALf_INF) { - if (STATE_SPECIAL_LENGTH != 8) { - INVOKE_ERROR(SPECIAL_INCOMPLETE); - } - state->nelem = 1; - } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) { - /* Check that we're not at the end of a token */ - if (STATE_NUM_LAST != '1') { - INVOKE_ERROR(INVALID_NUMBER); - } - } else if (state->special_flags == JSONSL_SPECIALf_TRUE) { - if (STATE_SPECIAL_LENGTH != 4) { - INVOKE_ERROR(SPECIAL_INCOMPLETE); - } - state->nelem = 1; - } else if (state->special_flags == JSONSL_SPECIALf_FALSE) { - if (STATE_SPECIAL_LENGTH != 5) { - INVOKE_ERROR(SPECIAL_INCOMPLETE); - } - } else if (state->special_flags == JSONSL_SPECIALf_NULL) { - if (STATE_SPECIAL_LENGTH != 4) { - INVOKE_ERROR(SPECIAL_INCOMPLETE); - } - } - SPECIAL_POP; - jsn->expecting = ','; - if (is_allowed_whitespace(CUR_CHAR)) { - CONTINUE_NEXT_CHAR(); - } - /** - * This works because we have a non-whitespace token - * which is not a special token. If this is a structural - * character then it will be gracefully handled by the - * switch statement. Otherwise it will default to the 'special' - * state again, - */ - goto GT_STRUCTURAL_TOKEN; - } else if (is_allowed_whitespace(CUR_CHAR)) { - INCR_METRIC(ALLOWED_WHITESPACE); - /* So we're not special. Harmless insignificant whitespace - * passthrough - */ - CONTINUE_NEXT_CHAR(); - } else if (extract_special(CUR_CHAR)) { - /* not a string, whitespace, or structural token. must be special */ - goto GT_SPECIAL_BEGIN; - } - - INCR_GENERIC(CUR_CHAR); - - if (CUR_CHAR == '"') { - GT_QUOTE: - jsn->can_insert = 0; - switch (state_type) { - - /* the end of a string or hash key */ - case JSONSL_T_STRING: - CALLBACK_AND_POP(STRING); - CONTINUE_NEXT_CHAR(); - case JSONSL_T_HKEY: - CALLBACK_AND_POP(HKEY); - CONTINUE_NEXT_CHAR(); - - case JSONSL_T_OBJECT: - state->nelem++; - if ( (state->nelem-1) % 2 ) { - /* Odd, this must be a hash value */ - if (jsn->tok_last != ':') { - INVOKE_ERROR(MISSING_TOKEN); - } - jsn->expecting = ','; /* Can't figure out what to expect next */ - jsn->tok_last = 0; - - STACK_PUSH; - state->type = JSONSL_T_STRING; - DO_CALLBACK(STRING, PUSH); - - } else { - /* hash key */ - if (jsn->expecting != '"') { - INVOKE_ERROR(STRAY_TOKEN); - } - jsn->tok_last = 0; - jsn->expecting = ':'; - - STACK_PUSH; - state->type = JSONSL_T_HKEY; - DO_CALLBACK(HKEY, PUSH); - } - CONTINUE_NEXT_CHAR(); - - case JSONSL_T_LIST: - state->nelem++; - STACK_PUSH; - state->type = JSONSL_T_STRING; - jsn->expecting = ','; - jsn->tok_last = 0; - DO_CALLBACK(STRING, PUSH); - CONTINUE_NEXT_CHAR(); - - case JSONSL_T_SPECIAL: - INVOKE_ERROR(STRAY_TOKEN); - break; - - default: - INVOKE_ERROR(STRING_OUTSIDE_CONTAINER); - break; - } /* switch(state->type) */ - } else if (CUR_CHAR == '\\') { - GT_ESCAPE: - INCR_METRIC(ESCAPES); - /* Escape */ - if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) { - INVOKE_ERROR(ESCAPE_OUTSIDE_STRING); - } - state->nescapes++; - jsn->in_escape = 1; - CONTINUE_NEXT_CHAR(); - } /* " or \ */ - - GT_STRUCTURAL_TOKEN: - switch (CUR_CHAR) { - case ':': - INCR_METRIC(STRUCTURAL_TOKEN); - if (jsn->expecting != CUR_CHAR) { - INVOKE_ERROR(STRAY_TOKEN); - } - jsn->tok_last = ':'; - jsn->can_insert = 1; - jsn->expecting = '"'; - CONTINUE_NEXT_CHAR(); - - case ',': - INCR_METRIC(STRUCTURAL_TOKEN); - /** - * The comma is one of the more generic tokens. - * In the context of an OBJECT, the can_insert flag - * should never be set, and no other action is - * necessary. - */ - if (jsn->expecting != CUR_CHAR) { - /* make this branch execute only when we haven't manually - * just placed the ',' in the expecting register. - */ - INVOKE_ERROR(STRAY_TOKEN); - } - - if (state->type == JSONSL_T_OBJECT) { - /* end of hash value, expect a string as a hash key */ - jsn->expecting = '"'; - } else { - jsn->can_insert = 1; - } - - jsn->tok_last = ','; - jsn->expecting = '"'; - CONTINUE_NEXT_CHAR(); - - /* new list or object */ - /* hashes are more common */ - case '{': - case '[': - INCR_METRIC(STRUCTURAL_TOKEN); - if (!jsn->can_insert) { - INVOKE_ERROR(CANT_INSERT); - } - - ENSURE_HVAL; - state->nelem++; - - STACK_PUSH; - /* because the constants match the opening delimiters, we can do this: */ - state->type = CUR_CHAR; - state->nelem = 0; - jsn->can_insert = 1; - if (CUR_CHAR == '{') { - /* If we're a hash, we expect a key first, which is quoted */ - jsn->expecting = '"'; - } - if (CUR_CHAR == JSONSL_T_OBJECT) { - DO_CALLBACK(OBJECT, PUSH); - } else { - DO_CALLBACK(LIST, PUSH); - } - jsn->tok_last = 0; - CONTINUE_NEXT_CHAR(); - - /* closing of list or object */ - case '}': - case ']': - INCR_METRIC(STRUCTURAL_TOKEN); - if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) { - INVOKE_ERROR(TRAILING_COMMA); - } - - jsn->can_insert = 0; - jsn->level--; - jsn->expecting = ','; - jsn->tok_last = 0; - if (CUR_CHAR == ']') { - if (state->type != '[') { - INVOKE_ERROR(BRACKET_MISMATCH); - } - DO_CALLBACK(LIST, POP); - } else { - if (state->type != '{') { - INVOKE_ERROR(BRACKET_MISMATCH); - } else if (state->nelem && state->nelem % 2 != 0) { - INVOKE_ERROR(VALUE_EXPECTED); - } - DO_CALLBACK(OBJECT, POP); - } - state = jsn->stack + jsn->level; - state->pos_cur = jsn->pos; - CONTINUE_NEXT_CHAR(); - - default: - GT_SPECIAL_BEGIN: - /** - * Not a string, not a structural token, and not benign whitespace. - * Technically we should iterate over the character always, but since - * we are not doing full numerical/value decoding anyway (but only hinting), - * we only check upon entry. - */ - if (state->type != JSONSL_T_SPECIAL) { - int special_flags = extract_special(CUR_CHAR); - if (!special_flags) { - /** - * Try to do some heuristics here anyway to figure out what kind of - * error this is. The 'special' case is a fallback scenario anyway. - */ - if (CUR_CHAR == '\0') { - INVOKE_ERROR(FOUND_NULL_BYTE); - } else if (CUR_CHAR < 0x20) { - INVOKE_ERROR(WEIRD_WHITESPACE); - } else { - INVOKE_ERROR(SPECIAL_EXPECTED); - } - } - ENSURE_HVAL; - state->nelem++; - if (!jsn->can_insert) { - INVOKE_ERROR(CANT_INSERT); - } - STACK_PUSH; - state->type = JSONSL_T_SPECIAL; - state->special_flags = special_flags; - STATE_SPECIAL_LENGTH = 1; - - if (special_flags == JSONSL_SPECIALf_UNSIGNED) { - state->nelem = CUR_CHAR - 0x30; - STATE_NUM_LAST = '1'; - } else { - STATE_NUM_LAST = '-'; - state->nelem = 0; - } - DO_CALLBACK(SPECIAL, PUSH); - } - CONTINUE_NEXT_CHAR(); - } - } -} - -JSONSL_API -const char* jsonsl_strerror(jsonsl_error_t err) -{ - if (err == JSONSL_ERROR_SUCCESS) { - return "SUCCESS"; - } -#define X(t) \ - if (err == JSONSL_ERROR_##t) \ - return #t; - JSONSL_XERR; -#undef X - return ""; -} - -JSONSL_API -const char *jsonsl_strtype(jsonsl_type_t type) -{ -#define X(o,c) \ - if (type == JSONSL_T_##o) \ - return #o; - JSONSL_XTYPE -#undef X - return "UNKNOWN TYPE"; - -} - -/* - * - * JPR/JSONPointer functions - * - * - */ -#ifndef JSONSL_NO_JPR -static -jsonsl_jpr_type_t -populate_component(char *in, - struct jsonsl_jpr_component_st *component, - char **next, - jsonsl_error_t *errp) -{ - unsigned long pctval; - char *c = NULL, *outp = NULL, *end = NULL; - size_t input_len; - jsonsl_jpr_type_t ret = JSONSL_PATH_NONE; - - if (*next == NULL || *(*next) == '\0') { - return JSONSL_PATH_NONE; - } - - /* Replace the next / with a NULL */ - *next = strstr(in, "/"); - if (*next != NULL) { - *(*next) = '\0'; /* drop the forward slash */ - input_len = *next - in; - end = *next; - *next += 1; /* next character after the '/' */ - } else { - input_len = strlen(in); - end = in + input_len + 1; - } - - component->pstr = in; - - /* Check for special components of interest */ - if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) { - /* Lone wildcard */ - ret = JSONSL_PATH_WILDCARD; - goto GT_RET; - } else if (isdigit(*in)) { - /* ASCII Numeric */ - char *endptr; - component->idx = strtoul(in, &endptr, 10); - if (endptr && *endptr == '\0') { - ret = JSONSL_PATH_NUMERIC; - goto GT_RET; - } - } - - /* Default, it's a string */ - ret = JSONSL_PATH_STRING; - for (c = outp = in; c < end; c++, outp++) { - char origc; - if (*c != '%') { - goto GT_ASSIGN; - } - /* - * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' } - */ - - /* Need %XX */ - if (c+2 >= end) { - *errp = JSONSL_ERROR_PERCENT_BADHEX; - return JSONSL_PATH_INVALID; - } - if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) { - *errp = JSONSL_ERROR_PERCENT_BADHEX; - return JSONSL_PATH_INVALID; - } - - /* Temporarily null-terminate the characters */ - origc = *(c+3); - *(c+3) = '\0'; - pctval = strtoul(c+1, NULL, 16); - *(c+3) = origc; - - *outp = (char) pctval; - c += 2; - continue; - - GT_ASSIGN: - *outp = *c; - } - /* Null-terminate the string */ - for (; outp < c; outp++) { - *outp = '\0'; - } - - GT_RET: - component->ptype = ret; - if (ret != JSONSL_PATH_WILDCARD) { - component->len = strlen(component->pstr); - } - return ret; -} - -JSONSL_API -jsonsl_jpr_t -jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) -{ - char *my_copy = NULL; - int count, curidx; - struct jsonsl_jpr_st *ret = NULL; - struct jsonsl_jpr_component_st *components = NULL; - size_t origlen; - jsonsl_error_t errstacked; - -#define JPR_BAIL(err) *errp = err; goto GT_ERROR; - - if (errp == NULL) { - errp = &errstacked; - } - - if (path == NULL || *path != '/') { - JPR_BAIL(JSONSL_ERROR_JPR_NOROOT); - } - - count = 1; - path++; - { - const char *c = path; - for (; *c; c++) { - if (*c == '/') { - count++; - if (*(c+1) == '/') { - JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH); - } - } - } - } - if(*path) { - count++; - } - - components = (struct jsonsl_jpr_component_st *) - malloc(sizeof(*components) * count); - if (!components) { - JPR_BAIL(JSONSL_ERROR_ENOMEM); - } - - my_copy = (char *)malloc(strlen(path) + 1); - if (!my_copy) { - JPR_BAIL(JSONSL_ERROR_ENOMEM); - } - - strcpy(my_copy, path); - - components[0].ptype = JSONSL_PATH_ROOT; - - if (*my_copy) { - char *cur = my_copy; - int pathret = JSONSL_PATH_STRING; - curidx = 1; - while (curidx < count) { - pathret = populate_component(cur, components + curidx, &cur, errp); - if (pathret > 0) { - curidx++; - } else { - break; - } - } - - if (pathret == JSONSL_PATH_INVALID) { - JPR_BAIL(JSONSL_ERROR_JPR_BADPATH); - } - } else { - curidx = 1; - } - - path--; /*revert path to leading '/' */ - origlen = strlen(path) + 1; - ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret)); - if (!ret) { - JPR_BAIL(JSONSL_ERROR_ENOMEM); - } - ret->orig = (char *)malloc(origlen); - if (!ret->orig) { - JPR_BAIL(JSONSL_ERROR_ENOMEM); - } - ret->components = components; - ret->ncomponents = curidx; - ret->basestr = my_copy; - ret->norig = origlen-1; - strcpy(ret->orig, path); - - return ret; - - GT_ERROR: - free(my_copy); - free(components); - if (ret) { - free(ret->orig); - } - free(ret); - return NULL; -#undef JPR_BAIL -} - -void jsonsl_jpr_destroy(jsonsl_jpr_t jpr) -{ - free(jpr->components); - free(jpr->basestr); - free(jpr->orig); - free(jpr); -} - -/** - * Call when there is a possibility of a match, either as a final match or - * as a path within a match - * @param jpr The JPR path - * @param component Component corresponding to the current element - * @param prlevel The level of the *parent* - * @param chtype The type of the child - * @return Match status - */ -static jsonsl_jpr_match_t -jsonsl__match_continue(jsonsl_jpr_t jpr, - const struct jsonsl_jpr_component_st *component, - unsigned prlevel, unsigned chtype) -{ - const struct jsonsl_jpr_component_st *next_comp = component + 1; - if (prlevel == jpr->ncomponents - 1) { - /* This is the match. Check the expected type of the match against - * the child */ - if (jpr->match_type == 0 || jpr->match_type == chtype) { - return JSONSL_MATCH_COMPLETE; - } else { - return JSONSL_MATCH_TYPE_MISMATCH; - } - } - if (chtype == JSONSL_T_LIST) { - if (next_comp->ptype == JSONSL_PATH_NUMERIC) { - return JSONSL_MATCH_POSSIBLE; - } else { - return JSONSL_MATCH_TYPE_MISMATCH; - } - } else if (chtype == JSONSL_T_OBJECT) { - if (next_comp->ptype == JSONSL_PATH_NUMERIC) { - return JSONSL_MATCH_TYPE_MISMATCH; - } else { - return JSONSL_MATCH_POSSIBLE; - } - } else { - return JSONSL_MATCH_TYPE_MISMATCH; - } -} - -JSONSL_API -jsonsl_jpr_match_t -jsonsl_path_match(jsonsl_jpr_t jpr, - const struct jsonsl_state_st *parent, - const struct jsonsl_state_st *child, - const char *key, size_t nkey) -{ - const struct jsonsl_jpr_component_st *comp; - if (!parent) { - /* No parent. Return immediately since it's always a match */ - return jsonsl__match_continue(jpr, jpr->components, 0, child->type); - } - - comp = jpr->components + parent->level; - - /* note that we don't need to verify the type of the match, this is - * always done through the previous call to jsonsl__match_continue. - * If we are in a POSSIBLE tree then we can be certain the types (at - * least at this level) are correct */ - if (parent->type == JSONSL_T_OBJECT) { - if (comp->len != nkey || strncmp(key, comp->pstr, nkey) != 0) { - return JSONSL_MATCH_NOMATCH; - } - } else { - if (comp->idx != parent->nelem - 1) { - return JSONSL_MATCH_NOMATCH; - } - } - return jsonsl__match_continue(jpr, comp, parent->level, child->type); -} - -JSONSL_API -jsonsl_jpr_match_t -jsonsl_jpr_match(jsonsl_jpr_t jpr, - unsigned int parent_type, - unsigned int parent_level, - const char *key, - size_t nkey) -{ - /* find our current component. This is the child level */ - int cmpret; - struct jsonsl_jpr_component_st *p_component; - p_component = jpr->components + parent_level; - - if (parent_level >= jpr->ncomponents) { - return JSONSL_MATCH_NOMATCH; - } - - /* Lone query for 'root' element. Always matches */ - if (parent_level == 0) { - if (jpr->ncomponents == 1) { - return JSONSL_MATCH_COMPLETE; - } else { - return JSONSL_MATCH_POSSIBLE; - } - } - - /* Wildcard, always matches */ - if (p_component->ptype == JSONSL_PATH_WILDCARD) { - if (parent_level == jpr->ncomponents-1) { - return JSONSL_MATCH_COMPLETE; - } else { - return JSONSL_MATCH_POSSIBLE; - } - } - - /* Check numeric array index. This gets its special block so we can avoid - * string comparisons */ - if (p_component->ptype == JSONSL_PATH_NUMERIC) { - if (parent_type == JSONSL_T_LIST) { - if (p_component->idx != nkey) { - /* Wrong index */ - return JSONSL_MATCH_NOMATCH; - } else { - if (parent_level == jpr->ncomponents-1) { - /* This is the last element of the path */ - return JSONSL_MATCH_COMPLETE; - } else { - /* Intermediate element */ - return JSONSL_MATCH_POSSIBLE; - } - } - } else if (p_component->is_arridx) { - /* Numeric and an array index (set explicitly by user). But not - * a list for a parent */ - return JSONSL_MATCH_TYPE_MISMATCH; - } - } else if (parent_type == JSONSL_T_LIST) { - return JSONSL_MATCH_TYPE_MISMATCH; - } - - /* Check lengths */ - if (p_component->len != nkey) { - return JSONSL_MATCH_NOMATCH; - } - - /* Check string comparison */ - cmpret = strncmp(p_component->pstr, key, nkey); - if (cmpret == 0) { - if (parent_level == jpr->ncomponents-1) { - return JSONSL_MATCH_COMPLETE; - } else { - return JSONSL_MATCH_POSSIBLE; - } - } - - return JSONSL_MATCH_NOMATCH; -} - -JSONSL_API -void jsonsl_jpr_match_state_init(jsonsl_t jsn, - jsonsl_jpr_t *jprs, - size_t njprs) -{ - size_t ii, *firstjmp; - if (njprs == 0) { - return; - } - jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs); - jsn->jpr_count = njprs; - jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max); - memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs); - /* Set the initial jump table values */ - - firstjmp = jsn->jpr_root; - for (ii = 0; ii < njprs; ii++) { - firstjmp[ii] = ii+1; - } -} - -JSONSL_API -void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn) -{ - if (jsn->jpr_count == 0) { - return; - } - - free(jsn->jpr_root); - free(jsn->jprs); - jsn->jprs = NULL; - jsn->jpr_root = NULL; - jsn->jpr_count = 0; -} - -/** - * This function should be called exactly once on each element... - * This should also be called in recursive order, since we rely - * on the parent having been initialized for a match. - * - * Since the parent is checked for a match as well, we maintain a 'serial' counter. - * Whenever we traverse an element, we expect the serial to be the same as a global - * integer. If they do not match, we re-initialize the context, and set the serial. - * - * This ensures a type of consistency without having a proactive reset by the - * main lexer itself. - * - */ -JSONSL_API -jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn, - struct jsonsl_state_st *state, - const char *key, - size_t nkey, - jsonsl_jpr_match_t *out) -{ - struct jsonsl_state_st *parent_state; - jsonsl_jpr_t ret = NULL; - - /* Jump and JPR tables for our own state and the parent state */ - size_t *jmptable, *pjmptable; - size_t jmp_cur, ii, ourjmpidx; - - if (!jsn->jpr_root) { - *out = JSONSL_MATCH_NOMATCH; - return NULL; - } - - pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1)); - jmptable = pjmptable + jsn->jpr_count; - - /* If the parent cannot match, then invalidate it */ - if (*pjmptable == 0) { - *jmptable = 0; - *out = JSONSL_MATCH_NOMATCH; - return NULL; - } - - parent_state = jsn->stack + state->level - 1; - - if (parent_state->type == JSONSL_T_LIST) { - nkey = (size_t) parent_state->nelem; - } - - *jmptable = 0; - ourjmpidx = 0; - memset(jmptable, 0, sizeof(int) * jsn->jpr_count); - - for (ii = 0; ii < jsn->jpr_count; ii++) { - jmp_cur = pjmptable[ii]; - if (jmp_cur) { - jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1]; - *out = jsonsl_jpr_match(jpr, - parent_state->type, - parent_state->level, - key, nkey); - if (*out == JSONSL_MATCH_COMPLETE) { - ret = jpr; - *jmptable = 0; - return ret; - } else if (*out == JSONSL_MATCH_POSSIBLE) { - jmptable[ourjmpidx] = ii+1; - ourjmpidx++; - } - } else { - break; - } - } - if (!*jmptable) { - *out = JSONSL_MATCH_NOMATCH; - } - return NULL; -} - -JSONSL_API -const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match) -{ -#define X(T,v) \ - if ( match == JSONSL_MATCH_##T ) \ - return #T; - JSONSL_XMATCH -#undef X - return ""; -} - -#endif /* JSONSL_WITH_JPR */ - -static char * -jsonsl__writeutf8(uint32_t pt, char *out) -{ - #define ADD_OUTPUT(c) *out = (char)(c); out++; - - if (pt < 0x80) { - ADD_OUTPUT(pt); - } else if (pt < 0x800) { - ADD_OUTPUT((pt >> 6) | 0xC0); - ADD_OUTPUT((pt & 0x3F) | 0x80); - } else if (pt < 0x10000) { - ADD_OUTPUT((pt >> 12) | 0xE0); - ADD_OUTPUT(((pt >> 6) & 0x3F) | 0x80); - ADD_OUTPUT((pt & 0x3F) | 0x80); - } else { - ADD_OUTPUT((pt >> 18) | 0xF0); - ADD_OUTPUT(((pt >> 12) & 0x3F) | 0x80); - ADD_OUTPUT(((pt >> 6) & 0x3F) | 0x80); - ADD_OUTPUT((pt & 0x3F) | 0x80); - } - return out; - #undef ADD_OUTPUT -} - -/* Thanks snej (https://github.com/mnunberg/jsonsl/issues/9) */ -static int -jsonsl__digit2int(char ch) { - int d = ch - '0'; - if ((unsigned) d < 10) { - return d; - } - d = ch - 'a'; - if ((unsigned) d < 6) { - return d + 10; - } - d = ch - 'A'; - if ((unsigned) d < 6) { - return d + 10; - } - return -1; -} - -/* Assume 's' is at least 4 bytes long */ -static int -jsonsl__get_uescape_16(const char *s) -{ - int ret = 0; - int cur; - - #define GET_DIGIT(off) \ - cur = jsonsl__digit2int(s[off]); \ - if (cur == -1) { return -1; } \ - ret |= (cur << (12 - (off * 4))); - - GET_DIGIT(0); - GET_DIGIT(1); - GET_DIGIT(2); - GET_DIGIT(3); - #undef GET_DIGIT - return ret; -} - -/** - * Utility function to convert escape sequences - */ -JSONSL_API -size_t jsonsl_util_unescape_ex(const char *in, - char *out, - size_t len, - const int toEscape[128], - unsigned *oflags, - jsonsl_error_t *err, - const char **errat) -{ - const unsigned char *c = (const unsigned char*)in; - char *begin_p = out; - unsigned oflags_s; - uint16_t last_codepoint = 0; - - if (!oflags) { - oflags = &oflags_s; - } - *oflags = 0; - - #define UNESCAPE_BAIL(e,offset) \ - *err = JSONSL_ERROR_##e; \ - if (errat) { \ - *errat = (const char*)(c+ (ptrdiff_t)(offset)); \ - } \ - return 0; - - for (; len; len--, c++, out++) { - int uescval; - if (*c != '\\') { - /* Not an escape, so we don't care about this */ - goto GT_ASSIGN; - } - - if (len < 2) { - UNESCAPE_BAIL(ESCAPE_INVALID, 0); - } - if (!is_allowed_escape(c[1])) { - UNESCAPE_BAIL(ESCAPE_INVALID, 1) - } - if ((toEscape && toEscape[(unsigned char)c[1] & 0x7f] == 0 && - c[1] != '\\' && c[1] != '"')) { - /* if we don't want to unescape this string, write the escape sequence to the output */ - *out++ = *c++; - --len; - goto GT_ASSIGN; - } - - if (c[1] != 'u') { - /* simple skip-and-replace using pre-defined maps. - * TODO: should the maps actually reflect the desired - * replacement character in toEscape? - */ - char esctmp = get_escape_equiv(c[1]); - if (esctmp) { - /* Check if there is a corresponding replacement */ - *out = esctmp; - } else { - /* Just gobble up the 'reverse-solidus' */ - *out = c[1]; - } - len--; - c++; - /* do not assign, just continue */ - continue; - } - - /* next == 'u' */ - if (len < 6) { - /* Need at least six characters.. */ - UNESCAPE_BAIL(UESCAPE_TOOSHORT, 2); - } - - uescval = jsonsl__get_uescape_16((const char *)c + 2); - if (uescval == -1) { - UNESCAPE_BAIL(PERCENT_BADHEX, -1); - } - - if (last_codepoint) { - uint16_t w1 = last_codepoint, w2 = (uint16_t)uescval; - uint32_t cp; - - if (uescval < 0xDC00 || uescval > 0xDFFF) { - UNESCAPE_BAIL(INVALID_CODEPOINT, -1); - } - - cp = (w1 & 0x3FF) << 10; - cp |= (w2 & 0x3FF); - cp += 0x10000; - - out = jsonsl__writeutf8(cp, out) - 1; - last_codepoint = 0; - - } else if (uescval < 0xD800 || uescval > 0xDFFF) { - *oflags |= JSONSL_SPECIALf_NONASCII; - out = jsonsl__writeutf8(uescval, out) - 1; - - } else if (uescval < 0xDC00) { - *oflags |= JSONSL_SPECIALf_NONASCII; - last_codepoint = (uint16_t)uescval; - out--; - } else { - UNESCAPE_BAIL(INVALID_CODEPOINT, 2); - } - - /* Post uescape cleanup */ - len -= 5; /* Gobble up 5 chars after 'u' */ - c += 5; - continue; - - /* Only reached by previous branches */ - GT_ASSIGN: - *out = *c; - } - - if (last_codepoint) { - *err = JSONSL_ERROR_INVALID_CODEPOINT; - return 0; - } - - *err = JSONSL_ERROR_SUCCESS; - return out - begin_p; -} - -/** - * Character Table definitions. - * These were all generated via srcutil/genchartables.pl - */ - -/** - * This table contains the beginnings of non-string - * allowable (bareword) values. - */ -static unsigned short Special_Table[0x100] = { - /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ - /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */ - /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */ - /* 0x2e */ 0,0, /* 0x2f */ - /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */ - /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */ - /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */ - /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */ - /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */ - /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */ - /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */ - /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */ - /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */ - /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */ - /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x48 */ - /* 0x49 */ JSONSL__INF_PROXY /* */, /* 0x49 */ - /* 0x4a */ 0,0,0,0, /* 0x4d */ - /* 0x4e */ JSONSL__NAN_PROXY /* */, /* 0x4e */ - /* 0x4f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */ - /* 0x66 */ JSONSL_SPECIALf_FALSE /* */, /* 0x66 */ - /* 0x67 */ 0,0, /* 0x68 */ - /* 0x69 */ JSONSL__INF_PROXY /* */, /* 0x69 */ - /* 0x6a */ 0,0,0,0, /* 0x6d */ - /* 0x6e */ JSONSL_SPECIALf_NULL|JSONSL__NAN_PROXY /* */, /* 0x6e */ - /* 0x6f */ 0,0,0,0,0, /* 0x73 */ - /* 0x74 */ JSONSL_SPECIALf_TRUE /* */, /* 0x74 */ - /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */ - /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */ - /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */ - /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */ - /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */ -}; - -/** - * Contains characters which signal the termination of any of the 'special' bareword - * values. - */ -static int Special_Endings[0x100] = { - /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */ - /* 0x09 */ 1 /* */, /* 0x09 */ - /* 0x0a */ 1 /* */, /* 0x0a */ - /* 0x0b */ 0,0, /* 0x0c */ - /* 0x0d */ 1 /* */, /* 0x0d */ - /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ - /* 0x20 */ 1 /* */, /* 0x20 */ - /* 0x21 */ 0, /* 0x21 */ - /* 0x22 */ 1 /* " */, /* 0x22 */ - /* 0x23 */ 0,0,0,0,0,0,0,0,0, /* 0x2b */ - /* 0x2c */ 1 /* , */, /* 0x2c */ - /* 0x2d */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x39 */ - /* 0x3a */ 1 /* : */, /* 0x3a */ - /* 0x3b */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5a */ - /* 0x5b */ 1 /* [ */, /* 0x5b */ - /* 0x5c */ 1 /* \ */, /* 0x5c */ - /* 0x5d */ 1 /* ] */, /* 0x5d */ - /* 0x5e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7a */ - /* 0x7b */ 1 /* { */, /* 0x7b */ - /* 0x7c */ 0, /* 0x7c */ - /* 0x7d */ 1 /* } */, /* 0x7d */ - /* 0x7e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9d */ - /* 0x9e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbd */ - /* 0xbe */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdd */ - /* 0xde */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfd */ - /* 0xfe */ 0 /* 0xfe */ -}; - -/** - * This table contains entries for the allowed whitespace as per RFC 4627 - */ -static int Allowed_Whitespace[0x100] = { - /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */ - /* 0x09 */ 1 /* */, /* 0x09 */ - /* 0x0a */ 1 /* */, /* 0x0a */ - /* 0x0b */ 0,0, /* 0x0c */ - /* 0x0d */ 1 /* */, /* 0x0d */ - /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ - /* 0x20 */ 1 /* */, /* 0x20 */ - /* 0x21 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40 */ - /* 0x41 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 */ - /* 0x61 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 */ - /* 0x81 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 */ - /* 0xa1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xc0 */ - /* 0xc1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 */ - /* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */ -}; - -static const int String_No_Passthrough[0x100] = { - /* 0x00 */ 1 /* */, /* 0x00 */ - /* 0x01 */ 1 /* */, /* 0x01 */ - /* 0x02 */ 1 /* */, /* 0x02 */ - /* 0x03 */ 1 /* */, /* 0x03 */ - /* 0x04 */ 1 /* */, /* 0x04 */ - /* 0x05 */ 1 /* */, /* 0x05 */ - /* 0x06 */ 1 /* */, /* 0x06 */ - /* 0x07 */ 1 /* */, /* 0x07 */ - /* 0x08 */ 1 /* */, /* 0x08 */ - /* 0x09 */ 1 /* */, /* 0x09 */ - /* 0x0a */ 1 /* */, /* 0x0a */ - /* 0x0b */ 1 /* */, /* 0x0b */ - /* 0x0c */ 1 /* */, /* 0x0c */ - /* 0x0d */ 1 /* */, /* 0x0d */ - /* 0x0e */ 1 /* */, /* 0x0e */ - /* 0x0f */ 1 /* */, /* 0x0f */ - /* 0x10 */ 1 /* */, /* 0x10 */ - /* 0x11 */ 1 /* */, /* 0x11 */ - /* 0x12 */ 1 /* */, /* 0x12 */ - /* 0x13 */ 1 /* */, /* 0x13 */ - /* 0x14 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x21 */ - /* 0x22 */ 1 /* <"> */, /* 0x22 */ - /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */ - /* 0x43 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ - /* 0x5c */ 1 /* <\> */, /* 0x5c */ - /* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */ - /* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */ - /* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */ - /* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */ - /* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfc */ - /* 0xfd */ 0,0, /* 0xfe */ -}; - -/** - * Allowable two-character 'common' escapes: - */ -static int Allowed_Escapes[0x100] = { - /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ - /* 0x20 */ 0,0, /* 0x21 */ - /* 0x22 */ 1 /* <"> */, /* 0x22 */ - /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2e */ - /* 0x2f */ 1 /* */, /* 0x2f */ - /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x4f */ - /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ - /* 0x5c */ 1 /* <\> */, /* 0x5c */ - /* 0x5d */ 0,0,0,0,0, /* 0x61 */ - /* 0x62 */ 1 /* */, /* 0x62 */ - /* 0x63 */ 0,0,0, /* 0x65 */ - /* 0x66 */ 1 /* */, /* 0x66 */ - /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */ - /* 0x6e */ 1 /* */, /* 0x6e */ - /* 0x6f */ 0,0,0, /* 0x71 */ - /* 0x72 */ 1 /* */, /* 0x72 */ - /* 0x73 */ 0, /* 0x73 */ - /* 0x74 */ 1 /* */, /* 0x74 */ - /* 0x75 */ 1 /* */, /* 0x75 */ - /* 0x76 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x95 */ - /* 0x96 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb5 */ - /* 0xb6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd5 */ - /* 0xd6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf5 */ - /* 0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */ -}; - -/** - * This table contains the _values_ for a given (single) escaped character. - */ -static unsigned char Escape_Equivs[0x100] = { - /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ - /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */ - /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */ - /* 0x60 */ 0,0, /* 0x61 */ - /* 0x62 */ 8 /* */, /* 0x62 */ - /* 0x63 */ 0,0,0, /* 0x65 */ - /* 0x66 */ 12 /* */, /* 0x66 */ - /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */ - /* 0x6e */ 10 /* */, /* 0x6e */ - /* 0x6f */ 0,0,0, /* 0x71 */ - /* 0x72 */ 13 /* */, /* 0x72 */ - /* 0x73 */ 0, /* 0x73 */ - /* 0x74 */ 9 /* */, /* 0x74 */ - /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */ - /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */ - /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */ - /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */ - /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */ -}; - -/* Definitions of above-declared static functions */ -static char get_escape_equiv(unsigned c) { - return Escape_Equivs[c & 0xff]; -} -static unsigned extract_special(unsigned c) { - return Special_Table[c & 0xff]; -} -static int is_special_end(unsigned c) { - return Special_Endings[c & 0xff]; -} -static int is_allowed_whitespace(unsigned c) { - return c == ' ' || Allowed_Whitespace[c & 0xff]; -} -static int is_allowed_escape(unsigned c) { - return Allowed_Escapes[c & 0xff]; -} -static int is_simple_char(unsigned c) { - return !String_No_Passthrough[c & 0xff]; -} - -/* Clean up all our macros! */ -#undef INCR_METRIC -#undef INCR_GENERIC -#undef INCR_STRINGY_CATCH -#undef CASE_DIGITS -#undef INVOKE_ERROR -#undef STACK_PUSH -#undef STACK_POP_NOPOS -#undef STACK_POP -#undef CALLBACK_AND_POP_NOPOS -#undef CALLBACK_AND_POP -#undef SPECIAL_POP -#undef CUR_CHAR -#undef DO_CALLBACK -#undef ENSURE_HVAL -#undef VERIFY_SPECIAL -#undef STATE_SPECIAL_LENGTH -#undef IS_NORMAL_NUMBER -#undef STATE_NUM_LAST -#undef FASTPARSE_EXHAUSTED -#undef FASTPARSE_BREAK diff --git a/third-party/jsonsl/jsonsl.h b/third-party/jsonsl/jsonsl.h deleted file mode 100644 index 43170743a..000000000 --- a/third-party/jsonsl/jsonsl.h +++ /dev/null @@ -1,1006 +0,0 @@ -/* https://github.com/mnunberg/jsonsl */ - -/** - * JSON Simple/Stacked/Stateful Lexer. - * - Does not buffer data - * - Maintains state - * - Callback oriented - * - Lightweight and fast. One source file and one header file - * - * Copyright © 2012-2015 Mark Nunberg - * See included LICENSE file for license details. - */ - -#ifndef JSONSL_H_ -#define JSONSL_H_ - -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#ifdef JSONSL_USE_WCHAR -typedef jsonsl_char_t wchar_t; -typedef jsonsl_uchar_t unsigned wchar_t; -#else -typedef char jsonsl_char_t; -typedef unsigned char jsonsl_uchar_t; -#endif /* JSONSL_USE_WCHAR */ - -#ifdef JSONSL_PARSE_NAN -#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN -#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF -#else -#define JSONSL__NAN_PROXY 0 -#define JSONSL__INF_PROXY 0 -#endif - -/* Stolen from http-parser.h, and possibly others */ -#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -#if !defined(_MSC_VER) || _MSC_VER<1400 -typedef unsigned int size_t; -typedef int ssize_t; -#endif -#else -#include -#endif - - -#if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS)) -#define JSONSL_STATE_GENERIC -#endif /* !defined JSONSL_STATE_GENERIC */ - -#ifdef JSONSL_STATE_GENERIC -#define JSONSL_STATE_USER_FIELDS -#endif /* JSONSL_STATE_GENERIC */ - -/* Additional fields for component object */ -#ifndef JSONSL_JPR_COMPONENT_USER_FIELDS -#define JSONSL_JPR_COMPONENT_USER_FIELDS -#endif - -#ifndef JSONSL_API -/** - * We require a /DJSONSL_DLL so that users already using this as a static - * or embedded library don't get confused - */ -#if defined(_WIN32) && defined(JSONSL_DLL) -#define JSONSL_API __declspec(dllexport) -#else -#define JSONSL_API -#endif /* _WIN32 */ - -#endif /* !JSONSL_API */ - -#ifndef JSONSL_INLINE -#if defined(_MSC_VER) - #define JSONSL_INLINE __inline - #elif defined(__GNUC__) - #define JSONSL_INLINE __inline__ - #else - #define JSONSL_INLINE inline - #endif /* _MSC_VER or __GNUC__ */ -#endif /* JSONSL_INLINE */ - -#define JSONSL_MAX_LEVELS 512 - -struct jsonsl_st; -typedef struct jsonsl_st *jsonsl_t; - -typedef struct jsonsl_jpr_st* jsonsl_jpr_t; - -/** - * This flag is true when AND'd against a type whose value - * must be in "quoutes" i.e. T_HKEY and T_STRING - */ -#define JSONSL_Tf_STRINGY 0xffff00 - -/** - * Constant representing the special JSON types. - * The values are special and aid in speed (the OBJECT and LIST - * values are the char literals of their openings). - * - * Their actual value is a character which attempts to resemble - * some mnemonic reference to the actual type. - * - * If new types are added, they must fit into the ASCII printable - * range (so they should be AND'd with 0x7f and yield something - * meaningful) - */ -#define JSONSL_XTYPE \ - X(STRING, '"'|JSONSL_Tf_STRINGY) \ - X(HKEY, '#'|JSONSL_Tf_STRINGY) \ - X(OBJECT, '{') \ - X(LIST, '[') \ - X(SPECIAL, '^') \ - X(UESCAPE, 'u') -typedef enum { -#define X(o, c) \ - JSONSL_T_##o = c, - JSONSL_XTYPE - JSONSL_T_UNKNOWN = '?', - /* Abstract 'root' object */ - JSONSL_T_ROOT = 0 -#undef X -} jsonsl_type_t; - -/** - * Subtypes for T_SPECIAL. We define them as flags - * because more than one type can be applied to a - * given object. - */ - -#define JSONSL_XSPECIAL \ - X(NONE, 0) \ - X(SIGNED, 1<<0) \ - X(UNSIGNED, 1<<1) \ - X(TRUE, 1<<2) \ - X(FALSE, 1<<3) \ - X(NULL, 1<<4) \ - X(FLOAT, 1<<5) \ - X(EXPONENT, 1<<6) \ - X(NONASCII, 1<<7) \ - X(NAN, 1<<8) \ - X(INF, 1<<9) -typedef enum { -#define X(o,b) \ - JSONSL_SPECIALf_##o = b, - JSONSL_XSPECIAL -#undef X - /* Handy flags for checking */ - - JSONSL_SPECIALf_UNKNOWN = 1 << 10, - - /** @private Private */ - JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED, - /** @private */ - JSONSL_SPECIALf_DASH = 1 << 12, - /** @private */ - JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF), - JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED), - - /** Type is numeric */ - JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED), - - /** Type is a boolean */ - JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE), - - /** Type is an "extended", not integral type (but numeric) */ - JSONSL_SPECIALf_NUMNOINT = - (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN - |JSONSL_SPECIALf_INF) -} jsonsl_special_t; - - -/** - * These are the various types of stack (or other) events - * which will trigger a callback. - * Like the type constants, this are also mnemonic - */ -#define JSONSL_XACTION \ - X(PUSH, '+') \ - X(POP, '-') \ - X(UESCAPE, 'U') \ - X(ERROR, '!') -typedef enum { -#define X(a,c) \ - JSONSL_ACTION_##a = c, - JSONSL_XACTION - JSONSL_ACTION_UNKNOWN = '?' -#undef X -} jsonsl_action_t; - - -/** - * Various errors which may be thrown while parsing JSON - */ -#define JSONSL_XERR \ -/* Trailing garbage characters */ \ - X(GARBAGE_TRAILING) \ -/* We were expecting a 'special' (numeric, true, false, null) */ \ - X(SPECIAL_EXPECTED) \ -/* The 'special' value was incomplete */ \ - X(SPECIAL_INCOMPLETE) \ -/* Found a stray token */ \ - X(STRAY_TOKEN) \ -/* We were expecting a token before this one */ \ - X(MISSING_TOKEN) \ -/* Cannot insert because the container is not ready */ \ - X(CANT_INSERT) \ -/* Found a '\' outside a string */ \ - X(ESCAPE_OUTSIDE_STRING) \ -/* Found a ':' outside of a hash */ \ - X(KEY_OUTSIDE_OBJECT) \ -/* found a string outside of a container */ \ - X(STRING_OUTSIDE_CONTAINER) \ -/* Found a null byte in middle of string */ \ - X(FOUND_NULL_BYTE) \ -/* Current level exceeds limit specified in constructor */ \ - X(LEVELS_EXCEEDED) \ -/* Got a } as a result of an opening [ or vice versa */ \ - X(BRACKET_MISMATCH) \ -/* We expected a key, but got something else instead */ \ - X(HKEY_EXPECTED) \ -/* We got an illegal control character (bad whitespace or something) */ \ - X(WEIRD_WHITESPACE) \ -/* Found a \u-escape, but there were less than 4 following hex digits */ \ - X(UESCAPE_TOOSHORT) \ -/* Invalid two-character escape */ \ - X(ESCAPE_INVALID) \ -/* Trailing comma */ \ - X(TRAILING_COMMA) \ -/* An invalid number was passed in a numeric field */ \ - X(INVALID_NUMBER) \ -/* Value is missing for object */ \ - X(VALUE_EXPECTED) \ -/* The following are for JPR Stuff */ \ - \ -/* Found a literal '%' but it was only followed by a single valid hex digit */ \ - X(PERCENT_BADHEX) \ -/* jsonpointer URI is malformed '/' */ \ - X(JPR_BADPATH) \ -/* Duplicate slash */ \ - X(JPR_DUPSLASH) \ -/* No leading root */ \ - X(JPR_NOROOT) \ -/* Allocation failure */ \ - X(ENOMEM) \ -/* Invalid unicode codepoint detected (in case of escapes) */ \ - X(INVALID_CODEPOINT) - -typedef enum { - JSONSL_ERROR_SUCCESS = 0, -#define X(e) \ - JSONSL_ERROR_##e, - JSONSL_XERR -#undef X - JSONSL_ERROR_GENERIC -} jsonsl_error_t; - - -/** - * A state is a single level of the stack. - * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section) - * will remain in tact until the item is popped. - * - * As a result, it means a parent state object may be accessed from a child - * object, (the parents fields will all be valid). This allows a user to create - * an ad-hoc hierarchy on top of the JSON one. - * - */ -struct jsonsl_state_st { - /** - * The JSON object type - */ - unsigned type; - - /** If this element is special, then its extended type is here */ - unsigned special_flags; - - /** - * The position (in terms of number of bytes since the first call to - * jsonsl_feed()) at which the state was first pushed. This includes - * opening tokens, if applicable. - * - * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will - * be the position of the first quote. - * - * @see jsonsl_st::pos which contains the _current_ position and can be - * used during a POP callback to get the length of the element. - */ - size_t pos_begin; - - /**FIXME: This is redundant as the same information can be derived from - * jsonsl_st::pos at pop-time */ - size_t pos_cur; - - /** - * Level of recursion into nesting. This is mainly a convenience - * variable, as this can technically be deduced from the lexer's - * level parameter (though the logic is not that simple) - */ - unsigned int level; - - - /** - * how many elements in the object/list. - * For objects (hashes), an element is either - * a key or a value. Thus for one complete pair, - * nelem will be 2. - * - * For special types, this will hold the sum of the digits. - * This only holds true for values which are simple signed/unsigned - * numbers. Otherwise a special flag is set, and extra handling is not - * performed. - */ - uint64_t nelem; - - - - /*TODO: merge this and special_flags into a union */ - - - /** - * Useful for an opening nest, this will prevent a callback from being - * invoked on this item or any of its children - */ - int ignore_callback; - - /** - * Counter which is incremented each time an escape ('\') is encountered. - * This is used internally for non-string types and should only be - * inspected by the user if the state actually represents a string - * type. - */ - unsigned int nescapes; - - /** - * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then - * the macro expansion happens here. - * - * You can use these fields to store hierarchical or 'tagging' information - * for specific objects. - * - * See the documentation above for the lifetime of the state object (i.e. - * if the private data points to allocated memory, it should be freed - * when the object is popped, as the state object will be re-used) - */ -#ifndef JSONSL_STATE_GENERIC - JSONSL_STATE_USER_FIELDS -#else - - /** - * Otherwise, this is a simple void * pointer for anything you want - */ - void *data; -#endif /* JSONSL_STATE_USER_FIELDS */ -}; - -/**Gets the number of elements in the list. - * @param st The state. Must be of type JSONSL_T_LIST - * @return number of elements in the list - */ -#define JSONSL_LIST_SIZE(st) ((st)->nelem) - -/**Gets the number of key-value pairs in an object - * @param st The state. Must be of type JSONSL_T_OBJECT - * @return the number of key-value pairs in the object - */ -#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2) - -/**Gets the numeric value. - * @param st The state. Must be of type JSONSL_T_SPECIAL and - * special_flags must have the JSONSL_SPECIALf_NUMERIC flag - * set. - * @return the numeric value of the state. - */ -#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem) - -/* - * So now we need some special structure for keeping the - * JPR info in sync. Preferably all in a single block - * of memory (there's no need for separate allocations. - * So we will define a 'table' with the following layout - * - * Level nPosbl JPR1_last JPR2_last JPR3_last - * - * 0 1 NOMATCH POSSIBLE POSSIBLE - * 1 0 NOMATCH NOMATCH COMPLETE - * [ table ends here because no further path is possible] - * - * Where the JPR..n corresponds to the number of JPRs - * requested, and nPosble is a quick flag to determine - * - * the number of possibilities. In the future this might - * be made into a proper 'jump' table, - * - * Since we always mark JPRs from the higher levels descending - * into the lower ones, a prospective child match would first - * look at the parent table to check the possibilities, and then - * see which ones were possible.. - * - * Thus, the size of this blob would be (and these are all ints here) - * nLevels * nJPR * 2. - * - * the 'Width' of the table would be nJPR*2, and the 'height' would be - * nlevels - */ - -/** - * This is called when a stack change occurs. - * - * @param jsn The lexer - * @param action The type of action, this can be PUSH or POP - * @param state A pointer to the stack currently affected by the action - * @param at A pointer to the position of the input buffer which triggered - * this action. - */ -typedef void (*jsonsl_stack_callback)( - jsonsl_t jsn, - jsonsl_action_t action, - struct jsonsl_state_st* state, - const jsonsl_char_t *at); - - -/** - * This is called when an error is encountered. - * Sometimes it's possible to 'erase' characters (by replacing them - * with whitespace). If you think you have corrected the error, you - * can return a true value, in which case the parser will backtrack - * and try again. - * - * @param jsn The lexer - * @param error The error which was thrown - * @param state the current state - * @param a pointer to the position of the input buffer which triggered - * the error. Note that this is not const, this is because you have the - * possibility of modifying the character in an attempt to correct the - * error - * - * @return zero to bail, nonzero to try again (this only makes sense if - * the input buffer has been modified by this callback) - */ -typedef int (*jsonsl_error_callback)( - jsonsl_t jsn, - jsonsl_error_t error, - struct jsonsl_state_st* state, - jsonsl_char_t *at); - -struct jsonsl_st { - /** Public, read-only */ - - /** This is the current level of the stack */ - unsigned int level; - - /** Flag set to indicate we should stop processing */ - unsigned int stopfl; - - /** - * This is the current position, relative to the beginning - * of the stream. - */ - size_t pos; - - /** This is the 'bytes' variable passed to feed() */ - const jsonsl_char_t *base; - - /** Callback invoked for PUSH actions */ - jsonsl_stack_callback action_callback_PUSH; - - /** Callback invoked for POP actions */ - jsonsl_stack_callback action_callback_POP; - - /** Default callback for any action, if neither PUSH or POP callbacks are defined */ - jsonsl_stack_callback action_callback; - - /** - * Do not invoke callbacks for objects deeper than this level. - * NOTE: This field establishes the lower bound for ignored callbacks, - * and is thus misnamed. `min_ignore_level` would actually make more - * sense, but we don't want to break API. - */ - unsigned int max_callback_level; - - /** The error callback. Invoked when an error happens. Should not be NULL */ - jsonsl_error_callback error_callback; - - /* these are boolean flags you can modify. You will be called - * about notification for each of these types if the corresponding - * variable is true. - */ - - /** - * @name Callback Booleans. - * These determine whether a callback is to be invoked for certain types of objects - * @{*/ - - /** Boolean flag to enable or disable the invokcation for events on this type*/ - int call_SPECIAL; - int call_OBJECT; - int call_LIST; - int call_STRING; - int call_HKEY; - /*@}*/ - - /** - * @name u-Escape handling - * Special handling for the \\u-f00d type sequences. These are meant - * to be translated back into the corresponding octet(s). - * A special callback (if set) is invoked with *at=='u'. An application - * may wish to temporarily suspend parsing and handle the 'u-' sequence - * internally (or not). - */ - - /*@{*/ - - /** Callback to be invoked for a u-escape */ - jsonsl_stack_callback action_callback_UESCAPE; - - /** Boolean flag, whether to invoke the callback */ - int call_UESCAPE; - - /** Boolean flag, whether we should return after encountering a u-escape: - * the callback is invoked and then we return if this is true - */ - int return_UESCAPE; - /*@}*/ - - struct { - int allow_trailing_comma; - } options; - - /** Put anything here */ - void *data; - - /*@{*/ - /** Private */ - int in_escape; - char expecting; - char tok_last; - int can_insert; - unsigned int levels_max; - -#ifndef JSONSL_NO_JPR - size_t jpr_count; - jsonsl_jpr_t *jprs; - - /* Root pointer for JPR matching information */ - size_t *jpr_root; -#endif /* JSONSL_NO_JPR */ - /*@}*/ - - /** - * This is the stack. Its upper bound is levels_max, or the - * nlevels argument passed to jsonsl_new. If you modify this structure, - * make sure that this member is last. - */ - struct jsonsl_state_st stack[1]; -}; - - -/** - * Creates a new lexer object, with capacity for recursion up to nlevels - * - * @param nlevels maximum recursion depth - */ -JSONSL_API -jsonsl_t jsonsl_new(int nlevels); - -/** - * Feeds data into the lexer. - * - * @param jsn the lexer object - * @param bytes new data to be fed - * @param nbytes size of new data - */ -JSONSL_API -void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes); - -/** - * Resets the internal parser state. This does not free the parser - * but does clean it internally, so that the next time feed() is called, - * it will be treated as a new stream - * - * @param jsn the lexer - */ -JSONSL_API -void jsonsl_reset(jsonsl_t jsn); - -/** - * Frees the lexer, cleaning any allocated memory taken - * - * @param jsn the lexer - */ -JSONSL_API -void jsonsl_destroy(jsonsl_t jsn); - -/** - * Gets the 'parent' element, given the current one - * - * @param jsn the lexer - * @param cur the current nest, which should be a struct jsonsl_nest_st - */ -static JSONSL_INLINE -struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn, - const struct jsonsl_state_st *state) -{ - /* Don't complain about overriding array bounds */ - if (state->level > 1) { - return jsn->stack + state->level - 1; - } else { - return NULL; - } -} - -/** - * Gets the state of the last fully consumed child of this parent. This is - * only valid in the parent's POP callback. - * - * @param the lexer - * @return A pointer to the child. - */ -static JSONSL_INLINE -struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn, - const struct jsonsl_state_st *parent) -{ - return jsn->stack + (parent->level + 1); -} - -/**Call to instruct the parser to stop parsing and return. This is valid - * only from within a callback */ -static JSONSL_INLINE -void jsonsl_stop(jsonsl_t jsn) -{ - jsn->stopfl = 1; -} - -/** - * This enables receiving callbacks on all events. Doesn't do - * anything special but helps avoid some boilerplate. - * This does not touch the UESCAPE callbacks or flags. - */ -static JSONSL_INLINE -void jsonsl_enable_all_callbacks(jsonsl_t jsn) -{ - jsn->call_HKEY = 1; - jsn->call_STRING = 1; - jsn->call_OBJECT = 1; - jsn->call_SPECIAL = 1; - jsn->call_LIST = 1; -} - -/** - * A macro which returns true if the current state object can - * have children. This means a list type or an object type. - */ -#define JSONSL_STATE_IS_CONTAINER(state) \ - (state->type == JSONSL_T_OBJECT || state->type == JSONSL_T_LIST) - -/** - * These two functions, dump a string representation - * of the error or type, respectively. They will never - * return NULL - */ -JSONSL_API -const char* jsonsl_strerror(jsonsl_error_t err); -JSONSL_API -const char* jsonsl_strtype(jsonsl_type_t jt); - -/** - * Dumps global metrics to the screen. This is a noop unless - * jsonsl was compiled with JSONSL_USE_METRICS - */ -JSONSL_API -void jsonsl_dump_global_metrics(void); - -/* This macro just here for editors to do code folding */ -#ifndef JSONSL_NO_JPR - -/** - * @name JSON Pointer API - * - * JSONPointer API. This isn't really related to the lexer (at least not yet) - * JSONPointer provides an extremely simple specification for providing - * locations within JSON objects. We will extend it a bit and allow for - * providing 'wildcard' characters by which to be able to 'query' the stream. - * - * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00 - * - * Currently I'm implementing the 'single query' API which can only use a single - * query component. In the future I will integrate my yet-to-be-published - * Boyer-Moore-esque prefix searching implementation, in order to allow - * multiple paths to be merged into one for quick and efficient searching. - * - * - * JPR (as we'll refer to it within the source) can be used by splitting - * the components into multiple sections, and incrementally 'track' each - * component. When JSONSL delivers a 'pop' callback for a string, or a 'push' - * callback for an object, we will check to see whether the index matching - * the component corresponding to the current level contains a match - * for our path. - * - * In order to do this properly, a structure must be maintained within the - * parent indicating whether its children are possible matches. This flag - * will be 'inherited' by call children which may conform to the match - * specification, and discarded by all which do not (thereby eliminating - * their children from inheriting it). - * - * A successful match is a complete one. One can provide multiple paths with - * multiple levels of matches e.g. - * /foo/bar/baz/^/blah - * - * @{ - */ - -/** The wildcard character */ -#ifndef JSONSL_PATH_WILDCARD_CHAR -#define JSONSL_PATH_WILDCARD_CHAR '^' -#endif /* WILDCARD_CHAR */ - -#define JSONSL_XMATCH \ - X(COMPLETE,1) \ - X(POSSIBLE,0) \ - X(NOMATCH,-1) \ - X(TYPE_MISMATCH, -2) - -typedef enum { - -#define X(T,v) \ - JSONSL_MATCH_##T = v, - JSONSL_XMATCH - -#undef X - JSONSL_MATCH_UNKNOWN -} jsonsl_jpr_match_t; - -typedef enum { - JSONSL_PATH_STRING = 1, - JSONSL_PATH_WILDCARD, - JSONSL_PATH_NUMERIC, - JSONSL_PATH_ROOT, - - /* Special */ - JSONSL_PATH_INVALID = -1, - JSONSL_PATH_NONE = 0 -} jsonsl_jpr_type_t; - -struct jsonsl_jpr_component_st { - /** The string the component points to */ - char *pstr; - /** if this is a numeric type, the number is 'cached' here */ - unsigned long idx; - /** The length of the string */ - size_t len; - /** The type of component (NUMERIC or STRING) */ - jsonsl_jpr_type_t ptype; - - /** Set this to true to enforce type checking between dict keys and array - * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects - * that an array index is actually a child of a dictionary. */ - short is_arridx; - - /* Extra fields (for more advanced searches. Default is empty) */ - JSONSL_JPR_COMPONENT_USER_FIELDS -}; - -struct jsonsl_jpr_st { - /** Path components */ - struct jsonsl_jpr_component_st *components; - size_t ncomponents; - - /**Type of the match to be expected. If nonzero, will be compared against - * the actual type */ - unsigned match_type; - - /** Base of allocated string for components */ - char *basestr; - - /** The original match string. Useful for returning to the user */ - char *orig; - size_t norig; -}; - -/** - * Create a new JPR object. - * - * @param path the JSONPointer path specification. - * @param errp a pointer to a jsonsl_error_t. If this function returns NULL, - * then more details will be in this variable. - * - * @return a new jsonsl_jpr_t object, or NULL on error. - */ -JSONSL_API -jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp); - -/** - * Destroy a JPR object - */ -JSONSL_API -void jsonsl_jpr_destroy(jsonsl_jpr_t jpr); - -/** - * Match a JSON object against a type and specific level - * - * @param jpr the JPR object - * @param parent_type the type of the parent (should be T_LIST or T_OBJECT) - * @param parent_level the level of the parent - * @param key the 'key' of the child. If the parent is an array, this should be - * empty. - * @param nkey - the length of the key. If the parent is an array (T_LIST), then - * this should be the current index. - * - * NOTE: The key of the child means any kind of associative data related to the - * element. Thus: <<< { "foo" : [ >>, - * the opening array's key is "foo". - * - * @return a status constant. This indicates whether a match was excluded, possible, - * or successful. - */ -JSONSL_API -jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr, - unsigned int parent_type, - unsigned int parent_level, - const char *key, size_t nkey); - -/** - * Alternate matching algorithm. This matching algorithm does not use - * JSONPointer but relies on a more structured searching mechanism. It - * assumes that there is a clear distinction between array indices and - * object keys. In this case, the jsonsl_path_component_st::ptype should - * be set to @ref JSONSL_PATH_NUMERIC for an array index (the - * jsonsl_path_component_st::is_arridx field will be removed in a future - * version). - * - * @param jpr The path - * @param parent The parent structure. Can be NULL if this is the root object - * @param child The child structure. Should not be NULL - * @param key Object key, if an object - * @param nkey Length of object key - * @return Status constant if successful - * - * @note - * For successful matching, both the key and the path itself should be normalized - * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This - * should currently be done in the application. Another version of this function - * may use a temporary buffer in such circumstances (allocated by the application). - * - * Since this function also checks the state of the child, it should only - * be called on PUSH callbacks, and not POP callbacks - */ -JSONSL_API -jsonsl_jpr_match_t -jsonsl_path_match(jsonsl_jpr_t jpr, - const struct jsonsl_state_st *parent, - const struct jsonsl_state_st *child, - const char *key, size_t nkey); - - -/** - * Associate a set of JPR objects with a lexer instance. - * This should be called before the lexer has been fed any data (and - * behavior is undefined if you don't adhere to this). - * - * After using this function, you may subsequently call match_state() on - * given states (presumably from within the callbacks). - * - * Note that currently the first JPR is the quickest and comes - * pre-allocated with the state structure. Further JPR objects - * are chained. - * - * @param jsn The lexer - * @param jprs An array of jsonsl_jpr_t objects - * @param njprs How many elements in the jprs array. - */ -JSONSL_API -void jsonsl_jpr_match_state_init(jsonsl_t jsn, - jsonsl_jpr_t *jprs, - size_t njprs); - -/** - * This follows the same semantics as the normal match, - * except we infer parent and type information from the relevant state objects. - * The match status (for all possible JPR objects) is set in the *out parameter. - * - * If a match has succeeded, then its JPR object will be returned. In all other - * instances, NULL is returned; - * - * @param jpr The jsonsl_jpr_t handle - * @param state The jsonsl_state_st which is a candidate - * @param key The hash key (if applicable, can be NULL if parent is list) - * @param nkey Length of hash key (if applicable, can be zero if parent is list) - * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with - * the match result - * - * @return If a match was completed in full, then the JPR object containing - * the matching path will be returned. Otherwise, the return is NULL (note, this - * does not mean matching has failed, it can still be part of the match: check - * the out parameter). - */ -JSONSL_API -jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn, - struct jsonsl_state_st *state, - const char *key, - size_t nkey, - jsonsl_jpr_match_t *out); - - -/** - * Cleanup any memory allocated and any states set by - * match_state_init() and match_state() - * @param jsn The lexer - */ -JSONSL_API -void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn); - -/** - * Return a string representation of the match result returned by match() - */ -JSONSL_API -const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match); - -/* @}*/ - -/** - * Utility function to convert escape sequences into their original form. - * - * The decoders I've sampled do not seem to specify a standard behavior of what - * to escape/unescape. - * - * RFC 4627 Mandates only that the quoute, backslash, and ASCII control - * characters (0x00-0x1f) be escaped. It is often common for applications - * to escape a '/' - however this may also be desired behavior. the JSON - * spec is not clear on this, and therefore jsonsl leaves it up to you. - * - * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically - * true when dealing with 'u-escapes' which can be expressed perfectly fine - * as utf8. One use case for normalization is JPR string comparison, in which - * case two effectively equivalent strings may not match because one is using - * u-escapes and the other proper utf8. To normalize u-escapes only, pass in - * an empty `toEscape` table, enabling only the `u` index. - * - * @param in The input string. - * @param out An allocated output (should be the same size as in) - * @param len the size of the buffer - * @param toEscape - A sparse array of characters to unescape. Characters - * which are not present in this array, e.g. toEscape['c'] == 0 will be - * ignored and passed to the output in their original form. - * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte, - * then this variable will have the SPECIALf_NONASCII flag on. - * - * @param err A pointer to an error variable. If an error occurs, it will be - * set in this variable - * @param errat If not null and an error occurs, this will be set to point - * to the position within the string at which the offending character was - * encountered. - * - * @return The effective size of the output buffer. - * - * @note - * This function now encodes the UTF8 equivalents of utf16 escapes (i.e. - * 'u-escapes'). Previously this would encode the escapes as utf16 literals, - * which while still correct in some sense was confusing for many (especially - * considering that the inputs were variations of char). - * - * @note - * The output buffer will never be larger than the input buffer, since - * standard escape sequences (i.e. '\t') occupy two bytes in the source - * but only one byte (when unescaped) in the output. Likewise u-escapes - * (i.e. \uXXXX) will occupy six bytes in the source, but at the most - * two bytes when escaped. - */ -JSONSL_API -size_t jsonsl_util_unescape_ex(const char *in, - char *out, - size_t len, - const int toEscape[128], - unsigned *oflags, - jsonsl_error_t *err, - const char **errat); - -/** - * Convenience macro to avoid passing too many parameters - */ -#define jsonsl_util_unescape(in, out, len, toEscape, err) \ - jsonsl_util_unescape_ex(in, out, len, toEscape, nullptr, err, nullptr) - -#endif /* JSONSL_NO_JPR */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* JSONSL_H_ */ diff --git a/third-party/rapidjson b/third-party/rapidjson new file mode 160000 index 000000000..f9d53419e --- /dev/null +++ b/third-party/rapidjson @@ -0,0 +1 @@ +Subproject commit f9d53419e912910fd8fa57d5705fa41425428c35