refactor: SAX-like benc parser pt. 1 (#2490)
* refactor: add SAX-like benc parser This is the first part of a series of PRs whose end goal is to avoid the overhead of tr_variant when parsing bencoded data, e.g. when parsing .torrent files on startup or when parsing announce/scrape tracker responses. This PR introduces a SAX-like benc parser, reimplements variant-benc to use the SAX benc parser (so that we don't have two benc parsers), and updates the benc + variant tests.
This commit is contained in:
parent
2329f7541f
commit
5efec26a3b
|
@ -400,6 +400,7 @@
|
|||
C3CEBBFB2794A0D200683BE0 /* compiler_msc.h in Headers */ = {isa = PBXBuildFile; fileRef = C3CEBBF82794A0D200683BE0 /* compiler_msc.h */; };
|
||||
C3CEBBFC2794A12200683BE0 /* libdeflate.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C3CEBBA927949CA000683BE0 /* libdeflate.a */; };
|
||||
CAB35C64252F6F5E00552A55 /* mime-types.h in Headers */ = {isa = PBXBuildFile; fileRef = CAB35C62252F6F5E00552A55 /* mime-types.h */; };
|
||||
2856E0656A49F2665D69E760 /* benc.h in Headers */ = {isa = PBXBuildFile; fileRef = 2856E0656A49F2665D69E761 /* benc.h */; };
|
||||
E138A9780C04D88F00C5426C /* ProgressGradients.mm in Sources */ = {isa = PBXBuildFile; fileRef = E138A9760C04D88F00C5426C /* ProgressGradients.mm */; };
|
||||
E23B55A5FC3B557F7746D510 /* interned-string.h in Headers */ = {isa = PBXBuildFile; fileRef = E23B55A5FC3B557F7746D511 /* interned-string.h */; settings = {ATTRIBUTES = (Project, ); }; };
|
||||
E71A5565279C2DD600EBFA1E /* tr-assert.mm in Sources */ = {isa = PBXBuildFile; fileRef = E71A5564279C2DD600EBFA1E /* tr-assert.mm */; };
|
||||
|
@ -1111,6 +1112,7 @@
|
|||
C3CEBBF72794A0D200683BE0 /* compiler_gcc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compiler_gcc.h; path = common/compiler_gcc.h; sourceTree = "<group>"; };
|
||||
C3CEBBF82794A0D200683BE0 /* compiler_msc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compiler_msc.h; path = common/compiler_msc.h; sourceTree = "<group>"; };
|
||||
CAB35C62252F6F5E00552A55 /* mime-types.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "mime-types.h"; sourceTree = "<group>"; };
|
||||
2856E0656A49F2665D69E761 /* benc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "benc.h"; sourceTree = "<group>"; };
|
||||
E138A9750C04D88F00C5426C /* ProgressGradients.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ProgressGradients.h; sourceTree = "<group>"; };
|
||||
E138A9760C04D88F00C5426C /* ProgressGradients.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ProgressGradients.mm; sourceTree = "<group>"; };
|
||||
E23B55A5FC3B557F7746D511 /* interned-string.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "interned-string.h"; sourceTree = SOURCE_ROOT; };
|
||||
|
@ -1481,6 +1483,7 @@
|
|||
C17740D3273A002C00E455D2 /* web-utils.cc */,
|
||||
C17740D4273A002C00E455D2 /* web-utils.h */,
|
||||
CAB35C62252F6F5E00552A55 /* mime-types.h */,
|
||||
2856E0656A49F2665D69E761 /* benc.h */,
|
||||
C1077A4A183EB29600634C22 /* error.cc */,
|
||||
C1077A4B183EB29600634C22 /* error.h */,
|
||||
C1077A4C183EB29600634C22 /* file-posix.cc */,
|
||||
|
@ -2060,6 +2063,7 @@
|
|||
A220EC5C118C8A060022B4BE /* tr-lpd.h in Headers */,
|
||||
A23547E311CD0B090046EAE6 /* cache.h in Headers */,
|
||||
CAB35C64252F6F5E00552A55 /* mime-types.h in Headers */,
|
||||
2856E0656A49F2665D69E760 /* benc.h in Headers */,
|
||||
A284214512DA663E00FBDDBB /* tr-udp.h in Headers */,
|
||||
C1077A4F183EB29600634C22 /* error.h in Headers */,
|
||||
A2679295130E00A000CB7464 /* tr-utp.h in Headers */,
|
||||
|
|
|
@ -155,6 +155,7 @@ set(${PROJECT_NAME}_PRIVATE_HEADERS
|
|||
announcer-common.h
|
||||
announcer.h
|
||||
bandwidth.h
|
||||
benc.h
|
||||
bitfield.h
|
||||
block-info.h
|
||||
blocklist.h
|
||||
|
|
|
@ -0,0 +1,337 @@
|
|||
// This file Copyright © 2022 Mnemosyne LLC.
|
||||
// It may be used under GPLv2 (SPDX: GPL-2.0), GPLv3 (SPDX: GPL-3.0),
|
||||
// or any future license endorsed by Mnemosyne LLC.
|
||||
// License text can be found in the licenses/ folder.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
|
||||
#include "error.h"
|
||||
|
||||
namespace transmission::benc
|
||||
{
|
||||
|
||||
namespace impl
|
||||
{
|
||||

||||
// Parses one bencoded integer (e.g. "i64e") from the front of `benc`.
// As exercised by the unit tests: on success the value is returned and
// `benc` is advanced past the integer; on failure (missing "e", bad digits,
// leading zeros, empty input) an empty optional is returned and `benc`
// is left unchanged.
std::optional<int64_t> ParseInt(std::string_view* benc);
|
||||

||||
// Parses one bencoded string (e.g. "4:boat") from the front of `benc`.
// As exercised by the unit tests: on success a view of the payload is
// returned and `benc` is advanced past it (so "3:boat" yields "boa" and
// leaves `benc` at 't'); on failure (length overflow, payload running past
// the end of the buffer) an empty optional is returned and `benc` is
// left unchanged.
std::optional<std::string_view> ParseString(std::string_view* benc);
|
||||

||||
} // namespace impl
|
||||
|
||||
/**
 * Callback interface for the SAX-like benc parser.
 *
 * parse() invokes one callback per token it walks:
 *  - Int64() for an integer,
 *  - Key() for a string in a dict's key position, String() for any other string,
 *  - StartDict()/EndDict() and StartArray()/EndArray() around containers.
 *
 * Every callback returns true to keep parsing. Returning false makes parse()
 * stop and report ECANCELED.
 */
struct Handler
{
    // Implementations are used polymorphically, so destroying one through a
    // `Handler*` must run the derived destructor.
    virtual ~Handler() = default;

    virtual bool Int64(int64_t) = 0;
    virtual bool String(std::string_view) = 0;

    virtual bool StartDict() = 0;
    virtual bool Key(std::string_view) = 0;
    virtual bool EndDict() = 0;

    virtual bool StartArray() = 0;
    virtual bool EndArray() = 0;
};
|
||||
|
||||
template<std::size_t MaxDepth>
|
||||
struct BasicHandler : public Handler
|
||||
{
|
||||
bool Int64(int64_t) override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool String(std::string_view) override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartDict() override
|
||||
{
|
||||
keys.emplace_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Key(std::string_view key) override
|
||||
{
|
||||
keys.back() = key;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndDict() override
|
||||
{
|
||||
keys.resize(keys.size() - 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartArray() override
|
||||
{
|
||||
keys.emplace_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndArray() override
|
||||
{
|
||||
keys.resize(keys.size() - 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::array<std::string_view, MaxDepth> keys;
|
||||
};
|
||||
|
||||
template<std::size_t MaxDepth>
|
||||
struct ParserStack
|
||||
{
|
||||
enum class ParentType
|
||||
{
|
||||
Array,
|
||||
Dict
|
||||
};
|
||||
struct Node
|
||||
{
|
||||
ParentType parent_type;
|
||||
size_t n_children_walked;
|
||||
};
|
||||
std::array<Node, MaxDepth> stack;
|
||||
std::size_t depth = 0;
|
||||
|
||||
void clear()
|
||||
{
|
||||
depth = 0;
|
||||
}
|
||||
|
||||
void tokenWalked()
|
||||
{
|
||||
++stack[depth].n_children_walked;
|
||||
}
|
||||
|
||||
Node& current()
|
||||
{
|
||||
return stack[depth];
|
||||
}
|
||||
Node& current() const
|
||||
{
|
||||
return stack[depth];
|
||||
}
|
||||
|
||||
bool expectingDictKey() const
|
||||
{
|
||||
return depth > 0 && stack[depth].parent_type == ParentType::Dict && (stack[depth].n_children_walked % 2) == 0;
|
||||
}
|
||||
|
||||
std::optional<ParentType> parentType() const
|
||||
{
|
||||
if (depth == 0)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
return stack[depth].parent_type;
|
||||
}
|
||||
|
||||
std::optional<ParentType> pop(tr_error** error)
|
||||
{
|
||||
if (depth == 0)
|
||||
{
|
||||
tr_error_set(error, EILSEQ, "Cannot pop empty stack");
|
||||
return {};
|
||||
}
|
||||
|
||||
if (stack[depth].parent_type == ParentType::Dict && ((stack[depth].n_children_walked % 2) != 0))
|
||||
{
|
||||
tr_error_set(error, EILSEQ, "Premature end-of-dict found. Malformed benc?");
|
||||
return {};
|
||||
}
|
||||
|
||||
auto const ret = stack[depth].parent_type;
|
||||
--depth;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool push(ParentType parent_type, tr_error** error)
|
||||
{
|
||||
if (depth + 1 >= std::size(stack))
|
||||
{
|
||||
tr_error_set(error, E2BIG, "Max stack depth reached; unable to continue parsing");
|
||||
return false;
|
||||
}
|
||||
|
||||
++depth;
|
||||
current() = { parent_type, 0 };
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<size_t MaxDepth>
|
||||
bool parse(
|
||||
std::string_view benc,
|
||||
ParserStack<MaxDepth>& stack,
|
||||
Handler& handler,
|
||||
char const** setme_end = nullptr,
|
||||
tr_error** error = nullptr)
|
||||
{
|
||||
stack.clear();
|
||||
|
||||
int err = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (std::empty(benc))
|
||||
{
|
||||
err = EILSEQ;
|
||||
}
|
||||
|
||||
if (err != 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
switch (benc.front())
|
||||
{
|
||||
case 'i': // int
|
||||
{
|
||||
auto const value = impl::ParseInt(&benc);
|
||||
if (!value)
|
||||
{
|
||||
tr_error_set(error, err, "Malformed benc? Unable to parse integer");
|
||||
break;
|
||||
}
|
||||
|
||||
if (!handler.Int64(*value))
|
||||
{
|
||||
err = ECANCELED;
|
||||
tr_error_set(error, err, "Handler indicated parser should stop");
|
||||
break;
|
||||
}
|
||||
|
||||
stack.tokenWalked();
|
||||
break;
|
||||
}
|
||||
case 'l': // list
|
||||
case 'd': // dict
|
||||
{
|
||||
bool ok = benc.front() == 'l' ? stack.push(ParserStack<MaxDepth>::ParentType::Array, error) :
|
||||
stack.push(ParserStack<MaxDepth>::ParentType::Dict, error);
|
||||
if (!ok)
|
||||
{
|
||||
err = EILSEQ;
|
||||
break;
|
||||
}
|
||||
|
||||
ok = benc.front() == 'l' ? handler.StartArray() : handler.StartDict();
|
||||
if (!ok)
|
||||
{
|
||||
err = ECANCELED;
|
||||
tr_error_set(error, err, "Handler indicated parser should stop");
|
||||
break;
|
||||
}
|
||||
|
||||
benc.remove_prefix(1);
|
||||
break;
|
||||
}
|
||||
case 'e': // end of list or dict
|
||||
{
|
||||
benc.remove_prefix(1);
|
||||
|
||||
auto const parent_type = stack.pop(error);
|
||||
if (!parent_type)
|
||||
{
|
||||
err = EILSEQ;
|
||||
break;
|
||||
}
|
||||
|
||||
stack.tokenWalked();
|
||||
|
||||
bool ok = *parent_type == ParserStack<MaxDepth>::ParentType::Array ? handler.EndArray() : handler.EndDict();
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
err = ECANCELED;
|
||||
tr_error_set(error, err, "Handler indicated parser should stop");
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': // string
|
||||
{
|
||||
auto const sv = impl::ParseString(&benc);
|
||||
if (!sv)
|
||||
{
|
||||
err = EILSEQ;
|
||||
tr_error_set(error, err, "Malformed benc? Unable to parse string");
|
||||
break;
|
||||
}
|
||||
|
||||
bool const ok = stack.expectingDictKey() ? handler.Key(*sv) : handler.String(*sv);
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
err = ECANCELED;
|
||||
tr_error_set(error, err, "Handler indicated parser should stop");
|
||||
break;
|
||||
}
|
||||
stack.tokenWalked();
|
||||
break;
|
||||
}
|
||||
|
||||
default: // invalid bencoded text... march past it
|
||||
benc.remove_prefix(1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (stack.depth == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err != 0)
|
||||
{
|
||||
errno = err;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stack.depth != 0)
|
||||
{
|
||||
err = EILSEQ;
|
||||
tr_error_set(error, err, "premature end-of-data reached");
|
||||
errno = err;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stack.stack[0].n_children_walked == 0)
|
||||
{
|
||||
err = EILSEQ;
|
||||
tr_error_set(error, err, "no data found");
|
||||
errno = err;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (setme_end != nullptr)
|
||||
{
|
||||
*setme_end = std::data(benc);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace transmission::benc
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "transmission.h"
|
||||
|
||||
#include "benc.h"
|
||||
#include "tr-assert.h"
|
||||
#include "quark.h"
|
||||
#include "utils.h" /* tr_snprintf() */
|
||||
|
@ -27,10 +28,13 @@ using namespace std::literals;
|
|||
auto constexpr MaxBencStrLength = size_t{ 128 * 1024 * 1024 }; // arbitrary
|
||||
|
||||
/***
|
||||
**** tr_variantParse()
|
||||
**** tr_variantLoad()
|
||||
****
|
||||
****
|
||||
***/
|
||||
|
||||
namespace transmission::benc::impl
|
||||
{
|
||||
|
||||
/**
|
||||
* The initial i and trailing e are beginning and ending delimiters.
|
||||
* You can have negative numbers such as i-3e. You cannot prefix the
|
||||
|
@ -41,7 +45,7 @@ auto constexpr MaxBencStrLength = size_t{ 128 * 1024 * 1024 }; // arbitrary
|
|||
* but to handle it as a signed 64bit integer is mandatory to handle
|
||||
* "large files" aka .torrent for more that 4Gbyte
|
||||
*/
|
||||
std::optional<int64_t> tr_bencParseInt(std::string_view* benc)
|
||||
std::optional<int64_t> ParseInt(std::string_view* benc)
|
||||
{
|
||||
auto constexpr Prefix = "i"sv;
|
||||
auto constexpr Suffix = "e"sv;
|
||||
|
@ -85,7 +89,7 @@ std::optional<int64_t> tr_bencParseInt(std::string_view* benc)
|
|||
* Note that there is no constant beginning delimiter, and no ending delimiter.
|
||||
* Example: 4:spam represents the string "spam"
|
||||
*/
|
||||
std::optional<std::string_view> tr_bencParseStr(std::string_view* benc)
|
||||
std::optional<std::string_view> ParseString(std::string_view* benc)
|
||||
{
|
||||
// find the ':' delimiter
|
||||
auto const colon_pos = benc->find(':');
|
||||
|
@ -114,179 +118,135 @@ std::optional<std::string_view> tr_bencParseStr(std::string_view* benc)
|
|||
return string;
|
||||
}
|
||||
|
||||
static tr_variant* get_node(std::deque<tr_variant*>& stack, std::optional<tr_quark>& dict_key, tr_variant* top, int* err)
|
||||
} // namespace transmission::benc::impl
|
||||
|
||||
/***
|
||||
**** tr_variantParse()
|
||||
**** tr_variantLoad()
|
||||
***/
|
||||
|
||||
struct MyHandler : public transmission::benc::Handler
|
||||
{
|
||||
tr_variant* node = nullptr;
|
||||
tr_variant* const top_;
|
||||
int const parse_opts_;
|
||||
std::deque<tr_variant*> stack_;
|
||||
std::optional<tr_quark> key_;
|
||||
|
||||
if (std::empty(stack))
|
||||
MyHandler(tr_variant* top, int parse_opts)
|
||||
: top_{ top }
|
||||
, parse_opts_{ parse_opts }
|
||||
{
|
||||
node = top;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* parent = stack.back();
|
||||
|
||||
if (tr_variantIsList(parent))
|
||||
bool Int64(int64_t value) final
|
||||
{
|
||||
if (tr_variant* variant = get_node(); variant != nullptr)
|
||||
{
|
||||
node = tr_variantListAdd(parent);
|
||||
tr_variantInitInt(variant, value);
|
||||
}
|
||||
else if (dict_key && tr_variantIsDict(parent))
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool String(std::string_view sv) final
|
||||
{
|
||||
if (tr_variant* variant = get_node(); variant != nullptr)
|
||||
{
|
||||
node = tr_variantDictAdd(parent, *dict_key);
|
||||
dict_key.reset();
|
||||
if ((parse_opts_ & TR_VARIANT_PARSE_INPLACE) != 0)
|
||||
{
|
||||
tr_variantInitStrView(variant, sv);
|
||||
}
|
||||
else
|
||||
{
|
||||
tr_variantInitStr(variant, sv);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartDict() final
|
||||
{
|
||||
if (tr_variant* variant = get_node(); variant != nullptr)
|
||||
{
|
||||
tr_variantInitDict(variant, 0);
|
||||
stack_.push_back(variant);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Key(std::string_view sv) final
|
||||
{
|
||||
key_ = tr_quark_new(sv);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndDict() final
|
||||
{
|
||||
stack_.pop_back();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartArray() final
|
||||
{
|
||||
if (tr_variant* variant = get_node(); variant != nullptr)
|
||||
{
|
||||
tr_variantInitList(variant, 0);
|
||||
stack_.push_back(variant);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EndArray() final
|
||||
{
|
||||
stack_.pop_back();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
tr_variant* get_node()
|
||||
{
|
||||
tr_variant* node = nullptr;
|
||||
|
||||
if (std::empty(stack_))
|
||||
{
|
||||
node = top_;
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = EILSEQ;
|
||||
auto* parent = stack_.back();
|
||||
|
||||
if (tr_variantIsList(parent))
|
||||
{
|
||||
node = tr_variantListAdd(parent);
|
||||
}
|
||||
else if (key_ && tr_variantIsDict(parent))
|
||||
{
|
||||
node = tr_variantDictAdd(parent, *key_);
|
||||
key_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
};
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function's previous recursive implementation was
|
||||
* easier to read, but was vulnerable to a smash-stacking
|
||||
* attack via maliciously-crafted bencoded data. (#667)
|
||||
*/
|
||||
int tr_variantParseBenc(tr_variant& top, int parse_opts, std::string_view benc, char const** setme_end)
|
||||
bool tr_variantParseBenc(tr_variant& top, int parse_opts, std::string_view benc, char const** setme_end, tr_error** error)
|
||||
{
|
||||
TR_ASSERT((parse_opts & TR_VARIANT_PARSE_BENC) != 0);
|
||||
|
||||
auto stack = std::deque<tr_variant*>{};
|
||||
auto key = std::optional<tr_quark>{};
|
||||
|
||||
tr_variantInit(&top, 0);
|
||||
|
||||
int err = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (std::empty(benc))
|
||||
{
|
||||
err = EILSEQ;
|
||||
}
|
||||
|
||||
if (err != 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
switch (benc.front())
|
||||
{
|
||||
case 'i': // int
|
||||
{
|
||||
auto const value = tr_bencParseInt(&benc);
|
||||
if (!value)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (tr_variant* const v = get_node(stack, key, &top, &err); v != nullptr)
|
||||
{
|
||||
tr_variantInitInt(v, *value);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'l': // list
|
||||
benc.remove_prefix(1);
|
||||
|
||||
if (tr_variant* const v = get_node(stack, key, &top, &err); v != nullptr)
|
||||
{
|
||||
tr_variantInitList(v, 0);
|
||||
stack.push_back(v);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'd': // dict
|
||||
benc.remove_prefix(1);
|
||||
|
||||
if (tr_variant* const v = get_node(stack, key, &top, &err); v != nullptr)
|
||||
{
|
||||
tr_variantInitDict(v, 0);
|
||||
stack.push_back(v);
|
||||
}
|
||||
break;
|
||||
case 'e': // end of list or dict
|
||||
benc.remove_prefix(1);
|
||||
|
||||
if (std::empty(stack) || key)
|
||||
{
|
||||
err = EILSEQ;
|
||||
break;
|
||||
}
|
||||
|
||||
stack.pop_back();
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9': // string?
|
||||
{
|
||||
auto const sv = tr_bencParseStr(&benc);
|
||||
if (!sv)
|
||||
{
|
||||
benc.remove_prefix(1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!key && !std::empty(stack) && tr_variantIsDict(stack.back()))
|
||||
{
|
||||
key = tr_quark_new(*sv);
|
||||
}
|
||||
else
|
||||
{
|
||||
tr_variant* const v = get_node(stack, key, &top, &err);
|
||||
if (v != nullptr)
|
||||
{
|
||||
if ((parse_opts & TR_VARIANT_PARSE_INPLACE) != 0)
|
||||
{
|
||||
tr_variantInitStrView(v, *sv);
|
||||
}
|
||||
else
|
||||
{
|
||||
tr_variantInitStr(v, *sv);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: // invalid bencoded text... march past it
|
||||
benc.remove_prefix(1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (std::empty(stack))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 0 && (top.type == 0 || !std::empty(stack)))
|
||||
{
|
||||
err = EILSEQ;
|
||||
}
|
||||
|
||||
if (err == 0)
|
||||
{
|
||||
if (setme_end != nullptr)
|
||||
{
|
||||
*setme_end = std::data(benc);
|
||||
}
|
||||
}
|
||||
else if (top.type != 0)
|
||||
using Stack = transmission::benc::ParserStack<512>;
|
||||
auto stack = Stack{};
|
||||
auto handler = MyHandler{ &top, parse_opts };
|
||||
bool const ok = transmission::benc::parse(benc, stack, handler, setme_end, error);
|
||||
if (!ok)
|
||||
{
|
||||
tr_variantFree(&top);
|
||||
tr_variantInit(&top, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
return ok;
|
||||
}
|
||||
|
||||
/****
|
||||
|
|
|
@ -43,6 +43,6 @@ std::optional<int64_t> tr_bencParseInt(std::string_view* benc_inout);
|
|||
/** @brief Private function that's exposed here only for unit tests */
|
||||
std::optional<std::string_view> tr_bencParseStr(std::string_view* benc_inout);
|
||||
|
||||
int tr_variantParseBenc(tr_variant& setme, int opts, std::string_view benc, char const** setme_end);
|
||||
bool tr_variantParseBenc(tr_variant& top, int parse_opts, std::string_view benc, char const** setme_end, tr_error** error);
|
||||
|
||||
int tr_variantParseJson(tr_variant& setme, int opts, std::string_view benc, char const** setme_end);
|
||||
|
|
|
@ -1236,19 +1236,26 @@ bool tr_variantFromBuf(tr_variant* setme, int opts, std::string_view buf, char c
|
|||
auto locale_ctx = locale_context{};
|
||||
use_numeric_locale(&locale_ctx, "C");
|
||||
|
||||
auto err = (opts & TR_VARIANT_PARSE_BENC) ? tr_variantParseBenc(*setme, opts, buf, setme_end) :
|
||||
tr_variantParseJson(*setme, opts, buf, setme_end);
|
||||
auto success = bool{};
|
||||
if (opts & TR_VARIANT_PARSE_BENC)
|
||||
{
|
||||
success = tr_variantParseBenc(*setme, opts, buf, setme_end, error);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: tr_variantParseJson() should take a tr_error* same as ParseBenc
|
||||
auto err = tr_variantParseJson(*setme, opts, buf, setme_end);
|
||||
if (err)
|
||||
{
|
||||
tr_error_set(error, EILSEQ, "error parsing encoded data"sv);
|
||||
}
|
||||
success = err == 0;
|
||||
}
|
||||
|
||||
/* restore the previous locale */
|
||||
restore_locale(&locale_ctx);
|
||||
|
||||
if (err)
|
||||
{
|
||||
tr_error_set(error, EILSEQ, "error parsing encoded data"sv);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return success;
|
||||
}
|
||||
|
||||
bool tr_variantFromFile(tr_variant* setme, tr_variant_parse_opts opts, std::string const& filename, tr_error** error)
|
||||
|
@ -1262,6 +1269,6 @@ bool tr_variantFromFile(tr_variant* setme, tr_variant_parse_opts opts, std::stri
|
|||
return false;
|
||||
}
|
||||
|
||||
auto sv = std::string_view{ std::data(buf), std::size(buf) };
|
||||
auto const sv = std::string_view{ std::data(buf), std::size(buf) };
|
||||
return tr_variantFromBuf(setme, opts, sv, nullptr, error);
|
||||
}
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#define LIBTRANSMISSION_VARIANT_MODULE
|
||||
|
||||
#include "transmission.h"
|
||||
|
||||
#include "benc.h"
|
||||
#include "error.h"
|
||||
#include "utils.h" /* tr_free */
|
||||
#include "variant-common.h"
|
||||
#include "variant.h"
|
||||
|
@ -90,7 +93,7 @@ TEST_F(VariantTest, parseInt)
|
|||
auto constexpr ExpectVal = int64_t{ 64 };
|
||||
|
||||
auto benc = Benc;
|
||||
auto const value = tr_bencParseInt(&benc);
|
||||
auto const value = transmission::benc::impl::ParseInt(&benc);
|
||||
EXPECT_TRUE(value);
|
||||
EXPECT_EQ(ExpectVal, *value);
|
||||
EXPECT_EQ(std::data(Benc) + std::size(Benc), std::data(benc));
|
||||
|
@ -101,7 +104,7 @@ TEST_F(VariantTest, parseIntWithMissingEnd)
|
|||
auto constexpr Benc = "i64"sv;
|
||||
|
||||
auto benc = Benc;
|
||||
EXPECT_FALSE(tr_bencParseInt(&benc));
|
||||
EXPECT_FALSE(transmission::benc::impl::ParseInt(&benc));
|
||||
EXPECT_EQ(std::data(Benc), std::data(benc));
|
||||
}
|
||||
|
||||
|
@ -110,7 +113,7 @@ TEST_F(VariantTest, parseIntEmptyBuffer)
|
|||
auto constexpr Benc = ""sv;
|
||||
|
||||
auto benc = Benc;
|
||||
EXPECT_FALSE(tr_bencParseInt(&benc));
|
||||
EXPECT_FALSE(transmission::benc::impl::ParseInt(&benc));
|
||||
EXPECT_EQ(std::data(Benc), std::data(benc));
|
||||
}
|
||||
|
||||
|
@ -119,7 +122,7 @@ TEST_F(VariantTest, parseIntWithBadDigits)
|
|||
auto constexpr Benc = "i6z4e"sv;
|
||||
|
||||
auto benc = Benc;
|
||||
EXPECT_FALSE(tr_bencParseInt(&benc));
|
||||
EXPECT_FALSE(transmission::benc::impl::ParseInt(&benc));
|
||||
EXPECT_EQ(std::data(Benc), std::data(benc));
|
||||
}
|
||||
|
||||
|
@ -129,7 +132,7 @@ TEST_F(VariantTest, parseNegativeInt)
|
|||
auto constexpr Expected = int64_t{ -3 };
|
||||
|
||||
auto benc = Benc;
|
||||
auto const value = tr_bencParseInt(&benc);
|
||||
auto const value = transmission::benc::impl::ParseInt(&benc);
|
||||
EXPECT_TRUE(value);
|
||||
EXPECT_EQ(Expected, *value);
|
||||
EXPECT_EQ(std::data(Benc) + std::size(Benc), std::data(benc));
|
||||
|
@ -140,7 +143,7 @@ TEST_F(VariantTest, parseNegativeWithLeadingZero)
|
|||
auto constexpr Benc = "i-03e"sv;
|
||||
|
||||
auto benc = Benc;
|
||||
EXPECT_FALSE(tr_bencParseInt(&benc));
|
||||
EXPECT_FALSE(transmission::benc::impl::ParseInt(&benc));
|
||||
EXPECT_EQ(std::data(Benc), std::data(benc));
|
||||
}
|
||||
|
||||
|
@ -150,7 +153,7 @@ TEST_F(VariantTest, parseIntZero)
|
|||
auto constexpr Expected = int64_t{ 0 };
|
||||
|
||||
auto benc = Benc;
|
||||
auto const value = tr_bencParseInt(&benc);
|
||||
auto const value = transmission::benc::impl::ParseInt(&benc);
|
||||
EXPECT_TRUE(value);
|
||||
EXPECT_EQ(Expected, *value);
|
||||
EXPECT_EQ(std::data(Benc) + std::size(Benc), std::data(benc));
|
||||
|
@ -161,42 +164,44 @@ TEST_F(VariantTest, parseIntWithLeadingZero)
|
|||
auto constexpr Benc = "i04e"sv;
|
||||
|
||||
auto benc = Benc;
|
||||
EXPECT_FALSE(tr_bencParseInt(&benc));
|
||||
EXPECT_FALSE(transmission::benc::impl::ParseInt(&benc));
|
||||
EXPECT_EQ(std::data(Benc), std::data(benc));
|
||||
}
|
||||
|
||||
TEST_F(VariantTest, str)
|
||||
{
|
||||
using namespace transmission::benc::impl;
|
||||
|
||||
// string len is designed to overflow
|
||||
auto benc = "99999999999999999999:boat"sv;
|
||||
auto inout = benc;
|
||||
auto value = tr_bencParseStr(&inout);
|
||||
auto value = ParseString(&inout);
|
||||
EXPECT_FALSE(value);
|
||||
EXPECT_EQ(benc, inout);
|
||||
|
||||
// good string
|
||||
inout = benc = "4:boat";
|
||||
value = tr_bencParseStr(&inout);
|
||||
value = ParseString(&inout);
|
||||
EXPECT_TRUE(value);
|
||||
EXPECT_EQ("boat"sv, *value);
|
||||
EXPECT_EQ(std::data(benc) + std::size(benc), std::data(inout));
|
||||
|
||||
// string goes past end of buffer
|
||||
inout = benc = "4:boa"sv;
|
||||
value = tr_bencParseStr(&inout);
|
||||
value = ParseString(&inout);
|
||||
EXPECT_FALSE(value);
|
||||
EXPECT_EQ(benc, inout);
|
||||
|
||||
// empty string
|
||||
inout = benc = "0:"sv;
|
||||
value = tr_bencParseStr(&inout);
|
||||
value = ParseString(&inout);
|
||||
EXPECT_TRUE(value);
|
||||
EXPECT_EQ(""sv, *value);
|
||||
EXPECT_EQ(std::data(benc) + std::size(benc), std::data(inout));
|
||||
|
||||
// short string
|
||||
inout = benc = "3:boat";
|
||||
value = tr_bencParseStr(&inout);
|
||||
value = ParseString(&inout);
|
||||
EXPECT_TRUE(value);
|
||||
EXPECT_EQ("boa"sv, *value);
|
||||
EXPECT_EQ(std::data(benc) + benc.find('t'), std::data(inout));
|
||||
|
@ -274,6 +279,7 @@ TEST_F(VariantTest, bencParseAndReencode)
|
|||
tr_variant val;
|
||||
char const* end = nullptr;
|
||||
auto const is_good = tr_variantFromBuf(&val, TR_VARIANT_PARSE_BENC | TR_VARIANT_PARSE_INPLACE, test.benc, &end);
|
||||
|
||||
EXPECT_EQ(test.is_good, is_good);
|
||||
if (is_good)
|
||||
{
|
||||
|
@ -418,17 +424,16 @@ TEST_F(VariantTest, stackSmash)
|
|||
int constexpr Depth = STACK_SMASH_DEPTH;
|
||||
std::string const in = std::string(Depth, 'l') + std::string(Depth, 'e');
|
||||
|
||||
// confirm that it parses
|
||||
// confirm that it fails instead of crashing
|
||||
char const* end;
|
||||
tr_variant val;
|
||||
auto ok = tr_variantFromBuf(&val, TR_VARIANT_PARSE_BENC | TR_VARIANT_PARSE_INPLACE, in, &end);
|
||||
EXPECT_TRUE(ok);
|
||||
EXPECT_EQ(in.data() + in.size(), end);
|
||||
tr_error* error = nullptr;
|
||||
auto ok = tr_variantFromBuf(&val, TR_VARIANT_PARSE_BENC | TR_VARIANT_PARSE_INPLACE, in, &end, &error);
|
||||
EXPECT_NE(nullptr, error);
|
||||
EXPECT_EQ(E2BIG, error->code);
|
||||
EXPECT_FALSE(ok);
|
||||
|
||||
// confirm that we can serialize it back again
|
||||
EXPECT_EQ(in, tr_variantToStr(&val, TR_VARIANT_FMT_BENC));
|
||||
|
||||
tr_variantFree(&val);
|
||||
tr_error_clear(&error);
|
||||
}
|
||||
|
||||
TEST_F(VariantTest, boolAndIntRecast)
|
||||
|
|
Loading…
Reference in New Issue