mirror of
https://github.com/transmission/transmission
synced 2024-12-29 02:56:11 +00:00
refactor: don't use tr_free in tr_strvUtf8Clean() (#3658)
This commit is contained in:
parent
fecdc418e5
commit
6b25a57899
4 changed files with 50 additions and 46 deletions
|
@ -321,11 +321,11 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
|
||||||
}
|
}
|
||||||
else if (pathIs(CommentKey) || pathIs(CommentUtf8Key))
|
else if (pathIs(CommentKey) || pathIs(CommentUtf8Key))
|
||||||
{
|
{
|
||||||
tr_strvUtf8Clean(value, tm_.comment_);
|
tm_.comment_ = tr_strvUtf8Clean(value);
|
||||||
}
|
}
|
||||||
else if (pathIs(CreatedByKey) || pathIs(CreatedByUtf8Key))
|
else if (pathIs(CreatedByKey) || pathIs(CreatedByUtf8Key))
|
||||||
{
|
{
|
||||||
tr_strvUtf8Clean(value, tm_.creator_);
|
tm_.creator_ = tr_strvUtf8Clean(value);
|
||||||
}
|
}
|
||||||
else if (pathIs(SourceKey) || pathIs(InfoKey, SourceKey) || pathIs(PublisherKey) || pathIs(InfoKey, PublisherKey))
|
else if (pathIs(SourceKey) || pathIs(InfoKey, SourceKey) || pathIs(PublisherKey) || pathIs(InfoKey, PublisherKey))
|
||||||
{
|
{
|
||||||
|
@ -333,7 +333,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
|
||||||
// to have the same use as the 'source' key
|
// to have the same use as the 'source' key
|
||||||
// http://wiki.bitcomet.com/inside_bitcomet
|
// http://wiki.bitcomet.com/inside_bitcomet
|
||||||
|
|
||||||
tr_strvUtf8Clean(value, tm_.source_);
|
tm_.source_ = tr_strvUtf8Clean(value);
|
||||||
}
|
}
|
||||||
else if (pathIs(AnnounceKey))
|
else if (pathIs(AnnounceKey))
|
||||||
{
|
{
|
||||||
|
@ -349,7 +349,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
|
||||||
}
|
}
|
||||||
else if (pathIs(InfoKey, NameKey) || pathIs(InfoKey, NameUtf8Key))
|
else if (pathIs(InfoKey, NameKey) || pathIs(InfoKey, NameUtf8Key))
|
||||||
{
|
{
|
||||||
tr_strvUtf8Clean(value, tm_.name_);
|
tm_.name_ = tr_strvUtf8Clean(value);
|
||||||
}
|
}
|
||||||
else if (pathIs(InfoKey, PiecesKey))
|
else if (pathIs(InfoKey, PiecesKey))
|
||||||
{
|
{
|
||||||
|
|
|
@ -390,22 +390,19 @@ bool tr_utf8_validate(std::string_view sv, char const** good_end)
|
||||||
return all_good;
|
return all_good;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char* strip_non_utf8(std::string_view sv)
|
static std::string strip_non_utf8(std::string_view sv)
|
||||||
{
|
{
|
||||||
auto* const ret = tr_new(char, std::size(sv) + 1);
|
auto out = std::string{};
|
||||||
if (ret != nullptr)
|
utf8::unchecked::replace_invalid(std::data(sv), std::data(sv) + std::size(sv), std::back_inserter(out), '?');
|
||||||
{
|
return out;
|
||||||
auto const it = utf8::unchecked::replace_invalid(std::data(sv), std::data(sv) + std::size(sv), ret, '?');
|
|
||||||
*it = '\0';
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static char* to_utf8(std::string_view sv)
|
static std::string to_utf8(std::string_view sv)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_ICONV
|
#ifdef HAVE_ICONV
|
||||||
size_t const buflen = std::size(sv) * 4 + 10;
|
size_t const buflen = std::size(sv) * 4 + 10;
|
||||||
auto* const out = tr_new(char, buflen);
|
auto buf = std::vector<char>{};
|
||||||
|
buf.resize(buflen);
|
||||||
|
|
||||||
auto constexpr Encodings = std::array<char const*, 2>{ "CURRENT", "ISO-8859-15" };
|
auto constexpr Encodings = std::array<char const*, 2>{ "CURRENT", "ISO-8859-15" };
|
||||||
for (auto const* test_encoding : Encodings)
|
for (auto const* test_encoding : Encodings)
|
||||||
|
@ -421,40 +418,30 @@ static char* to_utf8(std::string_view sv)
|
||||||
#else
|
#else
|
||||||
auto* inbuf = const_cast<char*>(std::data(sv));
|
auto* inbuf = const_cast<char*>(std::data(sv));
|
||||||
#endif
|
#endif
|
||||||
char* outbuf = out;
|
|
||||||
size_t inbytesleft = std::size(sv);
|
size_t inbytesleft = std::size(sv);
|
||||||
size_t outbytesleft = buflen;
|
char* out = std::data(buf);
|
||||||
auto const rv = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
size_t outbytesleft = std::size(buf);
|
||||||
|
auto const rv = iconv(cd, &inbuf, &inbytesleft, &out, &outbytesleft);
|
||||||
iconv_close(cd);
|
iconv_close(cd);
|
||||||
if (rv != size_t(-1))
|
if (rv != size_t(-1))
|
||||||
{
|
{
|
||||||
char* const ret = tr_strvDup({ out, buflen - outbytesleft });
|
return std::string{ std::data(buf), buflen - outbytesleft };
|
||||||
tr_free(out);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tr_free(out);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return strip_non_utf8(sv);
|
return strip_non_utf8(sv);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string& tr_strvUtf8Clean(std::string_view cleanme, std::string& setme)
|
std::string tr_strvUtf8Clean(std::string_view cleanme)
|
||||||
{
|
{
|
||||||
if (tr_utf8_validate(cleanme, nullptr))
|
if (tr_utf8_validate(cleanme, nullptr))
|
||||||
{
|
{
|
||||||
setme = cleanme;
|
return std::string{ cleanme };
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto* const tmp = to_utf8(cleanme);
|
|
||||||
setme.assign(tmp != nullptr ? tmp : "");
|
|
||||||
tr_free(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return setme;
|
return to_utf8(cleanme);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
|
@ -227,7 +227,16 @@ constexpr bool tr_strvSep(std::string_view* sv, std::string_view* token, char de
|
||||||
|
|
||||||
[[nodiscard]] char* tr_strvDup(std::string_view) TR_GNUC_MALLOC;
|
[[nodiscard]] char* tr_strvDup(std::string_view) TR_GNUC_MALLOC;
|
||||||
|
|
||||||
std::string& tr_strvUtf8Clean(std::string_view cleanme, std::string& setme);
|
[[nodiscard]] std::string tr_strvUtf8Clean(std::string_view cleanme);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief copies `src` into `buf`.
|
||||||
|
*
|
||||||
|
* - Always returns std::size(src).
|
||||||
|
* - `src` will be copied into `buf` iff `buflen >= std::size(src)`
|
||||||
|
* - `buf` will also be zero terminated iff `buflen >= std::size(src) + 1`.
|
||||||
|
*/
|
||||||
|
size_t tr_strvToBuf(std::string_view src, char* buf, size_t buflen);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief copies `src` into `buf`.
|
* @brief copies `src` into `buf`.
|
||||||
|
|
|
@ -124,45 +124,53 @@ TEST_F(UtilsTest, trStrvDup)
|
||||||
TEST_F(UtilsTest, trStrvUtf8Clean)
|
TEST_F(UtilsTest, trStrvUtf8Clean)
|
||||||
{
|
{
|
||||||
auto in = "hello world"sv;
|
auto in = "hello world"sv;
|
||||||
auto out = std::string{};
|
auto out = tr_strvUtf8Clean(in);
|
||||||
tr_strvUtf8Clean(in, out);
|
|
||||||
EXPECT_EQ(in, out);
|
EXPECT_EQ(in, out);
|
||||||
|
|
||||||
in = "hello world"sv;
|
in = "hello world"sv;
|
||||||
tr_strvUtf8Clean(in.substr(0, 5), out);
|
out = tr_strvUtf8Clean(in.substr(0, 5));
|
||||||
EXPECT_EQ("hello"sv, out);
|
EXPECT_EQ("hello"sv, out);
|
||||||
|
|
||||||
// this version is not utf-8 (but cp866)
|
// this version is not utf-8 (but cp866)
|
||||||
in = "\x92\xE0\xE3\xA4\xAD\xAE \xA1\xEB\xE2\xEC \x81\xAE\xA3\xAE\xAC"sv;
|
in = "\x92\xE0\xE3\xA4\xAD\xAE \xA1\xEB\xE2\xEC \x81\xAE\xA3\xAE\xAC"sv;
|
||||||
tr_strvUtf8Clean(in, out);
|
out = tr_strvUtf8Clean(in);
|
||||||
EXPECT_TRUE(std::size(out) == 17 || std::size(out) == 33);
|
EXPECT_TRUE(std::size(out) == 17 || std::size(out) == 33);
|
||||||
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
||||||
|
|
||||||
// same string, but utf-8 clean
|
// same string, but utf-8 clean
|
||||||
in = "Трудно быть Богом"sv;
|
in = "Трудно быть Богом"sv;
|
||||||
tr_strvUtf8Clean(in, out);
|
out = tr_strvUtf8Clean(in);
|
||||||
EXPECT_NE(nullptr, out.data());
|
EXPECT_NE(0U, std::size(out));
|
||||||
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
||||||
EXPECT_EQ(in, out);
|
EXPECT_EQ(in, out);
|
||||||
|
|
||||||
// https://trac.transmissionbt.com/ticket/6064
|
// https://trac.transmissionbt.com/ticket/6064
|
||||||
// TODO(anyone): It seems like that bug was not fixed so much as we just
|
// This was a fuzzer-generated string that crashed Transmission.
|
||||||
// let strlen() solve the problem for us; however, it's probably better
|
// Even invalid strings shouldn't cause a crash.
|
||||||
// to wait until https://github.com/transmission/transmission/issues/612
|
|
||||||
// is resolved before revisiting this.
|
|
||||||
in = "\xF4\x00\x81\x82"sv;
|
in = "\xF4\x00\x81\x82"sv;
|
||||||
tr_strvUtf8Clean(in, out);
|
out = tr_strvUtf8Clean(in);
|
||||||
EXPECT_NE(nullptr, out.data());
|
EXPECT_NE(0U, std::size(out));
|
||||||
EXPECT_TRUE(out.size() == 1 || out.size() == 2);
|
|
||||||
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
||||||
|
|
||||||
in = "\xF4\x33\x81\x82"sv;
|
in = "\xF4\x33\x81\x82"sv;
|
||||||
tr_strvUtf8Clean(in, out);
|
out = tr_strvUtf8Clean(in);
|
||||||
EXPECT_NE(nullptr, out.data());
|
EXPECT_NE(nullptr, out.data());
|
||||||
EXPECT_TRUE(out.size() == 4 || out.size() == 7);
|
EXPECT_TRUE(out.size() == 4 || out.size() == 7);
|
||||||
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(UtilsTest, trStrvUtf8CleanFuzz)
|
||||||
|
{
|
||||||
|
auto buf = std::vector<char>{};
|
||||||
|
for (size_t i = 0; i < 1000; ++i)
|
||||||
|
{
|
||||||
|
buf.resize(tr_rand_int(4096));
|
||||||
|
tr_rand_buffer(std::data(buf), std::size(buf));
|
||||||
|
auto const out = tr_strvUtf8Clean({ std::data(buf), std::size(buf) });
|
||||||
|
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(UtilsTest, trParseNumberRange)
|
TEST_F(UtilsTest, trParseNumberRange)
|
||||||
{
|
{
|
||||||
auto const tostring = [](std::vector<int> const& v)
|
auto const tostring = [](std::vector<int> const& v)
|
||||||
|
|
Loading…
Reference in a new issue