1
0
Fork 0
mirror of https://github.com/transmission/transmission synced 2024-12-28 10:38:45 +00:00

refactor: don't use tr_free in tr_strvUtf8Clean() (#3658)

This commit is contained in:
Charles Kerr 2022-08-16 21:47:07 -05:00 committed by GitHub
parent fecdc418e5
commit 6b25a57899
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 46 deletions

View file

@ -321,11 +321,11 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
}
else if (pathIs(CommentKey) || pathIs(CommentUtf8Key))
{
tr_strvUtf8Clean(value, tm_.comment_);
tm_.comment_ = tr_strvUtf8Clean(value);
}
else if (pathIs(CreatedByKey) || pathIs(CreatedByUtf8Key))
{
tr_strvUtf8Clean(value, tm_.creator_);
tm_.creator_ = tr_strvUtf8Clean(value);
}
else if (pathIs(SourceKey) || pathIs(InfoKey, SourceKey) || pathIs(PublisherKey) || pathIs(InfoKey, PublisherKey))
{
@ -333,7 +333,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
// to have the same use as the 'source' key
// http://wiki.bitcomet.com/inside_bitcomet
tr_strvUtf8Clean(value, tm_.source_);
tm_.source_ = tr_strvUtf8Clean(value);
}
else if (pathIs(AnnounceKey))
{
@ -349,7 +349,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
}
else if (pathIs(InfoKey, NameKey) || pathIs(InfoKey, NameUtf8Key))
{
tr_strvUtf8Clean(value, tm_.name_);
tm_.name_ = tr_strvUtf8Clean(value);
}
else if (pathIs(InfoKey, PiecesKey))
{

View file

@ -390,22 +390,19 @@ bool tr_utf8_validate(std::string_view sv, char const** good_end)
return all_good;
}
static char* strip_non_utf8(std::string_view sv)
static std::string strip_non_utf8(std::string_view sv)
{
auto* const ret = tr_new(char, std::size(sv) + 1);
if (ret != nullptr)
{
auto const it = utf8::unchecked::replace_invalid(std::data(sv), std::data(sv) + std::size(sv), ret, '?');
*it = '\0';
}
return ret;
auto out = std::string{};
utf8::unchecked::replace_invalid(std::data(sv), std::data(sv) + std::size(sv), std::back_inserter(out), '?');
return out;
}
static char* to_utf8(std::string_view sv)
static std::string to_utf8(std::string_view sv)
{
#ifdef HAVE_ICONV
size_t const buflen = std::size(sv) * 4 + 10;
auto* const out = tr_new(char, buflen);
auto buf = std::vector<char>{};
buf.resize(buflen);
auto constexpr Encodings = std::array<char const*, 2>{ "CURRENT", "ISO-8859-15" };
for (auto const* test_encoding : Encodings)
@ -421,40 +418,30 @@ static char* to_utf8(std::string_view sv)
#else
auto* inbuf = const_cast<char*>(std::data(sv));
#endif
char* outbuf = out;
size_t inbytesleft = std::size(sv);
size_t outbytesleft = buflen;
auto const rv = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
char* out = std::data(buf);
size_t outbytesleft = std::size(buf);
auto const rv = iconv(cd, &inbuf, &inbytesleft, &out, &outbytesleft);
iconv_close(cd);
if (rv != size_t(-1))
{
char* const ret = tr_strvDup({ out, buflen - outbytesleft });
tr_free(out);
return ret;
return std::string{ std::data(buf), buflen - outbytesleft };
}
}
tr_free(out);
#endif
return strip_non_utf8(sv);
}
std::string& tr_strvUtf8Clean(std::string_view cleanme, std::string& setme)
std::string tr_strvUtf8Clean(std::string_view cleanme)
{
if (tr_utf8_validate(cleanme, nullptr))
{
setme = cleanme;
}
else
{
auto* const tmp = to_utf8(cleanme);
setme.assign(tmp != nullptr ? tmp : "");
tr_free(tmp);
return std::string{ cleanme };
}
return setme;
return to_utf8(cleanme);
}
#ifdef _WIN32

View file

@ -227,7 +227,16 @@ constexpr bool tr_strvSep(std::string_view* sv, std::string_view* token, char de
[[nodiscard]] char* tr_strvDup(std::string_view) TR_GNUC_MALLOC;
std::string& tr_strvUtf8Clean(std::string_view cleanme, std::string& setme);
[[nodiscard]] std::string tr_strvUtf8Clean(std::string_view cleanme);
/**
* @brief copies `src` into `buf`.
*
* - Always returns std::size(src).
* - `src` will be copied into `buf` iff `buflen >= std::size(src)`
* - `buf` will also be zero terminated iff `buflen >= std::size(src) + 1`.
*/
size_t tr_strvToBuf(std::string_view src, char* buf, size_t buflen);
/**
* @brief copies `src` into `buf`.

View file

@ -124,45 +124,53 @@ TEST_F(UtilsTest, trStrvDup)
TEST_F(UtilsTest, trStrvUtf8Clean)
{
auto in = "hello world"sv;
auto out = std::string{};
tr_strvUtf8Clean(in, out);
auto out = tr_strvUtf8Clean(in);
EXPECT_EQ(in, out);
in = "hello world"sv;
tr_strvUtf8Clean(in.substr(0, 5), out);
out = tr_strvUtf8Clean(in.substr(0, 5));
EXPECT_EQ("hello"sv, out);
// this version is not utf-8 (but cp866)
in = "\x92\xE0\xE3\xA4\xAD\xAE \xA1\xEB\xE2\xEC \x81\xAE\xA3\xAE\xAC"sv;
tr_strvUtf8Clean(in, out);
out = tr_strvUtf8Clean(in);
EXPECT_TRUE(std::size(out) == 17 || std::size(out) == 33);
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
// same string, but utf-8 clean
in = "Трудно быть Богом"sv;
tr_strvUtf8Clean(in, out);
EXPECT_NE(nullptr, out.data());
out = tr_strvUtf8Clean(in);
EXPECT_NE(0U, std::size(out));
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
EXPECT_EQ(in, out);
// https://trac.transmissionbt.com/ticket/6064
// TODO(anyone): It seems like that bug was not fixed so much as we just
// let strlen() solve the problem for us; however, it's probably better
// to wait until https://github.com/transmission/transmission/issues/612
// is resolved before revisiting this.
// This was a fuzzer-generated string that crashed Transmission.
// Even invalid strings shouldn't cause a crash.
in = "\xF4\x00\x81\x82"sv;
tr_strvUtf8Clean(in, out);
EXPECT_NE(nullptr, out.data());
EXPECT_TRUE(out.size() == 1 || out.size() == 2);
out = tr_strvUtf8Clean(in);
EXPECT_NE(0U, std::size(out));
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
in = "\xF4\x33\x81\x82"sv;
tr_strvUtf8Clean(in, out);
out = tr_strvUtf8Clean(in);
EXPECT_NE(nullptr, out.data());
EXPECT_TRUE(out.size() == 4 || out.size() == 7);
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
}
TEST_F(UtilsTest, trStrvUtf8CleanFuzz)
{
auto buf = std::vector<char>{};
for (size_t i = 0; i < 1000; ++i)
{
buf.resize(tr_rand_int(4096));
tr_rand_buffer(std::data(buf), std::size(buf));
auto const out = tr_strvUtf8Clean({ std::data(buf), std::size(buf) });
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
}
}
TEST_F(UtilsTest, trParseNumberRange)
{
auto const tostring = [](std::vector<int> const& v)