1
0
Fork 0
mirror of https://github.com/transmission/transmission synced 2024-12-31 20:16:57 +00:00

refactor: don't use tr_free in tr_strvUtf8Clean() (#3658)

This commit is contained in:
Charles Kerr 2022-08-16 21:47:07 -05:00 committed by GitHub
parent fecdc418e5
commit 6b25a57899
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 46 deletions

View file

@ -321,11 +321,11 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
} }
else if (pathIs(CommentKey) || pathIs(CommentUtf8Key)) else if (pathIs(CommentKey) || pathIs(CommentUtf8Key))
{ {
tr_strvUtf8Clean(value, tm_.comment_); tm_.comment_ = tr_strvUtf8Clean(value);
} }
else if (pathIs(CreatedByKey) || pathIs(CreatedByUtf8Key)) else if (pathIs(CreatedByKey) || pathIs(CreatedByUtf8Key))
{ {
tr_strvUtf8Clean(value, tm_.creator_); tm_.creator_ = tr_strvUtf8Clean(value);
} }
else if (pathIs(SourceKey) || pathIs(InfoKey, SourceKey) || pathIs(PublisherKey) || pathIs(InfoKey, PublisherKey)) else if (pathIs(SourceKey) || pathIs(InfoKey, SourceKey) || pathIs(PublisherKey) || pathIs(InfoKey, PublisherKey))
{ {
@ -333,7 +333,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
// to have the same use as the 'source' key // to have the same use as the 'source' key
// http://wiki.bitcomet.com/inside_bitcomet // http://wiki.bitcomet.com/inside_bitcomet
tr_strvUtf8Clean(value, tm_.source_); tm_.source_ = tr_strvUtf8Clean(value);
} }
else if (pathIs(AnnounceKey)) else if (pathIs(AnnounceKey))
{ {
@ -349,7 +349,7 @@ struct MetainfoHandler final : public transmission::benc::BasicHandler<MaxBencDe
} }
else if (pathIs(InfoKey, NameKey) || pathIs(InfoKey, NameUtf8Key)) else if (pathIs(InfoKey, NameKey) || pathIs(InfoKey, NameUtf8Key))
{ {
tr_strvUtf8Clean(value, tm_.name_); tm_.name_ = tr_strvUtf8Clean(value);
} }
else if (pathIs(InfoKey, PiecesKey)) else if (pathIs(InfoKey, PiecesKey))
{ {

View file

@ -390,22 +390,19 @@ bool tr_utf8_validate(std::string_view sv, char const** good_end)
return all_good; return all_good;
} }
static char* strip_non_utf8(std::string_view sv) static std::string strip_non_utf8(std::string_view sv)
{ {
auto* const ret = tr_new(char, std::size(sv) + 1); auto out = std::string{};
if (ret != nullptr) utf8::unchecked::replace_invalid(std::data(sv), std::data(sv) + std::size(sv), std::back_inserter(out), '?');
{ return out;
auto const it = utf8::unchecked::replace_invalid(std::data(sv), std::data(sv) + std::size(sv), ret, '?');
*it = '\0';
}
return ret;
} }
static char* to_utf8(std::string_view sv) static std::string to_utf8(std::string_view sv)
{ {
#ifdef HAVE_ICONV #ifdef HAVE_ICONV
size_t const buflen = std::size(sv) * 4 + 10; size_t const buflen = std::size(sv) * 4 + 10;
auto* const out = tr_new(char, buflen); auto buf = std::vector<char>{};
buf.resize(buflen);
auto constexpr Encodings = std::array<char const*, 2>{ "CURRENT", "ISO-8859-15" }; auto constexpr Encodings = std::array<char const*, 2>{ "CURRENT", "ISO-8859-15" };
for (auto const* test_encoding : Encodings) for (auto const* test_encoding : Encodings)
@ -421,40 +418,30 @@ static char* to_utf8(std::string_view sv)
#else #else
auto* inbuf = const_cast<char*>(std::data(sv)); auto* inbuf = const_cast<char*>(std::data(sv));
#endif #endif
char* outbuf = out;
size_t inbytesleft = std::size(sv); size_t inbytesleft = std::size(sv);
size_t outbytesleft = buflen; char* out = std::data(buf);
auto const rv = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); size_t outbytesleft = std::size(buf);
auto const rv = iconv(cd, &inbuf, &inbytesleft, &out, &outbytesleft);
iconv_close(cd); iconv_close(cd);
if (rv != size_t(-1)) if (rv != size_t(-1))
{ {
char* const ret = tr_strvDup({ out, buflen - outbytesleft }); return std::string{ std::data(buf), buflen - outbytesleft };
tr_free(out);
return ret;
} }
} }
tr_free(out);
#endif #endif
return strip_non_utf8(sv); return strip_non_utf8(sv);
} }
std::string& tr_strvUtf8Clean(std::string_view cleanme, std::string& setme) std::string tr_strvUtf8Clean(std::string_view cleanme)
{ {
if (tr_utf8_validate(cleanme, nullptr)) if (tr_utf8_validate(cleanme, nullptr))
{ {
setme = cleanme; return std::string{ cleanme };
}
else
{
auto* const tmp = to_utf8(cleanme);
setme.assign(tmp != nullptr ? tmp : "");
tr_free(tmp);
} }
return setme; return to_utf8(cleanme);
} }
#ifdef _WIN32 #ifdef _WIN32

View file

@ -227,7 +227,16 @@ constexpr bool tr_strvSep(std::string_view* sv, std::string_view* token, char de
[[nodiscard]] char* tr_strvDup(std::string_view) TR_GNUC_MALLOC; [[nodiscard]] char* tr_strvDup(std::string_view) TR_GNUC_MALLOC;
std::string& tr_strvUtf8Clean(std::string_view cleanme, std::string& setme); [[nodiscard]] std::string tr_strvUtf8Clean(std::string_view cleanme);
/**
* @brief copies `src` into `buf`.
*
* - Always returns std::size(src).
* - `src` will be copied into `buf` iff `buflen >= std::size(src)`
* - `buf` will also be zero terminated iff `buflen >= std::size(src) + 1`.
*/
size_t tr_strvToBuf(std::string_view src, char* buf, size_t buflen);
/** /**
* @brief copies `src` into `buf`. * @brief copies `src` into `buf`.

View file

@ -124,45 +124,53 @@ TEST_F(UtilsTest, trStrvDup)
TEST_F(UtilsTest, trStrvUtf8Clean) TEST_F(UtilsTest, trStrvUtf8Clean)
{ {
auto in = "hello world"sv; auto in = "hello world"sv;
auto out = std::string{}; auto out = tr_strvUtf8Clean(in);
tr_strvUtf8Clean(in, out);
EXPECT_EQ(in, out); EXPECT_EQ(in, out);
in = "hello world"sv; in = "hello world"sv;
tr_strvUtf8Clean(in.substr(0, 5), out); out = tr_strvUtf8Clean(in.substr(0, 5));
EXPECT_EQ("hello"sv, out); EXPECT_EQ("hello"sv, out);
// this version is not utf-8 (but cp866) // this version is not utf-8 (but cp866)
in = "\x92\xE0\xE3\xA4\xAD\xAE \xA1\xEB\xE2\xEC \x81\xAE\xA3\xAE\xAC"sv; in = "\x92\xE0\xE3\xA4\xAD\xAE \xA1\xEB\xE2\xEC \x81\xAE\xA3\xAE\xAC"sv;
tr_strvUtf8Clean(in, out); out = tr_strvUtf8Clean(in);
EXPECT_TRUE(std::size(out) == 17 || std::size(out) == 33); EXPECT_TRUE(std::size(out) == 17 || std::size(out) == 33);
EXPECT_TRUE(tr_utf8_validate(out, nullptr)); EXPECT_TRUE(tr_utf8_validate(out, nullptr));
// same string, but utf-8 clean // same string, but utf-8 clean
in = "Трудно быть Богом"sv; in = "Трудно быть Богом"sv;
tr_strvUtf8Clean(in, out); out = tr_strvUtf8Clean(in);
EXPECT_NE(nullptr, out.data()); EXPECT_NE(0U, std::size(out));
EXPECT_TRUE(tr_utf8_validate(out, nullptr)); EXPECT_TRUE(tr_utf8_validate(out, nullptr));
EXPECT_EQ(in, out); EXPECT_EQ(in, out);
// https://trac.transmissionbt.com/ticket/6064 // https://trac.transmissionbt.com/ticket/6064
// TODO(anyone): It seems like that bug was not fixed so much as we just // This was a fuzzer-generated string that crashed Transmission.
// let strlen() solve the problem for us; however, it's probably better // Even invalid strings shouldn't cause a crash.
// to wait until https://github.com/transmission/transmission/issues/612
// is resolved before revisiting this.
in = "\xF4\x00\x81\x82"sv; in = "\xF4\x00\x81\x82"sv;
tr_strvUtf8Clean(in, out); out = tr_strvUtf8Clean(in);
EXPECT_NE(nullptr, out.data()); EXPECT_NE(0U, std::size(out));
EXPECT_TRUE(out.size() == 1 || out.size() == 2);
EXPECT_TRUE(tr_utf8_validate(out, nullptr)); EXPECT_TRUE(tr_utf8_validate(out, nullptr));
in = "\xF4\x33\x81\x82"sv; in = "\xF4\x33\x81\x82"sv;
tr_strvUtf8Clean(in, out); out = tr_strvUtf8Clean(in);
EXPECT_NE(nullptr, out.data()); EXPECT_NE(nullptr, out.data());
EXPECT_TRUE(out.size() == 4 || out.size() == 7); EXPECT_TRUE(out.size() == 4 || out.size() == 7);
EXPECT_TRUE(tr_utf8_validate(out, nullptr)); EXPECT_TRUE(tr_utf8_validate(out, nullptr));
} }
TEST_F(UtilsTest, trStrvUtf8CleanFuzz)
{
auto buf = std::vector<char>{};
for (size_t i = 0; i < 1000; ++i)
{
buf.resize(tr_rand_int(4096));
tr_rand_buffer(std::data(buf), std::size(buf));
auto const out = tr_strvUtf8Clean({ std::data(buf), std::size(buf) });
EXPECT_TRUE(tr_utf8_validate(out, nullptr));
}
}
TEST_F(UtilsTest, trParseNumberRange) TEST_F(UtilsTest, trParseNumberRange)
{ {
auto const tostring = [](std::vector<int> const& v) auto const tostring = [](std::vector<int> const& v)