perf: faster detection of invalid filenames (#3126)

* refactor: move file_info into tr_torrent_files

* chore: rename files-test as torrent-files-test.cc

* test: add tests for tr_torrent_files::isSubpathPortable()
This commit is contained in:
Charles Kerr 2022-05-23 17:53:26 -05:00 committed by GitHub
parent 0f29958751
commit 19db28c04d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 176 additions and 124 deletions

View File

@ -271,8 +271,6 @@
A2FB701C0D95CAEA0001F331 /* GroupsController.mm in Sources */ = {isa = PBXBuildFile; fileRef = A2FB701B0D95CAEA0001F331 /* GroupsController.mm */; };
A47A7C87B8B57BE50DF0D410 /* torrent-files.cc in Sources */ = {isa = PBXBuildFile; fileRef = A47A7C87B8B57BE50DF0D411 /* torrent-files.cc */; };
A47A7C87B8B57BE50DF0D412 /* torrent-files.h in Headers */ = {isa = PBXBuildFile; fileRef = A47A7C87B8B57BE50DF0D413 /* torrent-files.h */; };
ACBE7A956ED89682EC4460E0 /* file-info.cc in Sources */ = {isa = PBXBuildFile; fileRef = ACBE7A956ED89682EC4460E1 /* file-info.cc */; };
ACBE7A956ED89682EC4460E2 /* file-info.h in Headers */ = {isa = PBXBuildFile; fileRef = ACBE7A956ED89682EC4460E3 /* file-info.h */; settings = {ATTRIBUTES = (Project, ); }; };
BE1183580CE160C50002D0F3 /* miniupnpc_declspec.h in Headers */ = {isa = PBXBuildFile; fileRef = BE11834E0CE160C50002D0F3 /* miniupnpc_declspec.h */; };
BE1183590CE160C50002D0F3 /* igd_desc_parse.h in Headers */ = {isa = PBXBuildFile; fileRef = BE11834F0CE160C50002D0F3 /* igd_desc_parse.h */; };
BE11835A0CE160C50002D0F3 /* minixml.h in Headers */ = {isa = PBXBuildFile; fileRef = BE1183500CE160C50002D0F3 /* minixml.h */; };
@ -1045,8 +1043,6 @@
A47A7C87B8B57BE50DF0D411 /* torrent-files.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = "torrent-files.cc"; sourceTree = "<group>"; };
A47A7C87B8B57BE50DF0D413 /* torrent-files.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "torrent-files.h"; sourceTree = "<group>"; };
A54D44C6A7AAF131D9AE29F5 /* block-info.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = "block-info.cc"; sourceTree = "<group>"; };
ACBE7A956ED89682EC4460E1 /* file-info.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = "file-info.cc"; sourceTree = "<group>"; };
ACBE7A956ED89682EC4460E3 /* file-info.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "file-info.h"; sourceTree = "<group>"; };
BE1183480CE160960002D0F3 /* libminiupnp.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libminiupnp.a; sourceTree = BUILT_PRODUCTS_DIR; };
BE11834E0CE160C50002D0F3 /* miniupnpc_declspec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = miniupnpc_declspec.h; sourceTree = "<group>"; };
BE11834F0CE160C50002D0F3 /* igd_desc_parse.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = igd_desc_parse.h; sourceTree = "<group>"; };
@ -1603,8 +1599,6 @@
children = (
A54D44C6A7AAF131D9AE29F5 /* block-info.cc */,
6A044CBD8C049AFCBD4DB411 /* block-info.h */,
ACBE7A956ED89682EC4460E1 /* file-info.cc */,
ACBE7A956ED89682EC4460E3 /* file-info.h */,
E23B55A5FC3B557F7746D511 /* interned-string.h */,
C17740D3273A002C00E455D2 /* web-utils.cc */,
C17740D4273A002C00E455D2 /* web-utils.h */,
@ -2234,7 +2228,6 @@
A2AF23C916B44FA0003BC59E /* log.h in Headers */,
A23FAE55178BC2950053DC5B /* platform-quota.h in Headers */,
F11545ACA7C4D7A464F703AB /* block-info.h in Headers */,
ACBE7A956ED89682EC4460E2 /* file-info.h in Headers */,
E23B55A5FC3B557F7746D510 /* interned-string.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -2994,7 +2987,6 @@
A2AF23C816B44FA0003BC59E /* log.cc in Sources */,
A23FAE54178BC2950053DC5B /* platform-quota.cc in Sources */,
62F644738FE3D8788EBF73A9 /* block-info.cc in Sources */,
ACBE7A956ED89682EC4460E0 /* file-info.cc in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -25,7 +25,6 @@ set(PROJECT_FILES
crypto-utils.cc
crypto.cc
error.cc
file-info.cc
file-piece-map.cc
file-posix.cc
file-win32.cc
@ -168,7 +167,6 @@ set(${PROJECT_NAME}_PRIVATE_HEADERS
completion.h
crypto-utils.h
crypto.h
file-info.h
file-piece-map.h
handshake.h
history.h

View File

@ -1,83 +0,0 @@
// This file Copyright © 2019-2022 Mnemosyne LLC.
// It may be used under GPLv2 (SPDX: GPL-2.0-only), GPLv3 (SPDX: GPL-3.0-only),
// or any future license endorsed by Mnemosyne LLC.
// License text can be found in the licenses/ folder.
#include <algorithm>
#include <iterator>
#include <string>
#include <string_view>
#include <event2/util.h> // evutil_ascii_strncasecmp
#include "file-info.h"
#include "utils.h"
using namespace std::literals;
static void appendSanitizedComponent(std::string& out, std::string_view in)
{
// remove leading spaces
auto constexpr leading_test = [](unsigned char ch)
{
return isspace(ch);
};
auto const it = std::find_if_not(std::begin(in), std::end(in), leading_test);
in.remove_prefix(std::distance(std::begin(in), it));
// remove trailing spaces and '.'
auto constexpr trailing_test = [](unsigned char ch)
{
return (isspace(ch) != 0) || ch == '.';
};
auto const rit = std::find_if_not(std::rbegin(in), std::rend(in), trailing_test);
in.remove_suffix(std::distance(std::rbegin(in), rit));
// munge banned characters
// https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file
auto constexpr ensure_legal_char = [](auto ch)
{
auto constexpr Banned = std::string_view{ "<>:\"/\\|?*" };
auto const banned = Banned.find(ch) != std::string_view::npos || (unsigned char)ch < 0x20;
return banned ? '_' : ch;
};
auto const old_out_len = std::size(out);
std::transform(std::begin(in), std::end(in), std::back_inserter(out), ensure_legal_char);
// munge banned filenames
// https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file
auto constexpr ReservedNames = std::array<std::string_view, 22>{
"CON"sv, "PRN"sv, "AUX"sv, "NUL"sv, "COM1"sv, "COM2"sv, "COM3"sv, "COM4"sv, "COM5"sv, "COM6"sv, "COM7"sv,
"COM8"sv, "COM9"sv, "LPT1"sv, "LPT2"sv, "LPT3"sv, "LPT4"sv, "LPT5"sv, "LPT6"sv, "LPT7"sv, "LPT8"sv, "LPT9"sv,
};
for (auto const& name : ReservedNames)
{
size_t const name_len = std::size(name);
if (evutil_ascii_strncasecmp(out.c_str() + old_out_len, std::data(name), name_len) != 0 ||
(out[old_out_len + name_len] != '\0' && out[old_out_len + name_len] != '.'))
{
continue;
}
out.insert(std::begin(out) + old_out_len + name_len, '_');
break;
}
}
std::string tr_file_info::sanitizePath(std::string_view in)
{
auto out = std::string{};
auto segment = std::string_view{};
while (tr_strvSep(&in, &segment, '/'))
{
appendSanitizedComponent(out, segment);
out += '/';
}
if (!std::empty(out)) // remove trailing slash
{
out.resize(std::size(out) - 1);
}
return out;
}

View File

@ -1,19 +0,0 @@
// This file Copyright © 2021-2022 Mnemosyne LLC.
// It may be used under GPLv2 (SPDX: GPL-2.0), GPLv3 (SPDX: GPL-3.0),
// or any future license endorsed by Mnemosyne LLC.
// License text can be found in the licenses/ folder.
#pragma once
#include <string>
#include <string_view>
struct tr_file_info
{
[[nodiscard]] static std::string sanitizePath(std::string_view path);
[[nodiscard]] static bool isPortable(std::string_view path)
{
return sanitizePath(path) == path;
}
};

View File

@ -20,7 +20,6 @@
#include "crypto-utils.h"
#include "error.h"
#include "file-info.h"
#include "file.h"
#include "log.h"
#include "makemeta.h"
@ -171,7 +170,7 @@ tr_metainfo_builder* tr_metaInfoBuilderCreate(char const* topFileArg)
auto* const file = &ret->files[i++];
file->filename = tmp->filename;
file->size = tmp->size;
file->is_portable = tr_file_info::isPortable(file->filename + offset);
file->is_portable = tr_torrent_files::isSubpathPortable(file->filename + offset);
ret->totalSize += tmp->size;

View File

@ -310,3 +310,116 @@ void tr_torrent_files::remove(std::string_view parent_in, std::string_view tmpdi
depthFirstWalk(filename.c_str(), remove_junk);
}
}
namespace
{
// https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
// Do not use the following reserved names for the name of a file:
// CON, PRN, AUX, NUL, COM1, COM2, COM3, COM4, COM5, COM6, COM7, COM8,
// COM9, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, and LPT9.
// Also avoid these names followed immediately by an extension;
// for example, NUL.txt is not recommended.
[[nodiscard]] bool isReservedFile(std::string_view in) noexcept
{
if (std::empty(in))
{
return false;
}
// Shortcut to avoid extra work below.
// All the paths below involve filenames that begin with one of these chars
static auto constexpr ReservedFilesBeginWithOneOf = "ACLNP"sv;
if (ReservedFilesBeginWithOneOf.find(toupper(in.front())) == std::string_view::npos)
{
return false;
}
auto in_upper = tr_pathbuf{ in };
std::transform(std::begin(in_upper), std::end(in_upper), std::begin(in_upper), [](auto ch) { return toupper(ch); });
auto const in_upper_sv = in_upper.sv();
static auto constexpr ReservedNames = std::array<std::string_view, 22>{
"AUX"sv, "CON"sv, "NUL"sv, "PRN"sv, //
"COM1"sv, "COM2"sv, "COM3"sv, "COM4"sv, "COM5"sv, "COM6"sv, "COM7"sv, "COM8"sv, "COM9"sv, //
"LPT1"sv, "LPT2"sv, "LPT3"sv, "LPT4"sv, "LPT5"sv, "LPT6"sv, "LPT7"sv, "LPT8"sv, "LPT9"sv, //
};
if (std::find(std::begin(ReservedNames), std::end(ReservedNames), in_upper_sv) != std::end(ReservedNames))
{
return true;
}
static auto constexpr ReservedPrefixes = std::array<std::string_view, 22>{
"AUX."sv, "CON."sv, "NUL."sv, "PRN."sv, //
"COM1."sv, "COM2"sv, "COM3."sv, "COM4."sv, "COM5."sv, "COM6."sv, "COM7."sv, "COM8."sv, "COM9."sv, //
"LPT1."sv, "LPT2."sv, "LPT3."sv, "LPT4."sv, "LPT5."sv, "LPT6."sv, "LPT7."sv, "LPT8."sv, "LPT9."sv, //
};
return std::any_of(
std::begin(ReservedPrefixes),
std::end(ReservedPrefixes),
[in_upper_sv](auto const& prefix) { return tr_strvStartsWith(in_upper_sv, prefix); });
}
// https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file
// Use any character in the current code page for a name, including Unicode
// characters and characters in the extended character set (128255),
// except for the following:
[[nodiscard]] auto constexpr isReservedChar(char ch) noexcept
{
switch (ch)
{
case '"':
case '*':
case '/':
case ':':
case '<':
case '>':
case '?':
case '\\':
case '|':
return true;
default:
return false;
}
}
void appendSanitizedComponent(std::string_view in, tr_pathbuf& out)
{
// remove leading and trailing spaces
in = tr_strvStrip(in);
// remove trailing periods
while (tr_strvEndsWith(in, '.'))
{
in.remove_suffix(1);
}
if (isReservedFile(in))
{
out.append('_');
}
// replace reserved characters with an underscore
static auto constexpr add_char = [](auto ch)
{
return isReservedChar(ch) ? '_' : ch;
};
std::transform(std::begin(in), std::end(in), std::back_inserter(out), add_char);
}
} // namespace
void tr_torrent_files::makeSubpathPortable(std::string_view path, tr_pathbuf& append_me)
{
auto segment = std::string_view{};
while (tr_strvSep(&path, &segment, '/'))
{
appendSanitizedComponent(segment, append_me);
append_me.append('/');
}
if (auto const n = std::size(append_me); n > 0)
{
append_me.resize(n - 1); // remove trailing slash
}
}

View File

@ -143,6 +143,20 @@ public:
[[nodiscard]] std::optional<FoundFile> find(tr_file_index_t, std::string_view const* search_paths, size_t n_paths) const;
[[nodiscard]] bool hasAnyLocalData(std::string_view const* search_paths, size_t n_paths) const;
static void makeSubpathPortable(std::string_view path, tr_pathbuf& append_me);
[[nodiscard]] static auto makeSubpathPortable(std::string_view path)
{
auto tmp = tr_pathbuf{};
makeSubpathPortable(path, tmp);
return std::string{ tmp.sv() };
}
[[nodiscard]] static bool isSubpathPortable(std::string_view path)
{
return makeSubpathPortable(path) == path;
}
static constexpr std::string_view PartialFileSuffix = ".part";
private:

View File

@ -19,7 +19,6 @@
#include "crypto-utils.h"
#include "error-types.h"
#include "error.h"
#include "file-info.h"
#include "file.h"
#include "log.h"
#include "quark.h"
@ -205,7 +204,7 @@ bool tr_torrent_metainfo::parsePath(std::string_view root, tr_variant* path, std
}
}
auto const sanitized = tr_file_info::sanitizePath(setme);
auto const sanitized = tr_torrent_files::makeSubpathPortable(setme);
if (std::size(sanitized) <= std::size(root))
{
@ -222,7 +221,7 @@ std::string_view tr_torrent_metainfo::parseFiles(tr_torrent_metainfo& setme, tr_
setme.files_.clear();
auto const root_name = tr_file_info::sanitizePath(setme.name_);
auto const root_name = tr_torrent_files::makeSubpathPortable(setme.name_);
if (std::empty(root_name))
{

View File

@ -13,7 +13,6 @@ add_executable(libtransmission-test
error-test.cc
file-piece-map-test.cc
file-test.cc
files-test.cc
getopt-test.cc
history-test.cc
json-test.cc
@ -33,6 +32,7 @@ add_executable(libtransmission-test
subprocess-test-script.cmd
subprocess-test.cc
test-fixtures.h
torrent-files-test.cc
torrent-metainfo-test.cc
torrents-test.cc
utils-test.cc

View File

@ -3,6 +3,10 @@
// or any future license endorsed by Mnemosyne LLC.
// License text can be found in the licenses/ folder.
#include <array>
#include <string_view>
#include <utility>
#include "transmission.h"
#include "torrent-files.h"
@ -11,11 +15,11 @@
using namespace std::literals;
class FilesTest : public ::libtransmission::test::SandboxedTest
class TorrentFilesTest : public ::libtransmission::test::SandboxedTest
{
};
TEST_F(FilesTest, add)
TEST_F(TorrentFilesTest, add)
{
auto constexpr Path = "/hello/world"sv;
auto constexpr Size = size_t{ 1024 };
@ -32,7 +36,7 @@ TEST_F(FilesTest, add)
EXPECT_FALSE(std::empty(files));
}
TEST_F(FilesTest, setPath)
TEST_F(TorrentFilesTest, setPath)
{
auto constexpr Path1 = "/hello/world"sv;
auto constexpr Path2 = "/hello/there"sv;
@ -48,7 +52,7 @@ TEST_F(FilesTest, setPath)
EXPECT_EQ(Size, files.fileSize(file_index));
}
TEST_F(FilesTest, clear)
TEST_F(TorrentFilesTest, clear)
{
auto constexpr Path1 = "/hello/world"sv;
auto constexpr Path2 = "/hello/there"sv;
@ -65,7 +69,7 @@ TEST_F(FilesTest, clear)
EXPECT_EQ(size_t{ 0U }, files.fileCount());
}
TEST_F(FilesTest, find)
TEST_F(TorrentFilesTest, find)
{
static auto constexpr Contents = "hello"sv;
auto const filename = tr_pathbuf{ sandboxDir(), "/first_dir/hello.txt"sv };
@ -107,7 +111,7 @@ TEST_F(FilesTest, find)
EXPECT_FALSE(files.find(file_index, std::data(search_path), std::size(search_path)));
}
TEST_F(FilesTest, hasAnyLocalData)
TEST_F(TorrentFilesTest, hasAnyLocalData)
{
static auto constexpr Contents = "hello"sv;
auto const filename = tr_pathbuf{ sandboxDir(), "/first_dir/hello.txt"sv };
@ -125,3 +129,38 @@ TEST_F(FilesTest, hasAnyLocalData)
EXPECT_FALSE(files.hasAnyLocalData(std::data(search_path) + 1, 1U));
EXPECT_FALSE(files.hasAnyLocalData(std::data(search_path), 0U));
}
TEST_F(TorrentFilesTest, isSubpathPortable)
{
static auto constexpr Tests = std::array<std::pair<std::string_view, bool>, 15>{ {
// don't end with periods
{ "foo.", false },
{ "foo..", false },
// don't begin or end with whitespace
{ " foo ", false },
{ " foo", false },
{ "foo ", false },
// reserved names
{ "COM1", false },
{ "COM1.txt", false },
{ "Com1", false },
{ "com1", false },
// reserved characters
{ "hell:o.txt", false },
// everything else
{ ".foo", true },
{ "com99.txt", true },
{ "foo", true },
{ "hello.txt", true },
{ "hello#.txt", true },
} };
for (auto const& [subpath, expected] : Tests)
{
EXPECT_EQ(expected, tr_torrent_files::isSubpathPortable(subpath)) << " subpath " << subpath;
}
}