From 19db28c04d0f923937ecafd85f8127f202834e7b Mon Sep 17 00:00:00 2001 From: Charles Kerr Date: Mon, 23 May 2022 17:53:26 -0500 Subject: [PATCH] perf: faster detection of invalid filenames (#3126) * refactor: move file_info into tr_torrent_files * chore: rename files-test as torrent-files-test.cc * test: add tests for tr_torrent_files::isSubpathPortable() --- Transmission.xcodeproj/project.pbxproj | 8 -- libtransmission/CMakeLists.txt | 2 - libtransmission/file-info.cc | 83 ------------- libtransmission/file-info.h | 19 --- libtransmission/makemeta.cc | 3 +- libtransmission/torrent-files.cc | 113 ++++++++++++++++++ libtransmission/torrent-files.h | 14 +++ libtransmission/torrent-metainfo.cc | 5 +- tests/libtransmission/CMakeLists.txt | 2 +- .../{files-test.cc => torrent-files-test.cc} | 51 +++++++- 10 files changed, 176 insertions(+), 124 deletions(-) delete mode 100644 libtransmission/file-info.cc delete mode 100644 libtransmission/file-info.h rename tests/libtransmission/{files-test.cc => torrent-files-test.cc} (78%) diff --git a/Transmission.xcodeproj/project.pbxproj b/Transmission.xcodeproj/project.pbxproj index 2d962c901..00f8f1f23 100644 --- a/Transmission.xcodeproj/project.pbxproj +++ b/Transmission.xcodeproj/project.pbxproj @@ -271,8 +271,6 @@ A2FB701C0D95CAEA0001F331 /* GroupsController.mm in Sources */ = {isa = PBXBuildFile; fileRef = A2FB701B0D95CAEA0001F331 /* GroupsController.mm */; }; A47A7C87B8B57BE50DF0D410 /* torrent-files.cc in Sources */ = {isa = PBXBuildFile; fileRef = A47A7C87B8B57BE50DF0D411 /* torrent-files.cc */; }; A47A7C87B8B57BE50DF0D412 /* torrent-files.h in Headers */ = {isa = PBXBuildFile; fileRef = A47A7C87B8B57BE50DF0D413 /* torrent-files.h */; }; - ACBE7A956ED89682EC4460E0 /* file-info.cc in Sources */ = {isa = PBXBuildFile; fileRef = ACBE7A956ED89682EC4460E1 /* file-info.cc */; }; - ACBE7A956ED89682EC4460E2 /* file-info.h in Headers */ = {isa = PBXBuildFile; fileRef = ACBE7A956ED89682EC4460E3 /* file-info.h */; settings = {ATTRIBUTES = (Project, ); }; }; BE1183580CE160C50002D0F3 /* miniupnpc_declspec.h in Headers */ = {isa = PBXBuildFile; fileRef = BE11834E0CE160C50002D0F3 /* miniupnpc_declspec.h */; }; BE1183590CE160C50002D0F3 /* igd_desc_parse.h in Headers */ = {isa = PBXBuildFile; fileRef = BE11834F0CE160C50002D0F3 /* igd_desc_parse.h */; }; BE11835A0CE160C50002D0F3 /* minixml.h in Headers */ = {isa = PBXBuildFile; fileRef = BE1183500CE160C50002D0F3 /* minixml.h */; }; @@ -1045,8 +1043,6 @@ A47A7C87B8B57BE50DF0D411 /* torrent-files.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = "torrent-files.cc"; sourceTree = ""; }; A47A7C87B8B57BE50DF0D413 /* torrent-files.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "torrent-files.h"; sourceTree = ""; }; A54D44C6A7AAF131D9AE29F5 /* block-info.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = "block-info.cc"; sourceTree = ""; }; - ACBE7A956ED89682EC4460E1 /* file-info.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = "file-info.cc"; sourceTree = ""; }; - ACBE7A956ED89682EC4460E3 /* file-info.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "file-info.h"; sourceTree = ""; }; BE1183480CE160960002D0F3 /* libminiupnp.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libminiupnp.a; sourceTree = BUILT_PRODUCTS_DIR; }; BE11834E0CE160C50002D0F3 /* miniupnpc_declspec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = miniupnpc_declspec.h; sourceTree = ""; }; BE11834F0CE160C50002D0F3 /* igd_desc_parse.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = igd_desc_parse.h; sourceTree = ""; }; @@ -1603,8 +1599,6 @@ children = ( A54D44C6A7AAF131D9AE29F5 /* block-info.cc */, 6A044CBD8C049AFCBD4DB411 /* block-info.h */, - ACBE7A956ED89682EC4460E1 /* file-info.cc */, - ACBE7A956ED89682EC4460E3 /* file-info.h */, E23B55A5FC3B557F7746D511 /* interned-string.h */, C17740D3273A002C00E455D2 /* web-utils.cc */, C17740D4273A002C00E455D2 /* web-utils.h */, @@ -2234,7 +2228,6 @@ A2AF23C916B44FA0003BC59E /* log.h in Headers */, A23FAE55178BC2950053DC5B /* platform-quota.h in Headers */, F11545ACA7C4D7A464F703AB /* block-info.h in Headers */, - ACBE7A956ED89682EC4460E2 /* file-info.h in Headers */, E23B55A5FC3B557F7746D510 /* interned-string.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -2994,7 +2987,6 @@ A2AF23C816B44FA0003BC59E /* log.cc in Sources */, A23FAE54178BC2950053DC5B /* platform-quota.cc in Sources */, 62F644738FE3D8788EBF73A9 /* block-info.cc in Sources */, - ACBE7A956ED89682EC4460E0 /* file-info.cc in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/libtransmission/CMakeLists.txt b/libtransmission/CMakeLists.txt index 10ae32912..947cfffcc 100644 --- a/libtransmission/CMakeLists.txt +++ b/libtransmission/CMakeLists.txt @@ -25,7 +25,6 @@ set(PROJECT_FILES crypto-utils.cc crypto.cc error.cc - file-info.cc file-piece-map.cc file-posix.cc file-win32.cc @@ -168,7 +167,6 @@ set(${PROJECT_NAME}_PRIVATE_HEADERS completion.h crypto-utils.h crypto.h - file-info.h file-piece-map.h handshake.h history.h diff --git a/libtransmission/file-info.cc b/libtransmission/file-info.cc deleted file mode 100644 index 7750bb1ac..000000000 --- a/libtransmission/file-info.cc +++ /dev/null @@ -1,83 +0,0 @@ -// This file Copyright © 2019-2022 Mnemosyne LLC. -// It may be used under GPLv2 (SPDX: GPL-2.0-only), GPLv3 (SPDX: GPL-3.0-only), -// or any future license endorsed by Mnemosyne LLC. -// License text can be found in the licenses/ folder. - -#include -#include -#include -#include - -#include // evutil_ascii_strncasecmp - -#include "file-info.h" -#include "utils.h" - -using namespace std::literals; - -static void appendSanitizedComponent(std::string& out, std::string_view in) -{ - // remove leading spaces - auto constexpr leading_test = [](unsigned char ch) - { - return isspace(ch); - }; - auto const it = std::find_if_not(std::begin(in), std::end(in), leading_test); - in.remove_prefix(std::distance(std::begin(in), it)); - - // remove trailing spaces and '.' - auto constexpr trailing_test = [](unsigned char ch) - { - return (isspace(ch) != 0) || ch == '.'; - }; - auto const rit = std::find_if_not(std::rbegin(in), std::rend(in), trailing_test); - in.remove_suffix(std::distance(std::rbegin(in), rit)); - - // munge banned characters - // https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file - auto constexpr ensure_legal_char = [](auto ch) - { - auto constexpr Banned = std::string_view{ "<>:\"/\\|?*" }; - auto const banned = Banned.find(ch) != std::string_view::npos || (unsigned char)ch < 0x20; - return banned ? '_' : ch; - }; - auto const old_out_len = std::size(out); - std::transform(std::begin(in), std::end(in), std::back_inserter(out), ensure_legal_char); - - // munge banned filenames - // https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file - auto constexpr ReservedNames = std::array{ - "CON"sv, "PRN"sv, "AUX"sv, "NUL"sv, "COM1"sv, "COM2"sv, "COM3"sv, "COM4"sv, "COM5"sv, "COM6"sv, "COM7"sv, - "COM8"sv, "COM9"sv, "LPT1"sv, "LPT2"sv, "LPT3"sv, "LPT4"sv, "LPT5"sv, "LPT6"sv, "LPT7"sv, "LPT8"sv, "LPT9"sv, - }; - for (auto const& name : ReservedNames) - { - size_t const name_len = std::size(name); - if (evutil_ascii_strncasecmp(out.c_str() + old_out_len, std::data(name), name_len) != 0 || - (out[old_out_len + name_len] != '\0' && out[old_out_len + name_len] != '.')) - { - continue; - } - - out.insert(std::begin(out) + old_out_len + name_len, '_'); - break; - } -} - -std::string tr_file_info::sanitizePath(std::string_view in) -{ - auto out = std::string{}; - - auto segment = std::string_view{}; - while (tr_strvSep(&in, &segment, '/')) - { - appendSanitizedComponent(out, segment); - out += '/'; - } - if (!std::empty(out)) // remove trailing slash - { - out.resize(std::size(out) - 1); - } - - return out; -} diff --git a/libtransmission/file-info.h b/libtransmission/file-info.h deleted file mode 100644 index 3c1f804db..000000000 --- a/libtransmission/file-info.h +++ /dev/null @@ -1,19 +0,0 @@ -// This file Copyright © 2021-2022 Mnemosyne LLC. -// It may be used under GPLv2 (SPDX: GPL-2.0), GPLv3 (SPDX: GPL-3.0), -// or any future license endorsed by Mnemosyne LLC. -// License text can be found in the licenses/ folder. - -#pragma once - -#include -#include - -struct tr_file_info -{ - [[nodiscard]] static std::string sanitizePath(std::string_view path); - - [[nodiscard]] static bool isPortable(std::string_view path) - { - return sanitizePath(path) == path; - } -}; diff --git a/libtransmission/makemeta.cc b/libtransmission/makemeta.cc index 16492753e..aaa4ad26c 100644 --- a/libtransmission/makemeta.cc +++ b/libtransmission/makemeta.cc @@ -20,7 +20,6 @@ #include "crypto-utils.h" #include "error.h" -#include "file-info.h" #include "file.h" #include "log.h" #include "makemeta.h" @@ -171,7 +170,7 @@ tr_metainfo_builder* tr_metaInfoBuilderCreate(char const* topFileArg) auto* const file = &ret->files[i++]; file->filename = tmp->filename; file->size = tmp->size; - file->is_portable = tr_file_info::isPortable(file->filename + offset); + file->is_portable = tr_torrent_files::isSubpathPortable(file->filename + offset); ret->totalSize += tmp->size; diff --git a/libtransmission/torrent-files.cc b/libtransmission/torrent-files.cc index b1854f2ea..3c7202cc5 100644 --- a/libtransmission/torrent-files.cc +++ b/libtransmission/torrent-files.cc @@ -310,3 +310,116 @@ void tr_torrent_files::remove(std::string_view parent_in, std::string_view tmpdi depthFirstWalk(filename.c_str(), remove_junk); } } + +namespace +{ + +// https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file +// Do not use the following reserved names for the name of a file: +// CON, PRN, AUX, NUL, COM1, COM2, COM3, COM4, COM5, COM6, COM7, COM8, +// COM9, LPT1, LPT2, LPT3, LPT4, LPT5, LPT6, LPT7, LPT8, and LPT9. +// Also avoid these names followed immediately by an extension; +// for example, NUL.txt is not recommended. +[[nodiscard]] bool isReservedFile(std::string_view in) noexcept +{ + if (std::empty(in)) + { + return false; + } + + // Shortcut to avoid extra work below. + // All the paths below involve filenames that begin with one of these chars + static auto constexpr ReservedFilesBeginWithOneOf = "ACLNP"sv; + if (ReservedFilesBeginWithOneOf.find(toupper(in.front())) == std::string_view::npos) + { + return false; + } + + auto in_upper = tr_pathbuf{ in }; + std::transform(std::begin(in_upper), std::end(in_upper), std::begin(in_upper), [](auto ch) { return toupper(ch); }); + auto const in_upper_sv = in_upper.sv(); + + static auto constexpr ReservedNames = std::array{ + "AUX"sv, "CON"sv, "NUL"sv, "PRN"sv, // + "COM1"sv, "COM2"sv, "COM3"sv, "COM4"sv, "COM5"sv, "COM6"sv, "COM7"sv, "COM8"sv, "COM9"sv, // + "LPT1"sv, "LPT2"sv, "LPT3"sv, "LPT4"sv, "LPT5"sv, "LPT6"sv, "LPT7"sv, "LPT8"sv, "LPT9"sv, // + }; + if (std::find(std::begin(ReservedNames), std::end(ReservedNames), in_upper_sv) != std::end(ReservedNames)) + { + return true; + } + + static auto constexpr ReservedPrefixes = std::array{ + "AUX."sv, "CON."sv, "NUL."sv, "PRN."sv, // + "COM1."sv, "COM2"sv, "COM3."sv, "COM4."sv, "COM5."sv, "COM6."sv, "COM7."sv, "COM8."sv, "COM9."sv, // + "LPT1."sv, "LPT2."sv, "LPT3."sv, "LPT4."sv, "LPT5."sv, "LPT6."sv, "LPT7."sv, "LPT8."sv, "LPT9."sv, // + }; + return std::any_of( + std::begin(ReservedPrefixes), + std::end(ReservedPrefixes), + [in_upper_sv](auto const& prefix) { return tr_strvStartsWith(in_upper_sv, prefix); }); +} + +// https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file +// Use any character in the current code page for a name, including Unicode +// characters and characters in the extended character set (128–255), +// except for the following: +[[nodiscard]] auto constexpr isReservedChar(char ch) noexcept +{ + switch (ch) + { + case '"': + case '*': + case '/': + case ':': + case '<': + case '>': + case '?': + case '\\': + case '|': + return true; + default: + return false; + } +} + +void appendSanitizedComponent(std::string_view in, tr_pathbuf& out) +{ + // remove leading and trailing spaces + in = tr_strvStrip(in); + + // remove trailing periods + while (tr_strvEndsWith(in, '.')) + { + in.remove_suffix(1); + } + + if (isReservedFile(in)) + { + out.append('_'); + } + + // replace reserved characters with an underscore + static auto constexpr add_char = [](auto ch) + { + return isReservedChar(ch) ? '_' : ch; + }; + std::transform(std::begin(in), std::end(in), std::back_inserter(out), add_char); +} + +} // namespace + +void tr_torrent_files::makeSubpathPortable(std::string_view path, tr_pathbuf& append_me) +{ + auto segment = std::string_view{}; + while (tr_strvSep(&path, &segment, '/')) + { + appendSanitizedComponent(segment, append_me); + append_me.append('/'); + } + + if (auto const n = std::size(append_me); n > 0) + { + append_me.resize(n - 1); // remove trailing slash + } +} diff --git a/libtransmission/torrent-files.h b/libtransmission/torrent-files.h index d5dd33756..4133de3ad 100644 --- a/libtransmission/torrent-files.h +++ b/libtransmission/torrent-files.h @@ -143,6 +143,20 @@ public: [[nodiscard]] std::optional find(tr_file_index_t, std::string_view const* search_paths, size_t n_paths) const; [[nodiscard]] bool hasAnyLocalData(std::string_view const* search_paths, size_t n_paths) const; + static void makeSubpathPortable(std::string_view path, tr_pathbuf& append_me); + + [[nodiscard]] static auto makeSubpathPortable(std::string_view path) + { + auto tmp = tr_pathbuf{}; + makeSubpathPortable(path, tmp); + return std::string{ tmp.sv() }; + } + + [[nodiscard]] static bool isSubpathPortable(std::string_view path) + { + return makeSubpathPortable(path) == path; + } + static constexpr std::string_view PartialFileSuffix = ".part"; private: diff --git a/libtransmission/torrent-metainfo.cc b/libtransmission/torrent-metainfo.cc index 72542f9d3..40504038c 100644 --- a/libtransmission/torrent-metainfo.cc +++ b/libtransmission/torrent-metainfo.cc @@ -19,7 +19,6 @@ #include "crypto-utils.h" #include "error-types.h" #include "error.h" -#include "file-info.h" #include "file.h" #include "log.h" #include "quark.h" @@ -205,7 +204,7 @@ bool tr_torrent_metainfo::parsePath(std::string_view root, tr_variant* path, std } } - auto const sanitized = tr_file_info::sanitizePath(setme); + auto const sanitized = tr_torrent_files::makeSubpathPortable(setme); if (std::size(sanitized) <= std::size(root)) { @@ -222,7 +221,7 @@ std::string_view tr_torrent_metainfo::parseFiles(tr_torrent_metainfo& setme, tr_ setme.files_.clear(); - auto const root_name = tr_file_info::sanitizePath(setme.name_); + auto const root_name = tr_torrent_files::makeSubpathPortable(setme.name_); if (std::empty(root_name)) { diff --git a/tests/libtransmission/CMakeLists.txt b/tests/libtransmission/CMakeLists.txt index 07f7a4ea5..0440a0aba 100644 --- a/tests/libtransmission/CMakeLists.txt +++ b/tests/libtransmission/CMakeLists.txt @@ -13,7 +13,6 @@ add_executable(libtransmission-test error-test.cc file-piece-map-test.cc file-test.cc - files-test.cc getopt-test.cc history-test.cc json-test.cc @@ -33,6 +32,7 @@ add_executable(libtransmission-test subprocess-test-script.cmd subprocess-test.cc test-fixtures.h + torrent-files-test.cc torrent-metainfo-test.cc torrents-test.cc utils-test.cc diff --git a/tests/libtransmission/files-test.cc b/tests/libtransmission/torrent-files-test.cc similarity index 78% rename from tests/libtransmission/files-test.cc rename to tests/libtransmission/torrent-files-test.cc index 85e5e7951..d8f195c32 100644 --- a/tests/libtransmission/files-test.cc +++ b/tests/libtransmission/torrent-files-test.cc @@ -3,6 +3,10 @@ // or any future license endorsed by Mnemosyne LLC. // License text can be found in the licenses/ folder. +#include +#include +#include + #include "transmission.h" #include "torrent-files.h" @@ -11,11 +15,11 @@ using namespace std::literals; -class FilesTest : public ::libtransmission::test::SandboxedTest +class TorrentFilesTest : public ::libtransmission::test::SandboxedTest { }; -TEST_F(FilesTest, add) +TEST_F(TorrentFilesTest, add) { auto constexpr Path = "/hello/world"sv; auto constexpr Size = size_t{ 1024 }; @@ -32,7 +36,7 @@ TEST_F(FilesTest, add) EXPECT_FALSE(std::empty(files)); } -TEST_F(FilesTest, setPath) +TEST_F(TorrentFilesTest, setPath) { auto constexpr Path1 = "/hello/world"sv; auto constexpr Path2 = "/hello/there"sv; @@ -48,7 +52,7 @@ TEST_F(FilesTest, setPath) EXPECT_EQ(Size, files.fileSize(file_index)); } -TEST_F(FilesTest, clear) +TEST_F(TorrentFilesTest, clear) { auto constexpr Path1 = "/hello/world"sv; auto constexpr Path2 = "/hello/there"sv; @@ -65,7 +69,7 @@ TEST_F(FilesTest, clear) EXPECT_EQ(size_t{ 0U }, files.fileCount()); } -TEST_F(FilesTest, find) +TEST_F(TorrentFilesTest, find) { static auto constexpr Contents = "hello"sv; auto const filename = tr_pathbuf{ sandboxDir(), "/first_dir/hello.txt"sv }; @@ -107,7 +111,7 @@ TEST_F(FilesTest, find) EXPECT_FALSE(files.find(file_index, std::data(search_path), std::size(search_path))); } -TEST_F(FilesTest, hasAnyLocalData) +TEST_F(TorrentFilesTest, hasAnyLocalData) { static auto constexpr Contents = "hello"sv; auto const filename = tr_pathbuf{ sandboxDir(), "/first_dir/hello.txt"sv }; @@ -125,3 +129,38 @@ TEST_F(FilesTest, hasAnyLocalData) EXPECT_FALSE(files.hasAnyLocalData(std::data(search_path) + 1, 1U)); EXPECT_FALSE(files.hasAnyLocalData(std::data(search_path), 0U)); } + +TEST_F(TorrentFilesTest, isSubpathPortable) +{ + static auto constexpr Tests = std::array, 15>{ { + // don't end with periods + { "foo.", false }, + { "foo..", false }, + + // don't begin or end with whitespace + { " foo ", false }, + { " foo", false }, + { "foo ", false }, + + // reserved names + { "COM1", false }, + { "COM1.txt", false }, + { "Com1", false }, + { "com1", false }, + + // reserved characters + { "hell:o.txt", false }, + + // everything else + { ".foo", true }, + { "com99.txt", true }, + { "foo", true }, + { "hello.txt", true }, + { "hello#.txt", true }, + } }; + + for (auto const& [subpath, expected] : Tests) + { + EXPECT_EQ(expected, tr_torrent_files::isSubpathPortable(subpath)) << " subpath " << subpath; + } +}