refactor: simplify blocklist code (#4086)

This commit is contained in:
Charles Kerr 2022-11-03 15:46:27 -05:00 committed by GitHub
parent 8857e8c1a4
commit ee8a6bd130
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 480 additions and 646 deletions

View File

@ -5,12 +5,8 @@
#include <algorithm>
#include <array>
#include <cstdio>
#include <cstdlib> // bsearch()
#include <fstream>
#include <memory>
#include <string_view>
#include <unordered_set>
#include <vector>
#include <fmt/core.h>
@ -22,191 +18,421 @@
#include "file.h"
#include "log.h"
#include "net.h"
#include "tr-assert.h"
#include "tr-strbuf.h"
#include "utils.h"
#include "utils.h" // for _(), tr_strerror(), tr_strvEndsWith()
using namespace std::literals;
/***
**** PRIVATE
***/
void BlocklistFile::close()
namespace libtransmission
{
rules_.clear();
namespace
{
// A string at the beginning of .bin files to test & make sure we don't load incompatible files
auto constexpr BinContentsPrefix = std::string_view{ "-tr-blocklist-file-format-v3-" };
// In the blocklists directory, the plaintext source file can be anything, e.g. "level1".
// The pre-parsed, fast-to-load binary file will have a ".bin" suffix e.g. "level1.bin".
auto constexpr BinFileSuffix = std::string_view{ ".bin" };
using address_range_t = std::pair<tr_address, tr_address>;
void save(std::string_view filename, address_range_t const* ranges, size_t n_ranges)
{
auto out = std::ofstream{ tr_pathbuf{ filename }, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary };
if (!out.is_open())
{
tr_logAddWarn(fmt::format(
_("Couldn't read '{path}': {error} ({error_code})"),
fmt::arg("path", filename),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
return;
}
if (!out.write(std::data(BinContentsPrefix), std::size(BinContentsPrefix)) ||
!out.write(reinterpret_cast<char const*>(ranges), n_ranges * sizeof(*ranges)))
{
tr_logAddWarn(fmt::format(
_("Couldn't save '{path}': {error} ({error_code})"),
fmt::arg("path", filename),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
}
else
{
tr_logAddInfo(fmt::format(
ngettext("Blocklist '{path}' has {count} entry", "Blocklist '{path}' has {count} entries", n_ranges),
fmt::arg("path", tr_sys_path_basename(filename)),
fmt::arg("count", n_ranges)));
}
out.close();
}
void BlocklistFile::ensureLoaded() const
namespace ParseHelpers
{
// P2P plaintext format: "comment:x.x.x.x-y.y.y.y" / "comment:x:x:x:x:x:x:x:x-x:x:x:x:x:x:x:x"
// https://web.archive.org/web/20100328075307/http://wiki.phoenixlabs.org/wiki/P2P_Format
// https://en.wikipedia.org/wiki/PeerGuardian#P2P_plaintext_format
std::optional<address_range_t> parsePeerGuardianLine(std::string_view line)
{
// remove leading "comment:"
auto pos = line.find(':');
if (pos == std::string_view::npos)
{
return {};
}
line = line.substr(pos + 1);
// parse the leading 'x.x.x.x'
pos = line.find('-');
if (pos == std::string_view::npos)
{
return {};
}
auto addrpair = address_range_t{};
if (auto const addr = tr_address::fromString(line.substr(0, pos)); addr)
{
addrpair.first = *addr;
}
else
{
return {};
}
line = line.substr(pos + 1);
// parse the trailing 'y.y.y.y'
if (auto const addr = tr_address::fromString(line); addr)
{
addrpair.second = *addr;
}
else
{
return {};
}
return addrpair;
}
// DAT / eMule format: "000.000.000.000 - 000.255.255.255 , 000 , invalid ip"
// https://sourceforge.net/p/peerguardian/wiki/dev-blocklist-format-dat/
std::optional<address_range_t> parseEmuleLine(std::string_view line)
{
static auto constexpr Delim1 = std::string_view{ " - " };
static auto constexpr Delim2 = std::string_view{ " , " };
auto pos = line.find(Delim1);
if (pos == std::string_view::npos)
{
return {};
}
auto addrpair = address_range_t{};
if (auto const addr = tr_address::fromString(line.substr(0, pos)); addr)
{
addrpair.first = *addr;
}
else
{
return {};
}
line = line.substr(pos + std::size(Delim1));
pos = line.find(Delim2);
if (pos == std::string_view::npos)
{
return {};
}
if (auto const addr = tr_address::fromString(line.substr(0, pos)); addr)
{
addrpair.second = *addr;
}
else
{
return {};
}
return addrpair;
}
// CIDR notation: "0.0.0.0/8"
// https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation
// Example: `10.5.6.7/8` will block the range [10.0.0.0 .. 10.255.255.255]
//
// Only IPv4 is handled: a non-IPv4 address before the '/' (e.g. "::/64")
// is rejected, as is a prefix length that isn't a number in [0..32].
// Returns the [first, last] addresses of the block, or an empty optional
// if the line cannot be parsed.
std::optional<address_range_t> parseCidrLine(std::string_view line)
{
    static auto constexpr MaxPrefixLen = size_t{ 32U };

    auto const pos = line.find('/');
    if (pos == std::string_view::npos)
    {
        return {};
    }

    auto const addr = tr_address::fromString(line.substr(0, pos));
    if (!addr || !addr->isIPv4())
    {
        return {};
    }

    auto const pflen = tr_parseNum<size_t>(line.substr(pos + 1));
    if (!pflen || *pflen > MaxPrefixLen)
    {
        // a prefix longer than 32 bits would make the shift below undefined
        return {};
    }

    // Shifting a 32-bit value by 32 bits is undefined behavior,
    // so "/0" (match everything) needs to be handled separately.
    auto const mask = *pflen == 0U ? uint32_t{ 0U } : uint32_t{ 0xFFFFFFFF } << (MaxPrefixLen - *pflen);

    // Start both endpoints as copies of the parsed address so that they
    // are explicitly IPv4, then overwrite the address bits.
    auto addrpair = address_range_t{ *addr, *addr };

    // s_addr is in network byte order; do the masking in host byte order
    auto const ip_host = ntohl(addr->addr.addr4.s_addr);
    addrpair.first.addr.addr4.s_addr = htonl(ip_host & mask);
    addrpair.second.addr.addr4.s_addr = htonl(ip_host | (~mask));
    return addrpair;
}
/// @brief Parse a blocklist line in any of the supported formats.
///
/// The formats are tried in a fixed order -- PeerGuardian, then eMule,
/// then CIDR -- and the first successful parse wins.
///
/// @return the parsed range, or an empty optional if no parser accepts `line`
std::optional<address_range_t> parseLine(std::string_view line)
{
    if (auto range = parsePeerGuardianLine(line); range)
    {
        return range;
    }

    if (auto range = parseEmuleLine(line); range)
    {
        return range;
    }

    // last resort: whatever the CIDR parser says is the final answer
    return parseCidrLine(line);
}
} // namespace ParseHelpers
/// @brief Read a plaintext blocklist and return its ranges, normalized.
///
/// Each line is parsed with ParseHelpers::parseLine(). Lines that can't be
/// parsed, or whose two endpoints are of different address types, are
/// skipped with a logged warning. The surviving ranges are then put in
/// begin <= end order, sorted by begin address, and merged wherever they
/// overlap.
///
/// @param filename  path of the plaintext file to read
/// @return the sorted, non-overlapping ranges; empty if the file couldn't
///         be opened or held no usable lines
auto parseFile(std::string_view filename)
{
    using namespace ParseHelpers;

    auto ranges = std::vector<address_range_t>{};

    auto input = std::ifstream{ tr_pathbuf{ filename } };
    if (!input.is_open())
    {
        tr_logAddWarn(fmt::format(
            _("Couldn't read '{path}': {error} ({error_code})"),
            fmt::arg("path", filename),
            fmt::arg("error", tr_strerror(errno)),
            fmt::arg("error_code", errno)));
        return ranges;
    }

    // pass 1: collect every line that parses
    auto text = std::string{};
    for (auto lineno = size_t{ 1U }; std::getline(input, text); ++lineno)
    {
        auto const parsed = parseLine(text);
        auto const usable = parsed && (parsed->first.type == parsed->second.type);
        if (usable)
        {
            ranges.push_back(*parsed);
            continue;
        }

        // the log shows the line number, not the text, since the text causes issues
        tr_logAddWarn(fmt::format(_("Couldn't parse line: '{line}'"), fmt::arg("line", lineno)));
    }
    input.close();

    if (std::empty(ranges))
    {
        return ranges;
    }

    // pass 2: make sure each range runs from low to high
    for (auto& [lo, hi] : ranges)
    {
        if (lo > hi)
        {
            std::swap(lo, hi);
        }
    }

    // pass 3: order by start address
    std::sort(
        std::begin(ranges),
        std::end(ranges),
        [](auto const& lhs, auto const& rhs) { return lhs.first < rhs.first; });

    // pass 4: fold overlapping ranges together, compacting in place.
    // `last` is the index of the most recently kept range.
    auto last = size_t{ 0U };
    for (size_t i = 1U, n = std::size(ranges); i < n; ++i)
    {
        auto const& candidate = ranges[i];

        if (ranges[last].second < candidate.first)
        {
            // disjoint from the kept range: keep it as a new entry
            ranges[++last] = candidate;
        }
        else if (ranges[last].second < candidate.second)
        {
            // overlaps and reaches further: extend the kept range
            ranges[last].second = candidate.second;
        }
    }

    TR_ASSERT_MSG(last + 1 <= std::size(ranges), "Can shrink `ranges` or leave intact, but not grow");
    ranges.resize(last + 1);

#ifdef TR_ENABLE_ASSERTS
    for (size_t i = 0U, n = std::size(ranges); i < n; ++i)
    {
        TR_ASSERT(ranges[i].first <= ranges[i].second);
        TR_ASSERT(i == 0U || ranges[i - 1].second < ranges[i].first);
    }
#endif

    return ranges;
}
/// @brief List the entries of `folder`, skipping names that start with '.'.
/// @param folder  directory to scan
/// @return one "<folder>/<name>" string per entry; empty if the directory
///         could not be opened
auto getFilenamesInDir(std::string_view folder)
{
    auto files = std::vector<std::string>{};

    auto const odir = tr_sys_dir_open(tr_pathbuf{ folder });
    if (odir == TR_BAD_SYS_DIR)
    {
        return files;
    }

    auto const prefix = std::string{ folder } + '/';
    for (;;)
    {
        char const* const name = tr_sys_dir_read_name(odir);
        if (name == nullptr)
        {
            break; // no more entries
        }

        if (name[0] != '.') // ignore dotfiles
        {
            files.emplace_back(prefix + name);
        }
    }

    tr_sys_dir_close(odir);
    return files;
}
} // namespace
void Blocklist::ensureLoaded() const
{
if (!std::empty(rules_))
{
return;
}
auto in = std::ifstream{ filename_, std::ios_base::in | std::ios_base::binary };
// get the file's size
tr_error* error = nullptr;
auto const file_info = tr_sys_path_get_info(bin_file_, 0, &error);
if (error != nullptr)
{
tr_logAddWarn(fmt::format(
_("Couldn't read '{path}': {error} ({error_code})"),
fmt::arg("path", bin_file_),
fmt::arg("error", error->message),
fmt::arg("error_code", error->code)));
tr_error_clear(&error);
return;
}
// open the file
auto in = std::ifstream{ bin_file_, std::ios_base::in | std::ios_base::binary };
if (!in)
{
tr_logAddWarn(fmt::format(
_("Couldn't read '{path}': {error} ({error_code})"),
fmt::arg("path", filename_),
fmt::arg("path", bin_file_),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
return;
}
auto file_info = tr_sys_path_get_info(filename_);
auto zeroes_count = 0;
auto max_zeroes = 0;
static auto constexpr RangeSize = sizeof(AddressRange);
if (file_info->size >= RangeSize)
// check to see if the file is usable
bool supported_file = true;
if (file_info->size < std::size(BinContentsPrefix)) // too small
{
std::array<char, 40> first_struct = {};
in.read(reinterpret_cast<char*>(&first_struct), std::size(first_struct));
in.clear();
in.seekg(0, std::ios::beg);
for (auto const struct_byte : first_struct)
{
if (struct_byte != 0)
{
zeroes_count = 0;
}
else
{
++zeroes_count;
if (zeroes_count > max_zeroes)
{
max_zeroes = zeroes_count;
}
}
}
supported_file = false;
}
// Check for old blocklist file format
// Old struct size was 8 bytes (2 IPv4), new struct size is 40 bytes (2 IPv4, 2 IPv6)
//
// If we encounter less than 4 continuous bytes containing 0 we are using old file format
// (as the new format guarantees at least 2 empty IPv4 OR 2 empty IPv6)
// If we confirm using old style convert to new style and rewrite blocklist file
if ((file_info->size >= 40 && max_zeroes < 4) || (file_info->size % 8 == 0 && file_info->size % 40 != 0))
else if (((file_info->size - std::size(BinContentsPrefix)) % sizeof(address_range_t)) != 0) // wrong size
{
auto range = AddressRange{};
while (in.read(reinterpret_cast<char*>(&range), 8))
{
rules_.emplace_back(range);
}
tr_logAddInfo(_("Rewriting old blocklist file format to new format"));
RewriteBlocklistFile();
supported_file = false;
}
else
{
auto range = AddressRange{};
while (in.read(reinterpret_cast<char*>(&range), sizeof(range)))
auto tmp = std::array<char, std::size(BinContentsPrefix)>{};
in.read(std::data(tmp), std::size(tmp));
supported_file = BinContentsPrefix == std::string_view{ std::data(tmp), std::size(tmp) };
}
if (!supported_file)
{
// bad binary file; try to rebuild it
in.close();
auto src_file = std::string_view{ bin_file_ };
src_file.remove_suffix(std::size(BinFileSuffix));
rules_ = parseFile(src_file);
if (!std::empty(rules_))
{
rules_.emplace_back(range);
tr_logAddInfo(_("Rewriting old blocklist file format to new format"));
tr_sys_path_remove(bin_file_);
save(bin_file_, std::data(rules_), std::size(rules_));
}
return;
}
auto range = address_range_t{};
rules_.reserve((file_info->size - std::size(BinContentsPrefix) / sizeof(address_range_t)));
while (in.read(reinterpret_cast<char*>(&range), sizeof(range)))
{
rules_.emplace_back(range);
}
tr_logAddInfo(fmt::format(
ngettext("Blocklist '{path}' has {count} entry", "Blocklist '{path}' has {count} entries", std::size(rules_)),
fmt::arg("path", tr_sys_path_basename(filename_)),
fmt::arg("path", tr_sys_path_basename(bin_file_)),
fmt::arg("count", std::size(rules_))));
}
/***
**** PACKAGE-VISIBLE
***/
std::vector<std::unique_ptr<BlocklistFile>> BlocklistFile::loadBlocklists(
std::string_view const config_dir,
bool const is_enabled)
std::vector<Blocklist> Blocklist::loadBlocklists(std::string_view const blocklist_dir, bool const is_enabled)
{
auto loadme = std::unordered_set<std::string>{};
auto working_set = std::vector<std::unique_ptr<BlocklistFile>>{};
/* walk the blocklist directory... */
auto const dirname = tr_pathbuf{ config_dir, "/blocklists"sv };
auto const odir = tr_sys_dir_open(dirname);
if (odir == TR_BAD_SYS_DIR)
// check for files that need to be updated
for (auto const& src_file : getFilenamesInDir(blocklist_dir))
{
return working_set;
}
char const* name = nullptr;
while ((name = tr_sys_dir_read_name(odir)) != nullptr)
{
auto load = std::string{};
if (name[0] == '.') /* ignore dotfiles */
if (tr_strvEndsWith(src_file, BinFileSuffix))
{
continue;
}
if (auto const path = tr_pathbuf{ dirname, '/', name }; tr_strvEndsWith(path, ".bin"sv))
// ensure this src_file has an up-to-date corresponding bin_file
auto const src_info = tr_sys_path_get_info(src_file);
auto const bin_file = tr_pathbuf{ src_file, BinFileSuffix };
auto const bin_info = tr_sys_path_get_info(bin_file);
auto const bin_needs_update = src_info && (!bin_info || bin_info->last_modified_at <= src_info->last_modified_at);
if (bin_needs_update)
{
load = path;
}
else
{
auto const binname = tr_pathbuf{ dirname, '/', name, ".bin"sv };
if (auto const bininfo = tr_sys_path_get_info(binname); !bininfo)
if (auto const ranges = parseFile(src_file); !std::empty(ranges))
{
// create it
auto b = BlocklistFile{ binname, is_enabled };
if (auto const n = b.setContent(path); n > 0)
{
load = binname;
}
save(bin_file, std::data(ranges), std::size(ranges));
}
else if (auto const pathinfo = tr_sys_path_get_info(path);
pathinfo && pathinfo->last_modified_at >= bininfo->last_modified_at)
{
// update it
auto const old = tr_pathbuf{ binname, ".old"sv };
tr_sys_path_remove(old);
tr_sys_path_rename(binname, old);
BlocklistFile b(binname, is_enabled);
if (b.setContent(path) > 0)
{
tr_sys_path_remove(old);
}
else
{
tr_sys_path_remove(binname);
tr_sys_path_rename(old, binname);
}
}
}
if (!std::empty(load))
{
loadme.emplace(load);
}
}
std::transform(
std::begin(loadme),
std::end(loadme),
std::back_inserter(working_set),
[&is_enabled](auto const& path) { return std::make_unique<BlocklistFile>(path.c_str(), is_enabled); });
/* cleanup */
tr_sys_dir_close(odir);
return working_set;
auto ret = std::vector<Blocklist>{};
for (auto const& bin_file : getFilenamesInDir(blocklist_dir))
{
if (tr_strvEndsWith(bin_file, BinFileSuffix))
{
ret.emplace_back(bin_file, is_enabled);
}
}
return ret;
}
bool BlocklistFile::hasAddress(tr_address const& addr)
bool Blocklist::contains(tr_address const& addr) const
{
TR_ASSERT(tr_address_is_valid(&addr));
@ -217,387 +443,73 @@ bool BlocklistFile::hasAddress(tr_address const& addr)
ensureLoaded();
if (std::empty(rules_))
struct Compare
{
return false;
}
if (addr.isIPv4())
{
auto const needle = ntohl(addr.addr.addr4.s_addr);
// std::binary_search works differently and requires a less-than comparison
// and two arguments of the same type. std::bsearch is the right choice.
auto const* range = static_cast<AddressRange const*>(std::bsearch(
&needle,
std::data(rules_),
std::size(rules_),
sizeof(AddressRange),
AddressRange::compareIPv4AddressToRange));
return range != nullptr;
}
if (addr.isIPv6())
{
auto const needle = addr.addr.addr6;
auto const* range = static_cast<AddressRange const*>(std::bsearch(
&needle,
std::data(rules_),
std::size(rules_),
sizeof(AddressRange),
AddressRange::compareIPv6AddressToRange));
return range != nullptr;
}
return false;
}
/*
* P2P plaintext format: "comment:x.x.x.x-y.y.y.y" / "comment:x:x:x:x:x:x:x:x-x:x:x:x:x:x:x:x"
* https://web.archive.org/web/20100328075307/http://wiki.phoenixlabs.org/wiki/P2P_Format
* https://en.wikipedia.org/wiki/PeerGuardian#P2P_plaintext_format
*/
bool BlocklistFile::parseLine1(std::string_view line, struct AddressRange* range)
{
// remove leading "comment:"
auto pos = line.find(':');
if (pos == std::string_view::npos)
{
return false;
}
line = line.substr(pos + 1);
// parse the leading 'x.x.x.x'
pos = line.find('-');
if (pos == std::string_view::npos)
{
return false;
}
if (auto const addr = tr_address::fromString(line.substr(0, pos)); addr)
{
if (addr->isIPv4())
[[nodiscard]] static auto compare(tr_address const& a, address_range_t const& b) noexcept // <=>
{
range->begin_ = ntohl(addr->addr.addr4.s_addr);
if (a < b.first)
{
return -1;
}
if (b.second < a)
{
return 1;
}
return 0;
}
else
[[nodiscard]] static auto compare(address_range_t const& a, tr_address const& b) noexcept // <=>
{
range->begin6_ = addr->addr.addr6;
return -compare(b, a);
}
}
else
{
return false;
}
line = line.substr(pos + 1);
// parse the trailing 'y.y.y.y'
if (auto const addr = tr_address::fromString(line); addr)
{
if (addr->isIPv4())
[[nodiscard]] auto operator()(address_range_t const& a, tr_address const& b) const noexcept // <
{
range->end_ = ntohl(addr->addr.addr4.s_addr);
return compare(a, b) < 0;
}
else
[[nodiscard]] auto operator()(tr_address const& a, address_range_t const& b) const noexcept // <
{
range->end6_ = addr->addr.addr6;
return compare(a, b) < 0;
}
}
else
{
return false;
}
};
return true;
return std::binary_search(std::begin(rules_), std::end(rules_), addr, Compare{});
}
/*
* DAT / eMule format: "000.000.000.000 - 000.255.255.255 , 000 , invalid ip"a
* https://sourceforge.net/p/peerguardian/wiki/dev-blocklist-format-dat/
*/
bool BlocklistFile::parseLine2(std::string_view line, struct AddressRange* range)
std::optional<Blocklist> Blocklist::saveNew(std::string_view external_file, std::string_view bin_file, bool is_enabled)
{
static auto constexpr Delim1 = std::string_view{ " - " };
static auto constexpr Delim2 = std::string_view{ " , " };
auto pos = line.find(Delim1);
if (pos == std::string_view::npos)
{
return false;
}
if (auto const addr = tr_address::fromString(line.substr(0, pos)); addr)
{
range->begin_ = ntohl(addr->addr.addr4.s_addr);
}
else
{
return false;
}
line = line.substr(pos + std::size(Delim1));
pos = line.find(Delim2);
if (pos == std::string_view::npos)
{
return false;
}
if (auto const addr = tr_address::fromString(line.substr(0, pos)); addr)
{
range->end_ = ntohl(addr->addr.addr4.s_addr);
}
else
{
return false;
}
return true;
}
/*
* CIDR notation: "0.0.0.0/8", "::/64"
* https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation
*/
bool BlocklistFile::parseLine3(char const* line, AddressRange* range)
{
auto ip = std::array<unsigned int, 4>{};
unsigned int pflen = 0;
uint32_t ip_u = 0;
uint32_t mask = 0xffffffff;
// NOLINTNEXTLINE readability-container-data-pointer
if (sscanf(line, "%u.%u.%u.%u/%u", TR_ARG_TUPLE(&ip[0], &ip[1], &ip[2], &ip[3]), &pflen) != 5)
{
return false;
}
if (pflen > 32 || ip[0] > 0xff || ip[1] > 0xff || ip[2] > 0xff || ip[3] > 0xff)
{
return false;
}
/* this is host order */
mask <<= 32 - pflen;
ip_u = ip[0] << 24 | ip[1] << 16 | ip[2] << 8 | ip[3];
/* fill the non-prefix bits the way we need it */
range->begin_ = ip_u & mask;
range->end_ = ip_u | (~mask);
return true;
}
bool BlocklistFile::parseLine(char const* line, AddressRange* range)
{
return parseLine1(line, range) || parseLine2(line, range) || parseLine3(line, range);
}
bool BlocklistFile::compareAddressRangesByFirstAddress(AddressRange const& a, AddressRange const& b)
{
if (a.begin_ == 0 && a.end_ == 0)
{
// IPv6
return (memcmp(a.begin6_.s6_addr, b.begin6_.s6_addr, sizeof(a.begin6_.s6_addr)) < 0);
}
return a.begin_ < b.begin_;
}
size_t BlocklistFile::setContent(char const* filename)
{
if (filename == nullptr)
// if we can't parse the file, do nothing
auto rules = parseFile(external_file);
if (std::empty(rules))
{
return {};
}
auto in = std::ifstream{ filename };
if (!in.is_open())
// make a copy of `external_file` for our own safekeeping
auto const src_file = std::string{ std::data(bin_file), std::size(bin_file) - std::size(BinFileSuffix) };
tr_sys_path_remove(src_file.c_str());
tr_error* error = nullptr;
if (!tr_sys_path_copy(tr_pathbuf{ external_file }, src_file.c_str(), &error))
{
tr_logAddWarn(fmt::format(
_("Couldn't read '{path}': {error} ({error_code})"),
fmt::arg("path", filename),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
if (error != nullptr)
{
tr_logAddWarn(fmt::format(
_("Couldn't save '{path}': {error} ({error_code})"),
fmt::arg("path", src_file),
fmt::arg("error", error->message),
fmt::arg("error_code", error->code)));
tr_error_clear(&error);
}
return {};
}
auto line = std::string{};
auto line_number = size_t{ 0U };
auto ranges = std::vector<AddressRange>{};
while (std::getline(in, line))
{
++line_number;
auto range = AddressRange{};
if (!parseLine(line.c_str(), &range))
{
/* don't try to display the actual lines - it causes issues */
tr_logAddWarn(fmt::format(_("Couldn't parse line: '{line}'"), fmt::arg("line", line_number)));
continue;
}
ranges.push_back(range);
}
in.close();
save(bin_file, std::data(rules), std::size(rules));
if (std::empty(ranges))
{
return {};
}
//separate before sorting
auto ipv4_ranges = std::vector<AddressRange>{};
auto ipv6_ranges = std::vector<AddressRange>{};
for (auto const& range : ranges)
{
if (range.begin_ == 0 && range.end_ == 0)
{
// IPv6
ipv6_ranges.emplace_back(range);
}
else
{
ipv4_ranges.emplace_back(range);
}
}
std::sort(std::begin(ipv4_ranges), std::end(ipv4_ranges), BlocklistFile::compareAddressRangesByFirstAddress);
std::sort(std::begin(ipv6_ranges), std::end(ipv6_ranges), BlocklistFile::compareAddressRangesByFirstAddress);
// combine sorted
ranges.clear();
ranges.insert(ranges.end(), ipv4_ranges.begin(), ipv4_ranges.end());
ranges.insert(ranges.end(), ipv6_ranges.begin(), ipv6_ranges.end());
size_t keep = 0; // index in ranges
// merge
for (auto const& range : ranges)
{
if (range.begin_ == 0 && range.end_ == 0)
{
// IPv6
if (memcmp(ranges[keep].end6_.s6_addr, range.begin6_.s6_addr, sizeof(range.begin6_.s6_addr)) < 0)
{
ranges[++keep] = range;
}
else if (memcmp(ranges[keep].end6_.s6_addr, range.end6_.s6_addr, sizeof(range.begin6_.s6_addr)) < 0)
{
ranges[keep].end6_ = range.end6_;
}
}
else
{
if (ranges[keep].end_ < range.begin_)
{
ranges[++keep] = range;
}
else if (ranges[keep].end_ < range.end_)
{
ranges[keep].end_ = range.end_;
}
}
}
TR_ASSERT_MSG(keep + 1 <= std::size(ranges), "Can shrink `ranges` or leave intact, but not grow");
ranges.resize(keep + 1);
#ifdef TR_ENABLE_ASSERTS
assertValidRules(ranges);
#endif
auto out = std::ofstream{ filename_, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary };
if (!out.is_open())
{
tr_logAddWarn(fmt::format(
_("Couldn't read '{path}': {error} ({error_code})"),
fmt::arg("path", filename_),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
return {};
}
if (!out.write(reinterpret_cast<char const*>(ranges.data()), std::size(ranges) * sizeof(AddressRange)))
{
tr_logAddWarn(fmt::format(
_("Couldn't save '{path}': {error} ({error_code})"),
fmt::arg("path", filename_),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
}
else
{
tr_logAddInfo(fmt::format(
ngettext("Blocklist '{path}' has {count} entry", "Blocklist '{path}' has {count} entries", std::size(rules_)),
fmt::arg("path", tr_sys_path_basename(filename_)),
fmt::arg("count", std::size(rules_))));
}
out.close();
close();
ensureLoaded();
return std::size(rules_);
// return a new Blocklist with these rules
auto ret = Blocklist{ bin_file, is_enabled };
ret.rules_ = std::move(rules);
return ret;
}
void BlocklistFile::RewriteBlocklistFile() const
{
auto out = std::ofstream{ filename_, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary };
if (!out.is_open())
{
tr_logAddWarn(fmt::format(
_("Couldn't read '{path}': {error} ({error_code})"),
fmt::arg("path", filename_),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
return;
}
if (!out.write(reinterpret_cast<char const*>(rules_.data()), std::size(rules_) * sizeof(AddressRange)))
{
tr_logAddWarn(fmt::format(
_("Couldn't save '{path}': {error} ({error_code})"),
fmt::arg("path", filename_),
fmt::arg("error", tr_strerror(errno)),
fmt::arg("error_code", errno)));
}
out.close();
ensureLoaded();
}
#ifdef TR_ENABLE_ASSERTS
void BlocklistFile::assertValidRules(std::vector<AddressRange> const& ranges)
{
for (auto const& r : ranges)
{
if (r.begin_ == 0 && r.end_ == 0)
{
TR_ASSERT(memcmp(r.begin6_.s6_addr, r.end6_.s6_addr, sizeof(r.begin6_.s6_addr)) <= 0);
}
else
{
TR_ASSERT(r.begin_ <= r.end_);
}
}
auto ranges_ipv4 = std::vector<AddressRange>{};
auto ranges_ipv6 = std::vector<AddressRange>{};
for (size_t i = 0; i < std::size(ranges); i++)
{
if (ranges[i].begin_ == 0 && ranges[i].end_ == 0)
{
ranges_ipv6.emplace_back(ranges[i]);
}
else
{
ranges_ipv4.emplace_back(ranges[i]);
}
}
TR_ASSERT(is_sorted(std::begin(ranges_ipv4), std::end(ranges_ipv4), BlocklistFile::compareAddressRangesByFirstAddress));
TR_ASSERT(is_sorted(std::begin(ranges_ipv6), std::end(ranges_ipv6), BlocklistFile::compareAddressRangesByFirstAddress));
}
#endif
} // namespace libtransmission

View File

@ -9,146 +9,65 @@
#error only libtransmission should #include this header.
#endif
#include <cstddef> // for size_t
#include <cstdint>
#include <cstring>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility> // for std::pair
#include <vector>
#ifdef _WIN32
#include <ws2tcpip.h>
#else
#include <netinet/in.h>
#endif
#include "file.h" // for tr_sys_file_t
#include "tr-assert.h"
#include "tr-macros.h"
#include "net.h" // for tr_address
struct tr_address;
struct BlocklistFile
namespace libtransmission
{
class Blocklist
{
public:
// Prevent moving to protect the fd_ from accidental destruction
BlocklistFile(BlocklistFile&&) = delete;
BlocklistFile(BlocklistFile const&) = delete;
BlocklistFile& operator=(BlocklistFile const&) = delete;
BlocklistFile& operator=(BlocklistFile&&) = delete;
[[nodiscard]] static std::vector<Blocklist> loadBlocklists(std::string_view const blocklist_dir, bool const is_enabled);
BlocklistFile(char const* filename, bool isEnabled)
: filename_(filename)
, is_enabled_(isEnabled)
static std::optional<Blocklist> saveNew(std::string_view external_file, std::string_view bin_file, bool is_enabled);
Blocklist() = default;
Blocklist(std::string_view bin_file, bool is_enabled)
: bin_file_{ bin_file }
, is_enabled_{ is_enabled }
{
}
~BlocklistFile()
{
close();
}
[[nodiscard]] bool contains(tr_address const& addr) const;
[[nodiscard]] constexpr auto& filename() const
{
return filename_;
}
[[nodiscard]] bool exists() const
{
return tr_sys_path_exists(filename_.c_str(), nullptr);
}
[[nodiscard]] size_t getRuleCount() const
[[nodiscard]] auto size() const
{
ensureLoaded();
return std::size(rules_);
}
[[nodiscard]] constexpr bool isEnabled() const
[[nodiscard]] constexpr bool enabled() const noexcept
{
return is_enabled_;
}
void setEnabled(bool isEnabled)
void setEnabled(bool is_enabled) noexcept
{
is_enabled_ = isEnabled;
is_enabled_ = is_enabled;
}
bool hasAddress(tr_address const& addr);
/// @brief Read the file of ranges, sort and merge, write to our own file, and reload from it
size_t setContent(char const* filename);
static std::vector<std::unique_ptr<BlocklistFile>> loadBlocklists(std::string_view const config_dir, bool const is_enabled);
[[nodiscard]] constexpr auto const& binFile() const noexcept
{
return bin_file_;
}
private:
struct AddressRange
{
uint32_t begin_ = 0;
uint32_t end_ = 0;
in6_addr begin6_;
in6_addr end6_;
/// @brief Used for std::bsearch of an IPv4 address
static int compareIPv4AddressToRange(void const* va, void const* vb)
{
auto const* a = reinterpret_cast<uint32_t const*>(va);
auto const* b = reinterpret_cast<AddressRange const*>(vb);
if (*a < b->begin_)
{
return -1;
}
if (*a > b->end_)
{
return 1;
}
return 0;
}
/// @brief Used for std::bsearch of an IPv6 address
static int compareIPv6AddressToRange(void const* va, void const* vb)
{
auto const* a = reinterpret_cast<in6_addr const*>(va);
auto const* b = reinterpret_cast<AddressRange const*>(vb);
if (memcmp(&a->s6_addr, &b->begin6_.s6_addr, sizeof(a->s6_addr)) < 0)
{
return -1;
}
if (memcmp(&a->s6_addr, &b->end6_.s6_addr, sizeof(a->s6_addr)) > 0)
{
return 1;
}
return 0;
}
};
void RewriteBlocklistFile() const;
void ensureLoaded() const;
void load();
void close();
static bool parseLine(char const* line, AddressRange* range);
static bool compareAddressRangesByFirstAddress(AddressRange const& a, AddressRange const& b);
static bool parseLine1(std::string_view line, struct AddressRange* range);
static bool parseLine2(std::string_view line, struct AddressRange* range);
static bool parseLine3(char const* line, AddressRange* range);
#ifdef TR_ENABLE_ASSERTS
/// @brief Sanity checks: make sure the rules are sorted in ascending order and don't overlap
static void assertValidRules(std::vector<AddressRange> const& ranges);
#endif
std::string const filename_;
mutable std::vector<std::pair<tr_address, tr_address>> rules_;
std::string bin_file_;
bool is_enabled_ = false;
mutable std::vector<AddressRange> rules_;
};
} // namespace libtransmission

View File

@ -243,6 +243,11 @@ struct tr_address
return this->compare(that) < 0;
}
[[nodiscard]] bool operator<=(tr_address const& that) const noexcept
{
return this->compare(that) <= 0;
}
[[nodiscard]] bool operator>(tr_address const& that) const noexcept
{
return this->compare(that) > 0;

View File

@ -536,7 +536,7 @@ void tr_session::initImpl(init_data& data)
tr_logSetQueueEnabled(data.message_queuing_enabled);
this->blocklists_ = BlocklistFile::loadBlocklists(configDir(), useBlocklist());
this->blocklists_ = libtransmission::Blocklist::loadBlocklists(blocklist_dir_, useBlocklist());
tr_announcerInit(this);
@ -1621,7 +1621,7 @@ void tr_session::useBlocklist(bool enabled)
std::for_each(
std::begin(blocklists_),
std::end(blocklists_),
[enabled](auto& blocklist) { blocklist->setEnabled(enabled); });
[enabled](auto& blocklist) { blocklist.setEnabled(enabled); });
}
bool tr_session::addressIsBlocked(tr_address const& addr) const noexcept
@ -1629,13 +1629,12 @@ bool tr_session::addressIsBlocked(tr_address const& addr) const noexcept
return std::any_of(
std::begin(blocklists_),
std::end(blocklists_),
[&addr](auto& blocklist) { return blocklist->hasAddress(addr); });
[&addr](auto& blocklist) { return blocklist.contains(addr); });
}
void tr_sessionReloadBlocklists(tr_session* session)
{
session->blocklists_.clear();
session->blocklists_ = BlocklistFile::loadBlocklists(session->configDir(), session->useBlocklist());
session->blocklists_ = libtransmission::Blocklist::loadBlocklists(session->blocklist_dir_, session->useBlocklist());
if (session->peer_mgr_)
{
@ -1648,7 +1647,7 @@ size_t tr_blocklistGetRuleCount(tr_session const* session)
TR_ASSERT(session != nullptr);
auto& src = session->blocklists_;
return std::accumulate(std::begin(src), std::end(src), 0, [](int sum, auto& cur) { return sum + cur->getRuleCount(); });
return std::accumulate(std::begin(src), std::end(src), 0, [](int sum, auto& cur) { return sum + std::size(cur); });
}
bool tr_blocklistIsEnabled(tr_session const* session)
@ -1676,29 +1675,35 @@ size_t tr_blocklistSetContent(tr_session* session, char const* content_filename)
{
auto const lock = session->unique_lock();
// find (or add) the default blocklist
auto& src = session->blocklists_;
char const* const name = DEFAULT_BLOCKLIST_FILENAME;
auto const it = std::find_if(
std::begin(src),
std::end(src),
[&name](auto const& blocklist) { return tr_strvEndsWith(blocklist->filename(), name); });
// These rules will replace the default blocklist.
// Build the path of the default blocklist .bin file where we'll save these rules.
auto const bin_file = tr_pathbuf{ session->blocklist_dir_, '/', DEFAULT_BLOCKLIST_FILENAME };
BlocklistFile* b = nullptr;
if (it == std::end(src))
// Try to save it
auto added = libtransmission::Blocklist::saveNew(content_filename, bin_file, session->useBlocklist());
if (!added)
{
auto path = tr_pathbuf{ session->configDir(), "/blocklists/"sv, name };
src.push_back(std::make_unique<BlocklistFile>(path, session->useBlocklist()));
b = std::rbegin(src)->get();
return 0U;
}
auto const n_rules = std::size(*added);
// Add (or replace) it in our blocklists_ vector
auto& src = session->blocklists_;
if (auto iter = std::find_if(
std::begin(src),
std::end(src),
[&bin_file](auto const& candidate) { return bin_file == candidate.binFile(); });
iter != std::end(src))
{
*iter = std::move(*added);
}
else
{
b = it->get();
src.emplace_back(std::move(*added));
}
// set the default blocklist's content
auto const rule_count = b->setContent(content_filename);
return rule_count;
return n_rules;
}
void tr_blocklistSetURL(tr_session* session, char const* url)
@ -2210,6 +2215,13 @@ auto makeTorrentDir(std::string_view config_dir)
return dir;
}
/// @brief Build the path "<config_dir>/blocklists" and ask for it to be created.
/// @return the directory path; the result of the create call is not checked
auto makeBlocklistDir(std::string_view config_dir)
{
    auto blocklist_dir = fmt::format("{:s}/blocklists"sv, config_dir);

    // also creates any missing parent directories
    tr_sys_dir_create(blocklist_dir.c_str(), TR_SYS_DIR_CREATE_PARENTS, 0777);

    return blocklist_dir;
}
auto makeEventBase()
{
tr_evthread_init();
@ -2222,6 +2234,7 @@ tr_session::tr_session(std::string_view config_dir, tr_variant* settings_dict)
: config_dir_{ config_dir }
, resume_dir_{ makeResumeDir(config_dir) }
, torrent_dir_{ makeTorrentDir(config_dir) }
, blocklist_dir_{ makeBlocklistDir(config_dir) }
, event_base_{ makeEventBase() }
, timer_maker_{ std::make_unique<libtransmission::EvTimerMaker>(eventBase()) }
, dns_{ std::make_unique<libtransmission::EvDns>(eventBase(), tr_time) }

View File

@ -53,13 +53,13 @@ class tr_lpd;
class tr_port_forwarding;
class tr_rpc_server;
class tr_web;
struct BlocklistFile;
struct struct_utp_context;
struct tr_announcer;
struct tr_variant;
namespace libtransmission
{
class Blocklist;
class Dns;
class Timer;
class TimerMaker;
@ -1045,9 +1045,9 @@ private:
/// other fields
public:
std::vector<std::unique_ptr<BlocklistFile>> blocklists_;
std::vector<libtransmission::Blocklist> blocklists_;
public:
struct tr_event_handle* events = nullptr;
// depends-on: announcer_udp_

View File

@ -43,21 +43,6 @@ protected:
"IPv6 example:2001:db8::-2001:db8:ffff:ffff:ffff:ffff:ffff:ffff\n"
"Evilcorp:216.88.88.0-216.88.88.255\n";
#if 0
void createFileWithContents(char const* path, char const* contents)
{
auto const dir = tr_sys_path_dirname(path);
tr_sys_dir_create(dir, TR_SYS_DIR_CREATE_PARENTS, 0700);
auto const fd = tr_sys_file_open(path, TR_SYS_FILE_WRITE | TR_SYS_FILE_CREATE | TR_SYS_FILE_TRUNCATE, 0600);
blockingFileWrite(fd, contents, strlen(contents));
tr_sys_file_close(fd);
sync();
}
#endif
bool addressIsBlocked(char const* address_str)
{
auto const addr = tr_address::fromString(address_str);