perf: use fast_float to parse floating-point numbers (#3092)

This commit is contained in:
Charles Kerr 2022-05-16 00:06:17 -05:00 committed by GitHub
parent ebd1a0b7bf
commit 2293f4336a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 113 additions and 160 deletions

3
.gitmodules vendored
View File

@ -43,3 +43,6 @@
[submodule "third-party/fmt"]
path = third-party/fmt
url = https://github.com/transmission/fmt.git
[submodule "third-party/fast_float"]
path = third-party/fast_float
url = https://github.com/transmission/fast_float

View File

@ -154,6 +154,7 @@ find_package(Fmt)
add_definitions(-DFMT_HEADER_ONLY -DFMT_EXCEPTIONS=0)
include_directories(SYSTEM ${LIBFMT_INCLUDE_DIRS})
find_package(FastFloat)
find_package(UtfCpp)
find_package(Threads)
find_package(PkgConfig QUIET)
@ -587,8 +588,7 @@ set(NEEDED_FUNCTIONS
sendfile64
statvfs
strlcpy
syslog
uselocale)
syslog)
foreach(F ${NEEDED_FUNCTIONS})
tr_make_id("${F}" F_ID)

View File

@ -3591,11 +3591,12 @@
"$(inherited)",
"third-party/arc4/src",
"third-party/dht",
"third-party/fast_float/include",
"third-party/libb64/include",
"third-party/libutp",
"third-party/utfcpp/source",
"third-party/libdeflate",
"third-party/libpsl/include",
"third-party/libutp",
"third-party/utfcpp/source",
);
OTHER_CFLAGS = (
"$(inherited)",
@ -3805,11 +3806,12 @@
"$(inherited)",
"third-party/arc4/src",
"third-party/dht",
"third-party/fast_float/include",
"third-party/libb64/include",
"third-party/libutp",
"third-party/utfcpp/source",
"third-party/libdeflate",
"third-party/libpsl/include",
"third-party/libutp",
"third-party/utfcpp/source",
);
OTHER_CFLAGS = (
"$(inherited)",
@ -4091,11 +4093,12 @@
"$(inherited)",
"third-party/arc4/src",
"third-party/dht",
"third-party/fast_float/include",
"third-party/libb64/include",
"third-party/libutp",
"third-party/utfcpp/source",
"third-party/libdeflate",
"third-party/libpsl/include",
"third-party/libutp",
"third-party/utfcpp/source",
);
OTHER_CFLAGS = (
"$(inherited)",

View File

@ -0,0 +1 @@
set(FAST_FLOAT_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/third-party/fast_float/include)

View File

@ -262,6 +262,7 @@ include_directories(
SYSTEM
${UTFCPP_INCLUDE_DIRS}
${DEFLATE_INCLUDE_DIRS}
${FAST_FLOAT_INCLUDE_DIRS}
${CRYPTO_INCLUDE_DIRS}
${CURL_INCLUDE_DIRS}
${EVENT2_INCLUDE_DIRS}

View File

@ -25,6 +25,10 @@
#include <ws2tcpip.h> /* WSAStartup() */
#endif
#ifndef _WIN32
#include <sys/stat.h> // mode_t
#endif
#ifdef HAVE_ICONV
#include <iconv.h>
#endif
@ -37,6 +41,8 @@
#include <fmt/format.h>
#include <fast_float/fast_float.h>
#include "transmission.h"
#include "error-types.h"
@ -1355,3 +1361,80 @@ std::string_view tr_get_mime_type_for_filename(std::string_view filename)
auto constexpr Fallback = "application/octet-stream"sv;
return Fallback;
}
/// parseNum()
#if defined(__GNUC__) && !__has_include(<charconv>)
#include <iomanip> // std::setbase
#include <sstream>
/**
 * @brief Parse an integer from the front of `sv` using a std::stringstream.
 *
 * This is the fallback used when <charconv> is unavailable.
 * Unlike the std::from_chars() variant, stream extraction skips leading
 * whitespace and only honors bases 8, 10 and 16 (see the note below).
 *
 * The default for the second template parameter is deliberately not repeated
 * here: a default template argument may be specified only once, and the
 * declaration of this template already provides it.
 *
 * @param sv   text to parse. On success, the consumed characters are removed
 *             from its front. On failure it is left untouched.
 * @param base numeric base handed to std::setbase()
 * @return the parsed value, or std::nullopt if no number could be extracted
 *         (empty input, no digits, or a value that does not fit in T)
 */
template<typename T, std::enable_if_t<std::is_integral<T>::value, bool>>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv, int base)
{
    // Only a bounded prefix of the input is copied into the stream.
    auto const str = std::string(std::data(sv), std::min(std::size(sv), size_t{ 64 }));
    auto sstream = std::stringstream{ str };
    auto const oldpos = sstream.tellg();

    /* The base parameter only works for bases 8, 10 and 16.
       All other bases will be converted to 0 which activates the
       prefix based parsing and therefore decimal in our usual cases.
       This differs from the from_chars solution below. */
    auto val = T{};
    sstream >> std::setbase(base) >> val;

    // failbit is set for every unsuccessful extraction, including the ones
    // that also hit end-of-input (empty string, lone sign, overflow).
    if (sstream.fail())
    {
        return std::nullopt;
    }

    // Test eof() before calling tellg(): once eofbit is set, tellg() may
    // report failure instead of a position. Reaching the end means the whole
    // copied prefix `str` was consumed, which may be shorter than `sv`.
    auto const n_consumed = sstream.eof() ? std::size(str) : static_cast<size_t>(sstream.tellg() - oldpos);
    sv.remove_prefix(n_consumed);
    return val;
}
#else // #if defined(__GNUC__) && !__has_include(<charconv>)
#include <charconv> // std::from_chars()
/**
 * @brief Parse an integer from the front of `sv` with std::from_chars().
 *
 * @param sv   text to parse. On success, the characters that were consumed
 *             are removed from its front. On failure it is not modified.
 * @param base numeric base forwarded to std::from_chars()
 * @return the parsed value, or std::nullopt when std::from_chars() reports
 *         any error code
 */
template<typename T, std::enable_if_t<std::is_integral<T>::value, bool>>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv, int base)
{
    auto const* const first = std::data(sv);
    auto const* const last = first + std::size(sv);

    /* The base parameter works for any base from 2 to 36 (inclusive).
       This is different from the behaviour of the stringstream
       based solution above. */
    auto parsed = T{};
    auto const [stop, err] = std::from_chars(first, last, parsed, base);

    if (err == std::errc{})
    {
        // `stop` points one past the last character that was consumed
        sv.remove_prefix(stop - first);
        return parsed;
    }

    return std::nullopt;
}
#endif // #if defined(__GNUC__) && !__has_include(<charconv>)
// Explicit instantiations of the integral tr_parseNum() for each integer
// type listed here.
// NOTE(review): presumably required because the template body is defined in
// this source file, so other translation units can only link against the
// instantiations emitted here -- confirm when adding a new integer type.
template std::optional<int64_t> tr_parseNum(std::string_view& sv, int base);
template std::optional<int> tr_parseNum(std::string_view& sv, int base);
template std::optional<size_t> tr_parseNum(std::string_view& sv, int base);
#ifndef _WIN32
// mode_t comes from <sys/stat.h>, which this file includes only on non-Windows builds
template std::optional<mode_t> tr_parseNum(std::string_view& sv, int base);
#endif
/**
 * @brief Parse a floating-point number from the front of `sv` with fast_float.
 *
 * @param sv text to parse. On success, the characters that were consumed are
 *           removed from its front. On failure it is not modified.
 * @return the parsed value, or std::nullopt when fast_float::from_chars()
 *         reports any error code
 *
 * NOTE(review): a successful parse may leave trailing characters in `sv`;
 * callers that need the whole string to be numeric must check for that.
 */
template<typename T, std::enable_if_t<std::is_floating_point<T>::value, bool>>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv)
{
    auto const* const first = std::data(sv);
    auto const* const last = first + std::size(sv);

    auto parsed = T{};
    auto const outcome = fast_float::from_chars(first, last, parsed);

    if (outcome.ec == std::errc{})
    {
        // outcome.ptr is where parsing stopped
        sv.remove_prefix(outcome.ptr - first);
        return parsed;
    }

    return std::nullopt;
}
// Explicit instantiation of the floating-point tr_parseNum() for double,
// the only floating-point type instantiated here.
template std::optional<double> tr_parseNum(std::string_view& sv);

View File

@ -115,55 +115,11 @@ uint64_t tr_time_msec();
/** @brief sleep the specified number of milliseconds */
void tr_wait_msec(long int delay_milliseconds);
#if defined(__GNUC__) && !__has_include(<charconv>)
template<typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv, int base = 10);
#include <iomanip> // std::setbase
#include <sstream>
template<typename T>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv, int base = 10)
{
auto val = T{};
auto const str = std::string(std::data(sv), std::min(std::size(sv), size_t{ 64 }));
auto sstream = std::stringstream{ str };
auto const oldpos = sstream.tellg();
/* The base parameter only works for bases 8, 10 and 16.
All other bases will be converted to 0 which activates the
prefix based parsing and therefore decimal in our usual cases.
This differs from the from_chars solution below. */
sstream >> std::setbase(base) >> val;
auto const newpos = sstream.tellg();
if ((newpos == oldpos) || (sstream.fail() && !sstream.eof()))
{
return std::nullopt;
}
sv.remove_prefix(sstream.eof() ? std::size(sv) : newpos - oldpos);
return val;
}
#else // #if defined(__GNUC__) && !__has_include(<charconv>)
#include <charconv> // std::from_chars()
template<typename T>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv, int base = 10)
{
auto val = T{};
auto const* const begin_ch = std::data(sv);
auto const* const end_ch = begin_ch + std::size(sv);
/* The base parameter works for any base from 2 to 36 (inclusive).
This is different from the behaviour of the stringstream
based solution above. */
auto const result = std::from_chars(begin_ch, end_ch, val, base);
if (result.ec != std::errc{})
{
return std::nullopt;
}
sv.remove_prefix(result.ptr - std::data(sv));
return val;
}
#endif // #if defined(__GNUC__) && !__has_include(<charconv>)
template<typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
[[nodiscard]] std::optional<T> tr_parseNum(std::string_view& sv);
bool tr_utf8_validate(std::string_view sv, char const** endptr);

View File

@ -324,9 +324,9 @@ static void action_callback_POP(
{
if ((state->special_flags & JSONSL_SPECIALf_NUMNOINT) != 0)
{
char const* begin = jsn->base + state->pos_begin;
data->has_content = true;
tr_variantInitReal(get_node(jsn), strtod(begin, nullptr));
auto sv = std::string_view{ jsn->base + state->pos_begin, jsn->pos - state->pos_begin };
auto const val = tr_parseNum<double>(sv);
tr_variantInitReal(get_node(jsn), val ? *val : double{});
}
else if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) != 0)
{

View File

@ -3,18 +3,8 @@
// or any future license endorsed by Mnemosyne LLC.
// License text can be found in the licenses/ folder.
#if defined(HAVE_USELOCALE) && (!defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 700)
#undef _XOPEN_SOURCE
#define XOPEN_SOURCE 700 // NOLINT
#endif
#if defined(HAVE_USELOCALE) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <algorithm> // std::sort
#include <cerrno>
#include <cstdlib> /* strtod() */
#include <cstring>
#include <stack>
#include <string>
@ -25,12 +15,6 @@
#include <share.h>
#endif
#include <clocale> /* setlocale() */
#if defined(HAVE_USELOCALE) && defined(HAVE_XLOCALE_H)
#include <xlocale.h>
#endif
#include <event2/buffer.h>
#include <fmt/core.h>
@ -48,70 +32,8 @@
#include "variant-common.h"
#include "variant.h"
/* don't use newlocale/uselocale on old versions of uClibc because they're buggy.
* https://trac.transmissionbt.com/ticket/6006 */
#if defined(__UCLIBC__) && !TR_UCLIBC_CHECK_VERSION(0, 9, 34)
#undef HAVE_USELOCALE
#endif
/**
***
**/
using namespace std::literals;
/**
 * State saved by use_numeric_locale() so that restore_locale() can undo it.
 * Which fields exist depends on the build configuration.
 */
struct locale_context
{
#ifdef HAVE_USELOCALE
// locale object created by newlocale() in use_numeric_locale()
locale_t new_locale;
// value returned by uselocale() when new_locale was installed
locale_t old_locale;
#else
#if defined(HAVE__CONFIGTHREADLOCALE) && defined(_ENABLE_PER_THREAD_LOCALE)
// value returned by _configthreadlocale() in use_numeric_locale()
int old_thread_config;
#endif
// locale category that was changed; use_numeric_locale() sets this to LC_NUMERIC
int category;
// copy of the locale name that setlocale() reported before the change
char old_locale[128];
#endif
};
/**
 * Switch the numeric locale to `locale_name`, recording in `context` the
 * state that restore_locale() needs in order to switch back.
 *
 * @param context     receives the previous locale state
 * @param locale_name locale name passed to newlocale() or setlocale()
 */
static void use_numeric_locale(struct locale_context* context, char const* locale_name)
{
#ifdef HAVE_USELOCALE
// create a locale for LC_NUMERIC_MASK and install it, remembering what
// uselocale() returns.
// NOTE(review): the result of newlocale() is not checked before use.
context->new_locale = newlocale(LC_NUMERIC_MASK, locale_name, nullptr);
context->old_locale = uselocale(context->new_locale);
#else
#if defined(HAVE__CONFIGTHREADLOCALE) && defined(_ENABLE_PER_THREAD_LOCALE)
// request _ENABLE_PER_THREAD_LOCALE and remember the returned configuration
context->old_thread_config = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
#endif
context->category = LC_NUMERIC;
// save the current locale name (bounded by the size of old_locale) before changing it
tr_strlcpy(context->old_locale, setlocale(context->category, nullptr), sizeof(context->old_locale));
setlocale(context->category, locale_name);
#endif
}
/**
 * Undo a previous use_numeric_locale() call using the state saved in `context`.
 *
 * @param context state previously filled in by use_numeric_locale()
 */
static void restore_locale(struct locale_context* context)
{
#ifdef HAVE_USELOCALE
// reinstall the saved locale first, then free the one use_numeric_locale() created
uselocale(context->old_locale);
freelocale(context->new_locale);
#else
// put back the saved locale name for the saved category
setlocale(context->category, context->old_locale);
#if defined(HAVE__CONFIGTHREADLOCALE) && defined(_ENABLE_PER_THREAD_LOCALE)
// put back the saved thread-locale configuration
_configthreadlocale(context->old_thread_config);
#endif
#endif
}
/***
****
***/
@ -392,17 +314,13 @@ bool tr_variantGetReal(tr_variant const* v, double* setme)
if (!success && tr_variantIsString(v))
{
/* the json spec requires a '.' decimal point regardless of locale */
struct locale_context locale_ctx;
use_numeric_locale(&locale_ctx, "C");
char* endptr = nullptr;
double const d = strtod(getStr(v), &endptr);
restore_locale(&locale_ctx);
if (getStr(v) != endptr && *endptr == '\0')
if (auto sv = std::string_view{}; tr_variantGetStrView(v, &sv))
{
*setme = d;
success = true;
if (auto d = tr_parseNum<double>(sv); d)
{
*setme = *d;
success = true;
}
}
}
@ -1173,12 +1091,8 @@ void tr_variantMergeDicts(tr_variant* target, tr_variant const* source)
struct evbuffer* tr_variantToBuf(tr_variant const* v, tr_variant_fmt fmt)
{
struct locale_context locale_ctx;
struct evbuffer* buf = evbuffer_new();
/* parse with LC_NUMERIC="C" to ensure a "." decimal separator */
use_numeric_locale(&locale_ctx, "C");
evbuffer_expand(buf, 4096); /* alloc a little memory to start off with */
switch (fmt)
@ -1196,8 +1110,6 @@ struct evbuffer* tr_variantToBuf(tr_variant const* v, tr_variant_fmt fmt)
break;
}
/* restore the previous locale */
restore_locale(&locale_ctx);
return buf;
}
@ -1242,10 +1154,6 @@ bool tr_variantFromBuf(tr_variant* setme, int opts, std::string_view buf, char c
// supported formats: benc, json
TR_ASSERT((opts & (TR_VARIANT_PARSE_BENC | TR_VARIANT_PARSE_JSON)) != 0);
// parse with LC_NUMERIC="C" to ensure a "." decimal separator
auto locale_ctx = locale_context{};
use_numeric_locale(&locale_ctx, "C");
*setme = {};
auto const success = ((opts & TR_VARIANT_PARSE_BENC) != 0) ? tr_variantParseBenc(*setme, opts, buf, setme_end, error) :
@ -1256,9 +1164,6 @@ bool tr_variantFromBuf(tr_variant* setme, int opts, std::string_view buf, char c
tr_variantFree(setme);
}
/* restore the previous locale */
restore_locale(&locale_ctx);
return success;
}

1
third-party/fast_float vendored Submodule

@ -0,0 +1 @@
Subproject commit f2082bf747eabc0873f2fdceb05f9451931b96dc