Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement support for uint64_t values in ICU backend #246

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ jobs:
- name: Test output
if: '!matrix.coverity && always()'
run: |
for f in $(find "$BOOST_ROOT/bin.v2/libs/$SELF/test" -type f -name 'test_*.run'); do
for f in $(find "$BOOST_ROOT/bin.v2/libs/$SELF/test" -type f -name '*.run'); do
name=$(basename "$f")
name=${name%.run}
config=$(dirname "$f")
Expand Down
2 changes: 1 addition & 1 deletion build/Jamfile.v2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright 2003 John Maddock
# Copyright 2010 Artyom Beilis
# Copyright 2021 - 2022 Alexander Grund
# Copyright 2021 - 2024 Alexander Grund
#
# Distributed under the Boost Software License, Version 1.0.
# https://www.boost.org/LICENSE_1_0.txt.
Expand Down
63 changes: 46 additions & 17 deletions src/icu/formatter.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2021-2023 Alexander Grund
// Copyright (c) 2021-2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand All @@ -9,18 +9,21 @@
#include <boost/locale/formatting.hpp>
#include <boost/locale/info.hpp>
#include "../util/foreach_char.hpp"
#include "../util/numeric_conversion.hpp"
#include "formatters_cache.hpp"
#include "icu_util.hpp"
#include "time_zone.hpp"
#include "uconv.hpp"
#include <boost/assert.hpp>
#include <boost/charconv/limits.hpp>
#include <boost/charconv/to_chars.hpp>
#include <limits>
#include <memory>
#ifdef BOOST_MSVC
# pragma warning(push)
# pragma warning(disable : 4251) // "identifier" : class "type" needs to have dll-interface...
#endif
#include <unicode/datefmt.h>
#include <unicode/decimfmt.h>
#include <unicode/numfmt.h>
#include <unicode/rbnf.h>
#include <unicode/smpdtfmt.h>
Expand Down Expand Up @@ -62,35 +65,64 @@ namespace boost { namespace locale { namespace impl_icu {
// The following overrides simply forward to the do_format / do_parse helpers
// using the value type they were called with.
string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); }
string_type format(int32_t value, size_t& code_points) const override { return do_format(value, code_points); }
size_t parse(const string_type& str, double& value) const override { return do_parse(str, value); }
size_t parse(const string_type& str, uint64_t& value) const override { return do_parse(str, value); }
size_t parse(const string_type& str, int64_t& value) const override { return do_parse(str, value); }
size_t parse(const string_type& str, int32_t& value) const override { return do_parse(str, value); }

/// Format an unsigned 64 bit value.
/// Values that fit into an int64_t are forwarded to the int64_t overload,
/// larger ones are passed to ICU as a decimal string.
/// On return `code_points` holds the number of code points of the formatted text.
string_type format(const uint64_t value, size_t& code_points) const override
{
    // int64_t is the largest integer type ICU handles directly
    constexpr uint64_t max_signed = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
    if(value > max_signed) {
        // Too large for int64_t: hand the number over as decimal text (StringPiece) instead
        char digits[boost::charconv::limits<uint64_t>::max_chars10 + 1];
        char* const digits_end = std::end(digits);
        const auto conv = boost::charconv::to_chars(digits, digits_end, value);
        BOOST_ASSERT(conv);
        BOOST_ASSERT(conv.ptr < digits_end);
        *conv.ptr = '\0'; // ICU expects a NULL-terminated string even for the StringPiece

        UErrorCode status = U_ZERO_ERROR;
        icu::UnicodeString formatted;
        icu_fmt_.format(icu::StringPiece(digits, conv.ptr - digits), formatted, nullptr, status);
        check_and_throw_icu_error(status);
        code_points = formatted.countChar32();
        return cvt_.std(formatted);
    }
    return format(static_cast<int64_t>(value), code_points);
}

private:
/// Read the parsed number from `fmt` as a double and store it in `v`.
/// Returns true if ICU reported success, false otherwise.
/// Note that `v` is assigned the value returned by ICU even when the call fails.
bool get_value(double& v, icu::Formattable& fmt) const
{
    UErrorCode err = U_ZERO_ERROR;
    v = fmt.getDouble(err);
    // Single exit point: U_SUCCESS is the exact complement of U_FAILURE
    return U_SUCCESS(err);
}

/// Read the parsed number from `fmt` as an int64_t and store it in `v`.
/// Returns true if ICU reported success, false otherwise.
/// Note that `v` is assigned the value returned by ICU even when the call fails.
bool get_value(int64_t& v, icu::Formattable& fmt) const
{
    UErrorCode err = U_ZERO_ERROR;
    v = fmt.getInt64(err);
    // Single exit point: U_SUCCESS is the exact complement of U_FAILURE
    return U_SUCCESS(err);
}

bool get_value(uint64_t& v, icu::Formattable& fmt) const
{
UErrorCode err = U_ZERO_ERROR;
// ICU only supports int64_t as the largest integer type
const int64_t tmp = fmt.getInt64(err);
if(U_SUCCESS(err)) {
if(tmp < 0)
return false;
v = static_cast<uint64_t>(tmp);
return true;
}
return util::try_parse_icu(fmt, v);
}

/// Read the parsed number from `fmt` as an int32_t and store it in `v`.
/// Returns true if ICU reported success, false otherwise.
/// Note that `v` is assigned the value returned by ICU even when the call fails.
bool get_value(int32_t& v, icu::Formattable& fmt) const
{
    UErrorCode err = U_ZERO_ERROR;
    v = fmt.getLong(err);
    // Single exit point: U_SUCCESS is the exact complement of U_FAILURE
    return U_SUCCESS(err);
}

template<typename ValueType>
Expand All @@ -114,14 +146,11 @@ namespace boost { namespace locale { namespace impl_icu {
icu_fmt_.setParseIntegerOnly(std::is_integral<ValueType>::value && isNumberOnly_);
icu_fmt_.parse(tmp, val, pp);

ValueType tmp_v;

if(pp.getIndex() == 0 || !get_value(tmp_v, val))
if(pp.getIndex() == 0 || !get_value(v, val))
return 0;
size_t cut = cvt_.cut(tmp, str.data(), str.data() + str.size(), pp.getIndex());
if(cut == 0)
return 0;
v = tmp_v;
return cut;
}

Expand All @@ -136,11 +165,11 @@ namespace boost { namespace locale { namespace impl_icu {
typedef std::basic_string<CharType> string_type;

// Formatting overrides: each one forwards to do_format with the value type it was called with
string_type format(double value, size_t& code_points) const override { return do_format(value, code_points); }
string_type format(uint64_t value, size_t& code_points) const override { return do_format(value, code_points); }
string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); }

string_type format(int32_t value, size_t& code_points) const override { return do_format(value, code_points); }

// Parsing overrides: each one forwards to do_parse with the value type it was called with
size_t parse(const string_type& str, double& value) const override { return do_parse(str, value); }
size_t parse(const string_type& str, uint64_t& value) const override { return do_parse(str, value); }
size_t parse(const string_type& str, int64_t& value) const override { return do_parse(str, value); }
size_t parse(const string_type& str, int32_t& value) const override { return do_parse(str, value); }

Expand Down
6 changes: 6 additions & 0 deletions src/icu/formatter.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand Down Expand Up @@ -31,6 +32,8 @@ namespace boost { namespace locale { namespace impl_icu {
/// Format the value and return the formatted string.
/// The number of Unicode code points of the result is stored in \a code_points
virtual string_type format(double value, size_t& code_points) const = 0;
/// Format the value and return the formatted string.
/// The number of Unicode code points of the result is stored in \a code_points
virtual string_type format(uint64_t value, size_t& code_points) const = 0;
/// Format the value and return the formatted string.
/// The number of Unicode code points of the result is stored in \a code_points
virtual string_type format(int64_t value, size_t& code_points) const = 0;
/// Format the value and return the formatted string.
/// The number of Unicode code points of the result is stored in \a code_points
virtual string_type format(int32_t value, size_t& code_points) const = 0;
Expand All @@ -40,6 +43,9 @@ namespace boost { namespace locale { namespace impl_icu {
virtual size_t parse(const string_type& str, double& value) const = 0;
/// Parse the string and return the number of used characters. If it returns 0
/// then parsing failed.
virtual size_t parse(const string_type& str, uint64_t& value) const = 0;
/// Parse the string and return the number of used characters. If it returns 0
/// then parsing failed.
virtual size_t parse(const string_type& str, int64_t& value) const = 0;
/// Parse the string and return the number of used characters. If it returns 0
/// then parsing failed.
Expand Down
41 changes: 16 additions & 25 deletions src/icu/numeric.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand All @@ -19,41 +20,31 @@
namespace boost { namespace locale { namespace impl_icu {

namespace detail {
template<typename T, bool integer = std::numeric_limits<T>::is_integer>
struct icu_format_type;
/// Type selector: `type` is PreferredType if that has at least as many (binary) value digits as T,
/// otherwise it is AlternativeType
template<typename T, typename PreferredType, typename AlternativeType>
struct choose_type_by_digits {
private:
    static constexpr bool fits_preferred =
      std::numeric_limits<T>::digits <= std::numeric_limits<PreferredType>::digits;

public:
    using type = typename std::conditional<fits_preferred, PreferredType, AlternativeType>::type;
};

template<typename T>
struct icu_format_type<T, true> {
// ICU supports 32 and 64 bit ints, use the former as long as it fits, else the latter
typedef typename std::conditional<std::numeric_limits<T>::digits <= 31, int32_t, int64_t>::type type;
template<typename T, bool integer = std::numeric_limits<T>::is_integer>
struct icu_format_type {
static_assert(sizeof(T) <= sizeof(int64_t), "Only up to 64 bit integer types are supported by ICU");
// ICU supports (only) int32_t and int64_t, use the former as long as it fits, else the latter
using large_type = typename choose_type_by_digits<T, int64_t, uint64_t>::type;
using type = typename choose_type_by_digits<T, int32_t, large_type>::type;
};
template<typename T>
struct icu_format_type<T, false> {
// Only float type ICU supports is double
typedef double type;
};

// ICU does not support uint64_t values so fall back to the parent/std formatting
// if the number is too large to fit into an int64_t
template<typename T,
bool BigUInt = !std::numeric_limits<T>::is_signed && std::numeric_limits<T>::is_integer
&& (sizeof(T) >= sizeof(uint64_t))>
struct use_parent_traits {
static bool use(T /*v*/) { return false; }
};
template<typename T>
struct use_parent_traits<T, true> {
static bool use(T v) { return v > static_cast<T>(std::numeric_limits<int64_t>::max()); }
using type = double;
};

template<typename ValueType>
static bool use_parent(std::ios_base& ios, ValueType v)
static bool use_parent(std::ios_base& ios)
{
const uint64_t flg = ios_info::get(ios).display_flags();
if(flg == flags::posix)
return true;
if(use_parent_traits<ValueType>::use(v))
return true;

if(!std::numeric_limits<ValueType>::is_integer)
return false;
Expand Down Expand Up @@ -105,7 +96,7 @@ namespace boost { namespace locale { namespace impl_icu {
template<typename ValueType>
iter_type do_real_put(iter_type out, std::ios_base& ios, CharType fill, ValueType val) const
{
if(detail::use_parent(ios, val))
if(detail::use_parent<ValueType>(ios))
return std::num_put<CharType>::do_put(out, ios, fill, val);

const std::unique_ptr<formatter_type> formatter = formatter_type::create(ios, loc_, enc_);
Expand Down Expand Up @@ -240,7 +231,7 @@ namespace boost { namespace locale { namespace impl_icu {
do_real_get(iter_type in, iter_type end, std::ios_base& ios, std::ios_base::iostate& err, ValueType& val) const
{
stream_type* stream_ptr = dynamic_cast<stream_type*>(&ios);
if(!stream_ptr || detail::use_parent(ios, ValueType(0)))
if(!stream_ptr || detail::use_parent<ValueType>(ios))
return std::num_get<CharType>::do_get(in, end, ios, err, val);

const std::unique_ptr<formatter_type> formatter = formatter_type::create(ios, loc_, enc_);
Expand Down
116 changes: 116 additions & 0 deletions src/util/numeric_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,45 @@
#define BOOST_LOCALE_IMPL_UTIL_NUMERIC_CONVERSIONS_HPP

#include <boost/locale/config.hpp>
#include <boost/assert.hpp>
#include <boost/charconv/from_chars.hpp>
#include <boost/core/detail/string_view.hpp>
#include <algorithm>
#include <array>
#include <limits>
#include <type_traits>
#ifdef BOOST_LOCALE_WITH_ICU
# include <unicode/fmtable.h>
#endif

namespace boost { namespace locale { namespace util {
namespace {

// Create lookup table where: powers_of_10[i] == 10**i
/// Compute 10**exponent as an uint64_t.
/// Kept as a single (recursive) return expression so it can be evaluated at compile time
constexpr uint64_t pow10(unsigned exponent)
{
    return exponent ? 10u * pow10(exponent - 1u) : uint64_t(1);
}
/// std::array of `Length` uint64_t values if `Enabled` is true, otherwise a substitution failure
template<bool Enabled, std::size_t Length>
using array_if_true = typename std::enable_if<Enabled, std::array<uint64_t, Length>>::type;

/// Terminating overload: exactly `Length` entries were collected, so store them in the array
template<std::size_t Length, typename... Entries>
constexpr array_if_true<(sizeof...(Entries) == Length), Length> make_powers_of_10(Entries... entries)
{
    return {{entries...}};
}
/// Recursive overload: append the next power of 10, its exponent is the number of entries collected so far
template<std::size_t Length, typename... Entries>
constexpr array_if_true<(sizeof...(Entries) < Length), Length> make_powers_of_10(Entries... entries)
{
    return make_powers_of_10<Length>(entries..., pow10(sizeof...(Entries)));
}
/// Lookup table with powers_of_10[i] == 10**i for i in [0, digits10 of uint64_t]
constexpr auto powers_of_10 = make_powers_of_10<std::numeric_limits<uint64_t>::digits10 + 1>();
#ifndef BOOST_NO_CXX14_CONSTEXPR
static_assert(powers_of_10[0] == 1u, "!");
static_assert(powers_of_10[1] == 10u, "!");
static_assert(powers_of_10[5] == 100000u, "!");
#endif
} // namespace

template<typename Integer>
bool try_to_int(core::string_view s, Integer& value)
Expand All @@ -24,6 +59,87 @@ namespace boost { namespace locale { namespace util {
const auto res = boost::charconv::from_chars(s, value);
return res && res.ptr == (s.data() + s.size());
}

/// Parse a string in scientific format to an unsigned integer.
/// In particular the "E notation" is used, where the mantissa is either a plain integer
/// or a single digit followed by a dot and further digits,
/// e.g. 5.12E3 == 5120; 5E2 == 500; 2E+1 == 20
/// Additionally plain integers are recognized.
/// Returns true and assigns the result to `value` on success.
/// Returns false if the string starts with '-', denotes a fractional number,
/// cannot be parsed or the number does not fit into `Integer`.
template<typename Integer>
bool try_scientific_to_int(const core::string_view s, Integer& value)
{
static_assert(std::is_integral<Integer>::value && std::is_unsigned<Integer>::value,
"Must be an unsigned integer");
if(s.size() < 3) // At least: iEj for E notation
return try_to_int(s, value);
// Negative numbers can never be stored in an unsigned integer
if(s[0] == '-')
return false;
// Largest number of decimal digits a value of type Integer can have
constexpr auto maxDigits = std::numeric_limits<Integer>::digits10 + 1;

// Search from index 1 on: there must be at least one character of the mantissa in front of the 'E'
const auto expPos = s.find('E', 1);
if(expPos == core::string_view::npos)
return (s[1] != '.') && try_to_int(s, value); // Shortcut: Regular integer
uint8_t exponent; // Negative exponent would be a fractional
if(BOOST_UNLIKELY(!try_to_int(s.substr(expPos + 1), exponent)))
return false;

// Everything in front of the 'E' is the mantissa
core::string_view significant = s.substr(0, expPos);
Integer significant_value;
if(s[1] == '.') {
// Mantissa of the form "d.ddd": Combine the leading digit and the digits after the dot to a single integer
const auto numSignificantDigits = significant.size() - 1u; // Exclude dot
const auto numDigits = exponent + 1u; // E0 -> 1 digit
if(BOOST_UNLIKELY(numDigits < numSignificantDigits))
return false; // Fractional
else if(BOOST_UNLIKELY(numDigits > maxDigits))
return false; // Too large
// Factor to get from the fractional number to an integer
BOOST_ASSERT(numSignificantDigits - 1u < powers_of_10.size());
const auto factor = static_cast<Integer>(powers_of_10[numSignificantDigits - 1]);
// From here on the exponent is the number of zeros still to be appended after the significant digits
exponent = static_cast<uint8_t>(numDigits - numSignificantDigits);

// For non-digit characters the value is either > 9 or wraps around to a large unsigned value
const unsigned firstDigit = significant[0] - '0';
if(firstDigit > 9u)
return false; // Not a digit
if(numSignificantDigits == maxDigits) {
// With the maximum number of digits the leading digit alone may already exceed the value range
const auto maxFirstDigit = std::numeric_limits<Integer>::max() / powers_of_10[maxDigits - 1];
if(firstDigit > maxFirstDigit)
return false;
}
// Skip the leading digit and the dot, the remainder are the digits after the dot
significant.remove_prefix(2);
if(BOOST_UNLIKELY(!try_to_int(significant, significant_value)))
return false;
// firstDigit * factor + significant_value <= max
if(static_cast<Integer>(firstDigit) > (std::numeric_limits<Integer>::max() - significant_value) / factor)
return false;
significant_value += static_cast<Integer>(firstDigit * factor);
} else if(BOOST_UNLIKELY(significant.size() + exponent > maxDigits))
return false; // Mantissa without a dot: The result would have more digits than Integer can hold
else if(BOOST_UNLIKELY(!try_to_int(significant, significant_value)))
return false;
// Add zeros if necessary
if(exponent > 0u) {
BOOST_ASSERT(exponent < powers_of_10.size());
const auto factor = static_cast<Integer>(powers_of_10[exponent]);
// Check for overflow before doing the multiplication
if(significant_value > std::numeric_limits<Integer>::max() / factor)
return false;
value = significant_value * factor;
} else
value = significant_value;
return true;
}

#ifdef BOOST_LOCALE_WITH_ICU
/// Convert the number held by an ICU Formattable to an (unsigned) integer.
/// The decimal-number string of `fmt` is requested from ICU and parsed by try_scientific_to_int.
/// Returns false if ICU cannot provide that string (i.e. `fmt` is not a number) or parsing fails.
template<typename Integer>
bool try_parse_icu(icu::Formattable& fmt, Integer& value)
{
    UErrorCode status = U_ZERO_ERROR;
    const auto digits = fmt.getDecimalNumber(status);
    if(U_SUCCESS(status))
        return try_scientific_to_int(core::string_view(digits.data(), digits.length()), value);
    return false; // Not a number
}
#endif
}}} // namespace boost::locale::util

#endif
Loading
Loading