From f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300 Mon Sep 17 00:00:00 2001
From: Dan Goodliffe
Date: Mon, 22 Aug 2022 14:51:38 +0100
Subject: Create a string_view like thing for utf8 strings

utf8_string_view provides length and character iteration over a
std::string_view containing utf8 data.
---
NOTE(review): this copy was rebuilt from a web rendering that had collapsed
all line breaks and dropped every <...> token. Template arguments and header
names below were reinstated from how each name is used in the code; the
context #include lines of test/test-text.cpp in particular are inferred and
should be confirmed against the repository before applying.

 lib/unicode.h      | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/test-text.cpp | 13 +++++++++++
 2 files changed, 78 insertions(+)

diff --git a/lib/unicode.h b/lib/unicode.h
index dde1d3d..2945650 100644
--- a/lib/unicode.h
+++ b/lib/unicode.h
@@ -14,4 +14,69 @@ uint32_t get_codepoint(const char *);
 
 #ifdef __cplusplus
 }
+
+#	include <iterator>
+#	include <string_view>
+
+struct utf8_string_view {
+	struct iter {
+		constexpr explicit iter(const char * p) : pos {p} { }
+
+		[[nodiscard]] auto
+		operator!=(const iter & other) const
+		{
+			return pos != other.pos;
+		}
+
+		auto &
+		operator++()
+		{
+			pos = next_char(pos);
+			return *this;
+		}
+
+		[[nodiscard]] auto
+		operator*() const
+		{
+			return get_codepoint(pos);
+		}
+
+	private:
+		const char * pos;
+	};
+
+	// cppcheck-suppress noExplicitConstructor; NOLINTNEXTLINE(hicpp-explicit-conversions)
+	template<typename... Args> constexpr utf8_string_view(Args &&... args) : str {std::forward<Args>(args)...} { }
+
+	[[nodiscard]] auto
+	begin() const
+	{
+		return iter {str.cbegin()};
+	}
+
+	[[nodiscard]] auto
+	end() const
+	{
+		return iter {str.cend()};
+	}
+
+	[[nodiscard]] size_t length() const;
+
+private:
+	std::string_view str;
+};
+template<> struct std::iterator_traits<utf8_string_view::iter> {
+	using difference_type = size_t;
+	using value_type = uint32_t;
+	using pointer = void;
+	using reference = void;
+	using iterator_category = std::forward_iterator_tag;
+};
+
+[[nodiscard]] inline size_t
+utf8_string_view::length() const
+{
+	return std::distance(begin(), end());
+}
+
 #endif
diff --git a/test/test-text.cpp b/test/test-text.cpp
index 6762b09..0df6885 100644
--- a/test/test-text.cpp
+++ b/test/test-text.cpp
@@ -9,8 +9,21 @@
 #include <array>
 #include <glm/glm.hpp>
 #include <ui/font.h>
+#include <unicode.h>
 #include <vector>
 
+BOOST_AUTO_TEST_CASE(utf8_string_view_iter)
+{
+	static constexpr utf8_string_view text {"Some UTF-8 €£²¹ text."};
+	static constexpr std::array<uint32_t, 21> codepoints {
+			83, 111, 109, 101, 32, 85, 84, 70, 45, 56, 32, 8364, 163, 178, 185, 32, 116, 101, 120, 116, 46};
+	BOOST_CHECK_EQUAL(std::count_if(text.begin(), text.end(), isspace), 3);
+	BOOST_CHECK_EQUAL(text.length(), 21);
+	std::vector<uint32_t> codepointsOut;
+	std::copy(text.begin(), text.end(), std::back_inserter(codepointsOut));
+	BOOST_CHECK_EQUAL_COLLECTIONS(codepoints.begin(), codepoints.end(), codepointsOut.begin(), codepointsOut.end());
+}
+
 struct FontTest : public Font {
 	FontTest() : Font {"/usr/share/fonts/corefonts/arial.ttf", 48} { }
 };
-- 
cgit v1.2.3