diff options
author | Dan Goodliffe <dan.goodliffe@octal.co.uk> | 2022-08-22 14:51:38 +0100 |
---|---|---|
committer | Dan Goodliffe <dan.goodliffe@octal.co.uk> | 2022-08-22 14:51:38 +0100 |
commit | f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300 (patch) | |
tree | fbe8e0802a988e2866f498051b4876a8e7dfc3dd | |
parent | Enable all cppchecks, no further fixes needed (diff) | |
download | ilt-f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300.tar.bz2 ilt-f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300.tar.xz ilt-f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300.zip |
Create a string_view like thing for utf8 strings
utf8_string_view provides length and character iteration over a std::string_view containing utf8 data.
-rw-r--r-- | lib/unicode.h | 65 | ||||
-rw-r--r-- | test/test-text.cpp | 13 |
2 files changed, 78 insertions, 0 deletions
```diff
diff --git a/lib/unicode.h b/lib/unicode.h
index dde1d3d..2945650 100644
--- a/lib/unicode.h
+++ b/lib/unicode.h
@@ -14,4 +14,69 @@ uint32_t get_codepoint(const char *);
 
 #ifdef __cplusplus
 }
+
+# include <string_view>
+# include <algorithm>
+
+struct utf8_string_view {
+	struct iter {
+		constexpr explicit iter(const char * p) : pos {p} { }
+
+		[[nodiscard]] auto
+		operator!=(const iter & other) const
+		{
+			return pos != other.pos;
+		}
+
+		auto &
+		operator++()
+		{
+			pos = next_char(pos);
+			return *this;
+		}
+
+		[[nodiscard]] auto
+		operator*() const
+		{
+			return get_codepoint(pos);
+		}
+
+	private:
+		const char * pos;
+	};
+
+	// cppcheck-suppress noExplicitConstructor; NOLINTNEXTLINE(hicpp-explicit-conversions)
+	template<typename... Args> constexpr utf8_string_view(Args &&... args) : str {std::forward<Args>(args)...} { }
+
+	[[nodiscard]] auto
+	begin() const
+	{
+		return iter {str.cbegin()};
+	}
+
+	[[nodiscard]] auto
+	end() const
+	{
+		return iter {str.cend()};
+	}
+
+	[[nodiscard]] size_t length() const;
+
+private:
+	std::string_view str;
+};
+template<> struct std::iterator_traits<utf8_string_view::iter> {
+	using difference_type = size_t;
+	using value_type = uint32_t;
+	using pointer = void;
+	using reference = void;
+	using iterator_category = std::forward_iterator_tag;
+};
+
+[[nodiscard]] inline size_t
+utf8_string_view::length() const
+{
+	return std::distance(begin(), end());
+}
+
 #endif
diff --git a/test/test-text.cpp b/test/test-text.cpp
index 6762b09..0df6885 100644
--- a/test/test-text.cpp
+++ b/test/test-text.cpp
@@ -9,8 +9,21 @@
 #include <glm/glm.hpp>
 #include <span>
 #include <ui/font.h>
+#include <unicode.h>
 #include <vector>
 
+BOOST_AUTO_TEST_CASE(utf8_string_view_iter)
+{
+	static constexpr utf8_string_view text {"Some UTF-8 €£²¹ text."};
+	static constexpr std::array codepoints {
+			83, 111, 109, 101, 32, 85, 84, 70, 45, 56, 32, 8364, 163, 178, 185, 32, 116, 101, 120, 116, 46};
+	BOOST_CHECK_EQUAL(std::count_if(text.begin(), text.end(), isspace), 3);
+	BOOST_CHECK_EQUAL(text.length(), 21);
+	std::vector<uint32_t> codepointsOut;
+	std::copy(text.begin(), text.end(), std::back_inserter(codepointsOut));
+	BOOST_CHECK_EQUAL_COLLECTIONS(codepoints.begin(), codepoints.end(), codepointsOut.begin(), codepointsOut.end());
+}
+
 struct FontTest : public Font {
 	FontTest() : Font {"/usr/share/fonts/corefonts/arial.ttf", 48} { }
 };
```