From f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Mon, 22 Aug 2022 14:51:38 +0100 Subject: Create a string_view like thing for utf8 strings utf8_string_view provides length and character iteration over a std::string_view containing utf8 data. --- lib/unicode.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'lib') diff --git a/lib/unicode.h b/lib/unicode.h index dde1d3d..2945650 100644 --- a/lib/unicode.h +++ b/lib/unicode.h @@ -14,4 +14,69 @@ uint32_t get_codepoint(const char *); #ifdef __cplusplus } + +# include +# include + +struct utf8_string_view { + struct iter { + constexpr explicit iter(const char * p) : pos {p} { } + + [[nodiscard]] auto + operator!=(const iter & other) const + { + return pos != other.pos; + } + + auto & + operator++() + { + pos = next_char(pos); + return *this; + } + + [[nodiscard]] auto + operator*() const + { + return get_codepoint(pos); + } + + private: + const char * pos; + }; + + // cppcheck-suppress noExplicitConstructor; NOLINTNEXTLINE(hicpp-explicit-conversions) + template constexpr utf8_string_view(Args &&... args) : str {std::forward(args)...} { } + + [[nodiscard]] auto + begin() const + { + return iter {str.cbegin()}; + } + + [[nodiscard]] auto + end() const + { + return iter {str.cend()}; + } + + [[nodiscard]] size_t length() const; + +private: + std::string_view str; +}; +template<> struct std::iterator_traits { + using difference_type = size_t; + using value_type = uint32_t; + using pointer = void; + using reference = void; + using iterator_category = std::forward_iterator_tag; +}; + +[[nodiscard]] inline size_t +utf8_string_view::length() const +{ + return std::distance(begin(), end()); +} + #endif -- cgit v1.2.3