summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dan Goodliffe <dan.goodliffe@octal.co.uk> 2022-08-22 14:51:38 +0100
committer: Dan Goodliffe <dan.goodliffe@octal.co.uk> 2022-08-22 14:51:38 +0100
commit: f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300 (patch)
tree: fbe8e0802a988e2866f498051b4876a8e7dfc3dd
parent: Enable all cppchecks, no further fixes needed (diff)
downloadilt-f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300.tar.bz2
ilt-f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300.tar.xz
ilt-f9e95c2b7d2c50f6d12b9a67ce4ed897e032a300.zip
Create a string_view-like type for UTF-8 strings
utf8_string_view provides length and per-character (code point) iteration over a std::string_view containing UTF-8 data.
-rw-r--r-- lib/unicode.h | 65
-rw-r--r-- test/test-text.cpp | 13
2 files changed, 78 insertions, 0 deletions
diff --git a/lib/unicode.h b/lib/unicode.h
index dde1d3d..2945650 100644
--- a/lib/unicode.h
+++ b/lib/unicode.h
@@ -14,4 +14,69 @@ uint32_t get_codepoint(const char *);
#ifdef __cplusplus
}
+
+# include <algorithm>
+# include <cstddef>
+# include <iterator>
+# include <string_view>
+# include <type_traits>
+# include <utility>
+
+/// Non-owning view over UTF-8 encoded text that iterates by code point rather
+/// than by byte.
+///
+/// The view wraps a std::string_view, so it never owns the bytes it refers to;
+/// the underlying buffer must outlive the view and every iterator taken from it.
+struct utf8_string_view {
+    /// Iterator over the code points of the viewed bytes.
+    ///
+    /// Stores a pointer to the first byte of the current encoded character.
+    /// Dereferencing decodes through get_codepoint(); incrementing advances
+    /// through next_char().
+    struct iter {
+        constexpr explicit iter(const char * p) noexcept : pos {p} { }
+
+        [[nodiscard]] constexpr bool
+        operator==(const iter & other) const noexcept
+        {
+            return pos == other.pos;
+        }
+
+        [[nodiscard]] constexpr bool
+        operator!=(const iter & other) const noexcept
+        {
+            return pos != other.pos;
+        }
+
+        // NOTE(review): a begin()..end() walk only terminates if next_char() lands
+        // exactly on end(); presumably that holds for well-formed UTF-8 - confirm
+        // the behaviour for truncated or malformed input.
+        iter &
+        operator++()
+        {
+            pos = next_char(pos);
+            return *this;
+        }
+
+        // Post-increment: advances and returns the position held beforehand.
+        iter
+        operator++(int)
+        {
+            iter previous {*this};
+            ++*this;
+            return previous;
+        }
+
+        /// Decodes and returns (by value) the code point at the current position.
+        [[nodiscard]] auto
+        operator*() const
+        {
+            return get_codepoint(pos);
+        }
+
+    private:
+        const char * pos;
+    };
+
+    /// Implicitly constructs the view from anything a std::string_view can be
+    /// constructed from.
+    ///
+    /// The constraint keeps this template out of overload resolution for
+    /// arguments std::string_view cannot accept; without it, copying from a
+    /// non-const utf8_string_view lvalue would pick this template over the copy
+    /// constructor and fail to compile.
+    template<typename... Args, typename = std::enable_if_t<std::is_constructible_v<std::string_view, Args...>>>
+    // cppcheck-suppress noExplicitConstructor; NOLINTNEXTLINE(hicpp-explicit-conversions)
+    constexpr utf8_string_view(Args &&... args) : str {std::forward<Args>(args)...} { }
+
+    /// Iterator at the first code point of the view.
+    [[nodiscard]] constexpr iter
+    begin() const noexcept
+    {
+        // data() is always a const char *, whereas string_view's iterator type is
+        // implementation-defined and need not convert to a pointer.
+        return iter {str.data()};
+    }
+
+    /// Iterator one past the last byte of the view.
+    [[nodiscard]] constexpr iter
+    end() const noexcept
+    {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        return iter {str.data() + str.size()};
+    }
+
+    /// Number of code points in the view (not bytes); see the definition below
+    /// the iterator_traits specialisation.
+    [[nodiscard]] size_t length() const;
+
+private:
+    std::string_view str;
+};
+/// Iterator traits for utf8_string_view::iter so that standard algorithms
+/// (std::distance, std::count_if, std::copy, ...) can operate on it.
+template<> struct std::iterator_traits<utf8_string_view::iter> {
+    // The standard requires difference_type to be a signed integer type.
+    using difference_type = std::ptrdiff_t;
+    using value_type = uint32_t;
+    // iter provides no operator->.
+    using pointer = void;
+    // operator* yields the decoded code point by value, not a reference.
+    using reference = uint32_t;
+    using iterator_category = std::forward_iterator_tag;
+};
+
+/// Returns the number of code points in the view.
+///
+/// Walks the whole range with std::distance, so the cost grows with the amount
+/// of text viewed. Defined out of line because it needs the iterator_traits
+/// specialisation above to be visible.
+[[nodiscard]] inline size_t
+utf8_string_view::length() const
+{
+    // The distance from begin() to end() is never negative; convert explicitly
+    // from the signed difference_type to the unsigned return type.
+    return static_cast<size_t>(std::distance(begin(), end()));
+}
+
#endif
diff --git a/test/test-text.cpp b/test/test-text.cpp
index 6762b09..0df6885 100644
--- a/test/test-text.cpp
+++ b/test/test-text.cpp
@@ -9,8 +9,21 @@
#include <glm/glm.hpp>
#include <span>
#include <ui/font.h>
+#include <unicode.h>
#include <vector>
+// Verifies that utf8_string_view iterates by code point (not by byte) over text
+// mixing ASCII and multi-byte characters, and that length() reports the number
+// of code points.
+BOOST_AUTO_TEST_CASE(utf8_string_view_iter)
+{
+    static constexpr utf8_string_view text {"Some UTF-8 €£²¹ text."};
+    // Expected code points, typed to match what the iterator yields.
+    static constexpr std::array<uint32_t, 21> codepoints {
+            83, 111, 109, 101, 32, 85, 84, 70, 45, 56, 32, 8364, 163, 178, 185, 32, 116, 101, 120, 116, 46};
+    // isspace() is only defined for values representable as unsigned char (or
+    // EOF); the range yields code points such as 8364, so restrict the call to
+    // the ASCII range.
+    const auto isAsciiSpace = [](uint32_t codepoint) {
+        return codepoint < 0x80 && isspace(static_cast<int>(codepoint)) != 0;
+    };
+    BOOST_CHECK_EQUAL(std::count_if(text.begin(), text.end(), isAsciiSpace), 3);
+    BOOST_CHECK_EQUAL(text.length(), 21);
+    std::vector<uint32_t> codepointsOut;
+    std::copy(text.begin(), text.end(), std::back_inserter(codepointsOut));
+    BOOST_CHECK_EQUAL_COLLECTIONS(codepoints.begin(), codepoints.end(), codepointsOut.begin(), codepointsOut.end());
+}
+
// Font subclass for these tests: its default constructor forwards a fixed font
// file path and the value 48 to the Font base constructor.
// NOTE(review): presumably used as a Boost.Test fixture further down - confirm.
struct FontTest : Font {
    FontTest() : Font {"/usr/share/fonts/corefonts/arial.ttf", 48} { }
};