// path: root/lib/unicode.h
// blob: bc491f357e7ad803d82965bb4a530be3ab93ea7f (plain)
#pragma once

// Wrappers for a few glib functions that we want to use; they are wrapped because glib.h is a bit
// C-like (which raises the question of why we are using glib at all).

#ifdef __cplusplus
#	include <cstdint>
extern "C" {
#else
#	include <stdint.h>
#endif

// Used by utf8_string_view::iter to step from one character to the next: given a pointer to a
// character, yields the pointer the iterator should hold afterwards.
// NOTE(review): presumably wraps glib's g_utf8_next_char - confirm against the implementation.
const char * next_char(const char *);
// Used by utf8_string_view::iter when dereferenced: yields the 32-bit value for the character
// at the given position.
// NOTE(review): presumably wraps glib's g_utf8_get_char - confirm against the implementation.
uint32_t get_codepoint(const char *);

#ifdef __cplusplus
}

#	include <algorithm>
#	include <cstddef>
#	include <iterator>
#	include <string_view>

// Non-owning view over a range of UTF-8 encoded bytes, iterated one character at a time.
//
// Only pointers into the source buffer are stored, so that buffer must outlive the view.
// NOTE(review): stepping and decoding are delegated to next_char()/get_codepoint(), whose
// implementations are not visible here. Iteration presumably requires well-formed UTF-8;
// otherwise an iterator could step over end() without ever comparing equal to it - confirm.
struct utf8_string_view {
	// Iterator over the characters of the view; wraps a pointer to the current character.
	struct iter {
		constexpr explicit iter(const char * p) : pos {p} { }

		// Iterators are equal when they point at the same byte. Standard algorithms may compare
		// forward iterators with == as well as !=, so both operators are provided.
		[[nodiscard]] bool
		operator==(const iter & other) const
		{
			return pos == other.pos;
		}

		[[nodiscard]] bool
		operator!=(const iter & other) const
		{
			return !(*this == other);
		}

		// Pre-increment: move to the start of the next character, as located by next_char().
		iter &
		operator++()
		{
			pos = next_char(pos);
			return *this;
		}

		// Post-increment: advance, returning a copy of the iterator as it was before the step.
		iter
		operator++(int)
		{
			iter previous {*this};
			++*this;
			return previous;
		}

		// Yields the value get_codepoint() reports for the current position (returned by value).
		[[nodiscard]] auto
		operator*() const
		{
			return get_codepoint(pos);
		}

	private:
		const char * pos;
	};

	// Implicit conversion from any string-like object exposing data() and length(),
	// e.g. std::string or std::string_view. The view covers [data(), data() + length()).
	template<typename Str>
	// cppcheck-suppress noExplicitConstructor; NOLINTNEXTLINE(hicpp-explicit-conversions)
	constexpr utf8_string_view(const Str & str) : begin_ {str.data()}, end_ {str.data() + str.length()}
	{
	}

	// Implicit conversion from a NUL-terminated C string. A null pointer produces an empty view;
	// handing a null pointer straight to std::string_view would be undefined behaviour.
	// cppcheck-suppress noExplicitConstructor; NOLINTNEXTLINE(hicpp-explicit-conversions)
	constexpr utf8_string_view(const char * const str) :
		utf8_string_view {str != nullptr ? std::string_view {str} : std::string_view {}}
	{
	}

	// Iterator positioned at the first character of the view.
	[[nodiscard]] auto
	begin() const
	{
		return iter {begin_};
	}

	// Iterator positioned one past the last byte of the view.
	[[nodiscard]] auto
	end() const
	{
		return iter {end_};
	}

	// Number of iterator steps from begin() to end(), i.e. characters rather than bytes.
	[[nodiscard]] size_t length() const;

private:
	const char *begin_, *end_;
};

// Iterator traits for utf8_string_view::iter, supplied by explicit specialisation because the
// iterator declares no member typedefs of its own.
// NOTE(review): difference_type is unsigned here, whereas the standard iterator requirements call
// for a signed type. Changing it would alter what std::distance returns to existing callers, so it
// is deliberately left untouched - confirm whether callers rely on it before switching.
template<> struct std::iterator_traits<utf8_string_view::iter> {
	using iterator_category = std::forward_iterator_tag;
	// Dereferencing yields a uint32_t by value, hence no meaningful pointer/reference types.
	using value_type = uint32_t;
	using reference = void;
	using pointer = void;
	using difference_type = size_t;
};

// Counts the characters in the view by measuring the iterator distance from begin() to end().
// The iterator is only a forward iterator, so this walks the whole range rather than
// subtracting pointers; the result is a character count, not a byte count.
[[nodiscard]] inline size_t
utf8_string_view::length() const
{
	const auto first = begin();
	const auto last = end();
	return std::distance(first, last);
}

#endif