HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_grapheme_cluster_break.hpp
1// Copyright Take Vos 2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "ucd_grapheme_cluster_breaks.hpp"
8#include "unicode_break_opportunity.hpp"
9#include <cstdint>
10
11namespace hi { inline namespace v1 {
12namespace detail {
13
15 unicode_grapheme_cluster_break previous = unicode_grapheme_cluster_break::Other;
16 int RI_count = 0;
17 bool first_character = true;
18 bool in_extended_pictograph = false;
19
20 constexpr void reset() noexcept
21 {
22 previous = unicode_grapheme_cluster_break::Other;
23 RI_count = 0;
24 first_character = true;
25 in_extended_pictograph = false;
26 }
27};
28
37[[nodiscard]] constexpr bool breaks_grapheme(unicode_grapheme_cluster_break cluster_break, grapheme_break_state& state) noexcept
38{
39 using enum unicode_grapheme_cluster_break;
40
41 hilet lhs = state.previous;
42 hilet rhs = cluster_break;
43
44 enum class break_state {
45 unknown,
46 do_break,
47 dont_break,
48 };
49
50 break_state break_state = break_state::unknown;
51
52 // GB1, GB2: Break at the start and end of text, unless the text is empty.
53 bool GB1 = state.first_character;
54 if ((break_state == break_state::unknown) & GB1) {
55 break_state = break_state::do_break;
56 }
57
58 state.first_character = false;
59
60 // GB3, GB4, GB5: Do not break between a CR and LF. Otherwise, break before and after controls.
61 hilet GB3 = (lhs == CR) && (rhs == LF);
62 hilet GB4 = (lhs == Control) || (lhs == CR) || (lhs == LF);
63 hilet GB5 = (rhs == Control) || (rhs == CR) || (rhs == LF);
64 if (break_state == break_state::unknown) {
65 if (GB3) {
66 break_state = break_state::dont_break;
67 } else if (GB4 || GB5) {
68 break_state = break_state::do_break;
69 }
70 }
71
72 // GB6, GB7, GB8: Do not break Hangul syllable sequences.
73 hilet GB6 = (lhs == L) && ((rhs == L) || (rhs == V) || (rhs == LV) | (rhs == LVT));
74 hilet GB7 = ((lhs == LV) || (lhs == V)) && ((rhs == V) || (rhs == T));
75 hilet GB8 = ((lhs == LVT) || (lhs == T)) && (rhs == T);
76 if ((break_state == break_state::unknown) && (GB6 || GB7 || GB8)) {
77 break_state = break_state::dont_break;
78 }
79
80 // GB9: Do not break before extending characters or ZWJ.
81 hilet GB9 = ((rhs == Extend) || (rhs == ZWJ));
82
83 // GB9a, GB9b: Do not break before SpacingMarks, or after Prepend characters.
84 // Both rules only apply to extended grapheme clusters.
85 hilet GB9a = (rhs == SpacingMark);
86 hilet GB9b = (lhs == Prepend);
87 if ((break_state == break_state::unknown) & (GB9 || GB9a || GB9b)) {
88 break_state = break_state::dont_break;
89 }
90
91 // GB11: Do not break within emoji modifier sequences or emoji zwj sequences.
92 hilet GB11 = state.in_extended_pictograph && (lhs == ZWJ) && (rhs == Extended_Pictographic);
93 if ((break_state == break_state::unknown) && GB11) {
94 break_state = break_state::dont_break;
95 }
96
97 if (rhs == Extended_Pictographic) {
98 state.in_extended_pictograph = true;
99 } else if (!((rhs == Extend) || (rhs == ZWJ))) {
100 state.in_extended_pictograph = false;
101 }
102
103 // GB12, GB13: Do not break within emoji flag sequences.
104 // That is, do not break between regional indicator (RI) symbols,
105 // if there is an odd number of RI characters before the break point.
106 hilet GB12_13 = (lhs == Regional_Indicator) && (rhs == Regional_Indicator) && ((state.RI_count % 2) == 1);
107 if ((break_state == break_state::unknown) && (GB12_13)) {
108 break_state = break_state::dont_break;
109 }
110
111 if (rhs == Regional_Indicator) {
112 state.RI_count++;
113 } else {
114 state.RI_count = 0;
115 }
116
117 // GB999: Otherwise, break everywhere.
118 if (break_state == break_state::unknown) {
119 break_state = break_state::do_break;
120 }
121
122 state.previous = rhs;
123 return break_state == break_state::do_break;
124}
125
133[[nodiscard]] constexpr bool breaks_grapheme(char32_t code_point, grapheme_break_state& state) noexcept
134{
135 return breaks_grapheme(ucd_get_grapheme_cluster_break(code_point), state);
136}
137
138} // namespace detail
139
140template<typename It, typename ItEnd>
141[[nodiscard]] constexpr std::vector<unicode_break_opportunity> unicode_grapheme_break(It first, ItEnd last) noexcept
142{
144 auto state = detail::grapheme_break_state{};
145
146 for (auto it = first; it != last; ++it) {
147 hilet opportunity = detail::breaks_grapheme(*it, state) ? unicode_break_opportunity::yes : unicode_break_opportunity::no;
148 r.push_back(opportunity);
149 }
150
151 r.push_back(unicode_break_opportunity::yes);
152 return r;
153}
154
155}} // namespace hi::v1
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
Definition unicode_grapheme_cluster_break.hpp:14
T push_back(T... args)