HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_grapheme_cluster_break.hpp
1// Copyright Take Vos 2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "ucd_grapheme_cluster_breaks.hpp"
8#include "unicode_break_opportunity.hpp"
9#include "../macros.hpp"
10#include <cstdint>
11#include <vector>
12
13hi_export_module(hikogui.unicode.unicode_grapheme_cluster_break);
14
15hi_export namespace hi { inline namespace v1 {
16namespace detail {
17
19 unicode_grapheme_cluster_break previous = unicode_grapheme_cluster_break::Other;
20 int RI_count = 0;
21 bool first_character = true;
22 bool in_extended_pictograph = false;
23
24 constexpr void reset() noexcept
25 {
26 previous = unicode_grapheme_cluster_break::Other;
27 RI_count = 0;
28 first_character = true;
29 in_extended_pictograph = false;
30 }
31};
32
41[[nodiscard]] constexpr bool breaks_grapheme(unicode_grapheme_cluster_break cluster_break, grapheme_break_state& state) noexcept
42{
43 using enum unicode_grapheme_cluster_break;
44
45 auto const lhs = state.previous;
46 auto const rhs = cluster_break;
47
48 enum class break_state {
49 unknown,
50 do_break,
51 dont_break,
52 };
53
54 break_state break_state = break_state::unknown;
55
56 // GB1, GB2: Break at the start and end of text, unless the text is empty.
57 bool GB1 = state.first_character;
58 if ((break_state == break_state::unknown) & GB1) {
59 break_state = break_state::do_break;
60 }
61
62 state.first_character = false;
63
64 // GB3, GB4, GB5: Do not break between a CR and LF. Otherwise, break before and after controls.
65 auto const GB3 = (lhs == CR) && (rhs == LF);
66 auto const GB4 = (lhs == Control) || (lhs == CR) || (lhs == LF);
67 auto const GB5 = (rhs == Control) || (rhs == CR) || (rhs == LF);
68 if (break_state == break_state::unknown) {
69 if (GB3) {
70 break_state = break_state::dont_break;
71 } else if (GB4 || GB5) {
72 break_state = break_state::do_break;
73 }
74 }
75
76 // GB6, GB7, GB8: Do not break Hangul syllable sequences.
77 auto const GB6 = (lhs == L) && ((rhs == L) || (rhs == V) || (rhs == LV) | (rhs == LVT));
78 auto const GB7 = ((lhs == LV) || (lhs == V)) && ((rhs == V) || (rhs == T));
79 auto const GB8 = ((lhs == LVT) || (lhs == T)) && (rhs == T);
80 if ((break_state == break_state::unknown) && (GB6 || GB7 || GB8)) {
81 break_state = break_state::dont_break;
82 }
83
84 // GB9: Do not break before extending characters or ZWJ.
85 auto const GB9 = ((rhs == Extend) || (rhs == ZWJ));
86
87 // GB9a, GB9b: Do not break before SpacingMarks, or after Prepend characters.
88 // Both rules only apply to extended grapheme clusters.
89 auto const GB9a = (rhs == SpacingMark);
90 auto const GB9b = (lhs == Prepend);
91 if ((break_state == break_state::unknown) & (GB9 || GB9a || GB9b)) {
92 break_state = break_state::dont_break;
93 }
94
95 // GB11: Do not break within emoji modifier sequences or emoji zwj sequences.
96 auto const GB11 = state.in_extended_pictograph && (lhs == ZWJ) && (rhs == Extended_Pictographic);
97 if ((break_state == break_state::unknown) && GB11) {
98 break_state = break_state::dont_break;
99 }
100
101 if (rhs == Extended_Pictographic) {
102 state.in_extended_pictograph = true;
103 } else if (!((rhs == Extend) || (rhs == ZWJ))) {
104 state.in_extended_pictograph = false;
105 }
106
107 // GB12, GB13: Do not break within emoji flag sequences.
108 // That is, do not break between regional indicator (RI) symbols,
109 // if there is an odd number of RI characters before the break point.
110 auto const GB12_13 = (lhs == Regional_Indicator) && (rhs == Regional_Indicator) && ((state.RI_count % 2) == 1);
111 if ((break_state == break_state::unknown) && (GB12_13)) {
112 break_state = break_state::dont_break;
113 }
114
115 if (rhs == Regional_Indicator) {
116 state.RI_count++;
117 } else {
118 state.RI_count = 0;
119 }
120
121 // GB999: Otherwise, break everywhere.
122 if (break_state == break_state::unknown) {
123 break_state = break_state::do_break;
124 }
125
126 state.previous = rhs;
127 return break_state == break_state::do_break;
128}
129
137[[nodiscard]] constexpr bool breaks_grapheme(char32_t code_point, grapheme_break_state& state) noexcept
138{
139 return breaks_grapheme(ucd_get_grapheme_cluster_break(code_point), state);
140}
141
142} // namespace detail
143
144template<typename It, typename ItEnd>
145[[nodiscard]] constexpr std::vector<unicode_break_opportunity> unicode_grapheme_break(It first, ItEnd last) noexcept
146{
148 auto state = detail::grapheme_break_state{};
149
150 for (auto it = first; it != last; ++it) {
151 auto const opportunity = detail::breaks_grapheme(*it, state) ? unicode_break_opportunity::yes : unicode_break_opportunity::no;
152 r.push_back(opportunity);
153 }
154
155 r.push_back(unicode_break_opportunity::yes);
156 return r;
157}
158
159}} // namespace hi::v1
The HikoGUI namespace.
Definition array_generic.hpp:20
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
Definition unicode_grapheme_cluster_break.hpp:18
T push_back(T... args)