10#include "ucd_sentence_break_properties.hpp"
11#include "unicode_break_opportunity.hpp"
12#include "../utility/utility.hpp"
13#include "../macros.hpp"
19hi_export_module(hikogui.unicode.unicode_sentence_break);
21hi_export
namespace hi::inline
v1 {
34 constexpr unicode_sentence_break_info(unicode_sentence_break_property
const &sentence_break_property) noexcept : _value(std::to_underlying(sentence_break_property))
43 [[nodiscard]]
constexpr bool is_skip()
const noexcept
45 return to_bool(_value & 0x40);
48 [[nodiscard]]
constexpr friend bool operator==(
unicode_sentence_break_info const &lhs, unicode_sentence_break_property
const &rhs)
noexcept
50 return (lhs._value & 0x3f) == std::to_underlying(rhs);
57 return rhs == unicode_sentence_break_property::Sep or rhs == unicode_sentence_break_property::CR or rhs == unicode_sentence_break_property::LF;
62 return rhs == unicode_sentence_break_property::STerm or rhs == unicode_sentence_break_property::ATerm;
69inline void unicode_sentence_break_SB1_SB4(
73 using enum unicode_break_opportunity;
74 using enum unicode_sentence_break_property;
76 hi_axiom(r.size() == infos.size() + 1);
81 for (
auto i = 1_uz; i < infos.size(); ++i) {
82 auto const prev = infos[i - 1];
83 auto const next = infos[i];
86 if (prev == CR and next == LF) {
88 }
else if (is_ParaSep(prev)) {
97inline void unicode_sentence_break_SB5(
98 unicode_break_vector &r,
101 using enum unicode_break_opportunity;
102 using enum unicode_sentence_break_property;
104 hi_axiom(r.size() == infos.size() + 1);
106 for (
auto i = 1_uz; i < infos.size(); ++i) {
107 auto const prev = infos[i - 1];
108 auto &
next = infos[i];
110 if ((not is_ParaSep(prev) and prev != CR and prev != LF) and (next == Extend or next == Format)) {
111 if (r[i] == unassigned) {
119inline void unicode_sentence_break_SB6_SB998(
120 unicode_break_vector &r,
123 using enum unicode_break_opportunity;
124 using enum unicode_sentence_break_property;
126 hi_axiom(r.size() == infos.size() + 1);
128 for (
auto i = 0_z; i < std::ssize(infos); ++i) {
129 auto const &
next = infos[i];
130 if (r[i] != unassigned) {
134 hi_axiom(not
next.is_skip());
138 auto const prev = [&] {
139 for (k = i - 1; k >= 0; --k) {
140 if (not infos[k].is_skip()) {
144 return unicode_sentence_break_info{};
147 auto const prev_prev = [&] {
148 for (--k; k >= 0; --k) {
149 if (not infos[k].is_skip()) {
153 return unicode_sentence_break_info{};
161 auto const [prefix, close_sp_par_found] = [&]() {
162 using enum unicode_break_opportunity;
166 for (
auto j = i - 1; j >= 0; --j) {
167 if (not infos[j].is_skip()) {
170 if (is_ParaSep(infos[j])) {
173 }
else if (infos[j] == Sp) {
176 }
else if (infos[j] == Close) {
185 if (infos[j] == Sp) {
188 }
else if (infos[j] == Close) {
196 if (infos[j] == Close) {
208 auto const optional_close = (close_sp_par_found & 3) == 0;
209 auto const optional_close_sp = (close_sp_par_found & 1) == 0;
210 auto const optional_close_sp_par =
true;
212 auto const end_in_lower = [&]{
213 for (
auto j = i; j < std::ssize(infos); ++j) {
214 if (not infos[j].is_skip()) {
215 if (infos[j] == Lower) {
217 }
else if (infos[j] == OLetter or infos[j] == Upper or is_ParaSep(infos[j]) or is_SATerm(infos[j])) {
226 if (prev == ATerm and next == Numeric) {
228 }
else if ((prev_prev == Upper or prev_prev == Lower) and
prev == ATerm and
next == Upper) {
230 }
else if (prefix == ATerm and optional_close_sp and end_in_lower) {
232 }
else if (is_SATerm(prefix) and optional_close_sp and (next == SContinue or is_SATerm(next))) {
234 }
else if (is_SATerm(prefix) and optional_close and (next == Close or next == Sp or is_ParaSep(next))) {
236 }
else if (is_SATerm(prefix) and optional_close_sp and (next == Sp or is_ParaSep(next))) {
238 }
else if (is_SATerm(prefix) and optional_close_sp_par) {
256template<
typename It,
typename ItEnd,
typename CodePo
intFunc>
257[[nodiscard]]
inline unicode_break_vector
266 auto const code_point = code_point_func(item);
270 detail::unicode_sentence_break_SB1_SB4(r, infos);
271 detail::unicode_sentence_break_SB5(r, infos);
272 detail::unicode_sentence_break_SB6_SB998(r, infos);
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
unicode_break_vector unicode_sentence_break(It first, ItEnd last, CodePointFunc const &code_point_func) noexcept
The unicode sentence break algorithm UAX#29.
Definition unicode_sentence_break.hpp:258
Definition unicode_sentence_break.hpp:25
T back_inserter(T... args)