10#include "ucd_sentence_break_properties.hpp"
13namespace hi::inline
v1 {
26 constexpr unicode_sentence_break_info(unicode_sentence_break_property
const &sentence_break_property) noexcept : _value(to_underlying(sentence_break_property))
35 [[nodiscard]]
constexpr bool is_skip()
const noexcept
37 return to_bool(_value & 0x40);
40 [[nodiscard]]
constexpr friend bool operator==(
unicode_sentence_break_info const &lhs, unicode_sentence_break_property
const &rhs)
noexcept
42 return (lhs._value & 0x3f) == to_underlying(rhs);
49 return rhs == unicode_sentence_break_property::Sep or rhs == unicode_sentence_break_property::CR or rhs == unicode_sentence_break_property::LF;
54 return rhs == unicode_sentence_break_property::STerm or rhs == unicode_sentence_break_property::ATerm;
61[[nodiscard]]
inline void unicode_sentence_break_SB1_SB4(
65 using enum unicode_break_opportunity;
66 using enum unicode_sentence_break_property;
68 hi_axiom(r.size() == infos.size() + 1);
73 for (
auto i = 1_uz; i < infos.size(); ++i) {
74 hilet prev = infos[i - 1];
75 hilet next = infos[i];
78 if (prev == CR and next == LF) {
80 }
else if (is_ParaSep(prev)) {
89[[nodiscard]]
inline void unicode_sentence_break_SB5(
90 unicode_break_vector &r,
93 using enum unicode_break_opportunity;
94 using enum unicode_sentence_break_property;
96 hi_axiom(r.size() == infos.size() + 1);
98 for (
auto i = 1_uz; i < infos.size(); ++i) {
100 auto &
next = infos[i];
102 if ((not is_ParaSep(prev) and prev != CR and prev != LF) and (next == Extend or next == Format)) {
103 if (r[i] == unassigned) {
111[[nodiscard]]
inline void unicode_sentence_break_SB6_SB998(
112 unicode_break_vector &r,
115 using enum unicode_break_opportunity;
116 using enum unicode_sentence_break_property;
118 hi_axiom(r.size() == infos.size() + 1);
120 for (
auto i = 0_z; i < std::ssize(infos); ++i) {
122 if (r[i] != unassigned) {
131 for (k = i - 1; k >= 0; --k) {
132 if (not infos[k].is_skip()) {
136 return unicode_sentence_break_info{};
139 hilet prev_prev = [&] {
140 for (--k; k >= 0; --k) {
141 if (not infos[k].is_skip()) {
145 return unicode_sentence_break_info{};
153 hilet [prefix, close_sp_par_found] = [&]() {
154 using enum unicode_break_opportunity;
158 for (
auto j = i - 1; j >= 0; --j) {
159 if (not infos[j].is_skip()) {
162 if (is_ParaSep(infos[j])) {
165 }
else if (infos[j] == Sp) {
168 }
else if (infos[j] == Close) {
177 if (infos[j] == Sp) {
180 }
else if (infos[j] == Close) {
188 if (infos[j] == Close) {
200 hilet optional_close = (close_sp_par_found & 3) == 0;
201 hilet optional_close_sp = (close_sp_par_found & 1) == 0;
202 hilet optional_close_sp_par =
true;
204 hilet end_in_lower = [&]{
205 for (
auto j = i; j < std::ssize(infos); ++j) {
206 if (not infos[j].is_skip()) {
207 if (infos[j] == Lower) {
209 }
else if (infos[j] == OLetter or infos[j] == Upper or is_ParaSep(infos[j]) or is_SATerm(infos[j])) {
218 if (prev == ATerm and next == Numeric) {
220 }
else if ((prev_prev == Upper or prev_prev == Lower) and
prev == ATerm and
next == Upper) {
222 }
else if (prefix == ATerm and optional_close_sp and end_in_lower) {
224 }
else if (is_SATerm(prefix) and optional_close_sp and (next == SContinue or is_SATerm(next))) {
226 }
else if (is_SATerm(prefix) and optional_close and (next == Close or next == Sp or is_ParaSep(next))) {
228 }
else if (is_SATerm(prefix) and optional_close_sp and (next == Sp or is_ParaSep(next))) {
230 }
else if (is_SATerm(prefix) and optional_close_sp_par) {
248template<
typename It,
typename ItEnd,
typename CodePo
intFunc>
249[[nodiscard]]
inline unicode_break_vector
258 hilet code_point = code_point_func(item);
262 detail::unicode_sentence_break_SB1_SB4(r, infos);
263 detail::unicode_sentence_break_SB5(r, infos);
264 detail::unicode_sentence_break_SB6_SB998(r, infos);
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
unicode_break_vector unicode_sentence_break(It first, ItEnd last, CodePointFunc const &code_point_func) noexcept
The Unicode sentence break algorithm UAX#29.
Definition unicode_sentence_break.hpp:250
Definition unicode_sentence_break.hpp:17
T back_inserter(T... args)