12namespace hi::inline
v1 {
14enum class unicode_sentence_break_property : uint8_t {
43 constexpr unicode_sentence_break_info(unicode_sentence_break_property
const &sentence_break_property) noexcept : _value(to_underlying(sentence_break_property))
52 [[nodiscard]]
constexpr bool is_skip()
const noexcept
54 return to_bool(_value & 0x40);
57 [[nodiscard]]
constexpr friend bool operator==(
unicode_sentence_break_info const &lhs, unicode_sentence_break_property
const &rhs)
noexcept
59 return (lhs._value & 0x3f) == to_underlying(rhs);
66 return rhs == unicode_sentence_break_property::Sep or rhs == unicode_sentence_break_property::CR or rhs == unicode_sentence_break_property::LF;
71 return rhs == unicode_sentence_break_property::STerm or rhs == unicode_sentence_break_property::ATerm;
78[[nodiscard]]
inline void unicode_sentence_break_SB1_SB4(
82 using enum unicode_break_opportunity;
83 using enum unicode_sentence_break_property;
85 hi_axiom(r.size() == infos.size() + 1);
90 for (
auto i = 1_uz; i < infos.size(); ++i) {
91 hilet prev = infos[i - 1];
92 hilet next = infos[i];
95 if (prev == CR and next == LF) {
97 }
else if (is_ParaSep(prev)) {
106[[nodiscard]]
inline void unicode_sentence_break_SB5(
107 unicode_break_vector &r,
110 using enum unicode_break_opportunity;
111 using enum unicode_sentence_break_property;
113 hi_axiom(r.size() == infos.size() + 1);
115 for (
auto i = 1_uz; i < infos.size(); ++i) {
117 auto &
next = infos[i];
119 if ((not is_ParaSep(prev) and prev != CR and prev != LF) and (next == Extend or next == Format)) {
120 if (r[i] == unassigned) {
128[[nodiscard]]
inline void unicode_sentence_break_SB6_SB998(
129 unicode_break_vector &r,
132 using enum unicode_break_opportunity;
133 using enum unicode_sentence_break_property;
135 hi_axiom(r.size() == infos.size() + 1);
137 for (
auto i = 0_z; i < std::ssize(infos); ++i) {
139 if (r[i] != unassigned) {
143 hi_axiom(not
next.is_skip());
148 for (k = i - 1; k >= 0; --k) {
149 if (not infos[k].is_skip()) {
153 return unicode_sentence_break_info{};
156 hilet prev_prev = [&] {
157 for (--k; k >= 0; --k) {
158 if (not infos[k].is_skip()) {
162 return unicode_sentence_break_info{};
170 hilet [prefix, close_sp_par_found] = [&]() {
171 using enum unicode_break_opportunity;
175 for (
auto j = i - 1; j >= 0; --j) {
176 if (not infos[j].is_skip()) {
179 if (is_ParaSep(infos[j])) {
182 }
else if (infos[j] == Sp) {
185 }
else if (infos[j] == Close) {
194 if (infos[j] == Sp) {
197 }
else if (infos[j] == Close) {
205 if (infos[j] == Close) {
217 hilet optional_close = (close_sp_par_found & 3) == 0;
218 hilet optional_close_sp = (close_sp_par_found & 1) == 0;
219 hilet optional_close_sp_par =
true;
221 hilet end_in_lower = [&]{
222 for (
auto j = i; j < std::ssize(infos); ++j) {
223 if (not infos[j].is_skip()) {
224 if (infos[j] == Lower) {
226 }
else if (infos[j] == OLetter or infos[j] == Upper or is_ParaSep(infos[j]) or is_SATerm(infos[j])) {
235 if (prev == ATerm and next == Numeric) {
237 }
else if ((prev_prev == Upper or prev_prev == Lower) and
prev == ATerm and
next == Upper) {
239 }
else if (prefix == ATerm and optional_close_sp and end_in_lower) {
241 }
else if (is_SATerm(prefix) and optional_close_sp and (next == SContinue or is_SATerm(next))) {
243 }
else if (is_SATerm(prefix) and optional_close and (next == Close or next == Sp or is_ParaSep(next))) {
245 }
else if (is_SATerm(prefix) and optional_close_sp and (next == Sp or is_ParaSep(next))) {
247 }
else if (is_SATerm(prefix) and optional_close_sp_par) {
265template<
typename It,
typename ItEnd,
typename DescriptionFunc>
274 hilet &description = description_func(item);
278 detail::unicode_sentence_break_SB1_SB4(r, infos);
279 detail::unicode_sentence_break_SB5(r, infos);
280 detail::unicode_sentence_break_SB6_SB998(r, infos);
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:15
unicode_break_vector unicode_sentence_break(It first, ItEnd last, DescriptionFunc const &description_func)
The Unicode sentence break algorithm UAX#29.
Definition unicode_sentence_break.hpp:266
Definition unicode_sentence_break.hpp:34
T back_inserter(T... args)