10#include "unicode_break_opportunity.hpp"
11#include "ucd_general_categories.hpp"
12#include "ucd_grapheme_cluster_breaks.hpp"
13#include "ucd_line_break_classes.hpp"
14#include "ucd_east_asian_widths.hpp"
15#include "../utility/utility.hpp"
16#include "../macros.hpp"
22hi_export_module(hikogui.unicode.unicode_line_break);
25hi_export
namespace hi::inline
v1 {
31 unicode_line_break_class original_class = unicode_line_break_class::XX;
32 unicode_line_break_class current_class = unicode_line_break_class::XX;
33 bool is_extended_pictographic =
false;
35 unicode_east_asian_width east_asian_width = unicode_east_asian_width::A;
44 unicode_line_break_class break_class,
46 bool is_extended_pictographic,
47 unicode_east_asian_width east_asian_width) noexcept :
48 original_class(break_class),
49 current_class(break_class),
51 is_extended_pictographic(is_extended_pictographic),
52 east_asian_width(east_asian_width)
56 constexpr explicit operator unicode_line_break_class()
const noexcept
67 [[nodiscard]]
constexpr bool operator==(unicode_line_break_class rhs)
const noexcept
69 return current_class == rhs;
72 [[nodiscard]]
constexpr bool operator==(unicode_east_asian_width rhs)
const noexcept
74 return east_asian_width == rhs;
79using unicode_line_break_info_iterator = unicode_line_break_info_vector::iterator;
80using unicode_line_break_info_const_iterator = unicode_line_break_info_vector::const_iterator;
82template<
typename It,
typename ItEnd,
typename CodePo
intFunc>
84unicode_LB1(It first, ItEnd last, CodePointFunc
const& code_point_func)
noexcept
89 for (
auto it = first; it != last; ++it) {
90 auto const code_point = code_point_func(*it);
91 auto const east_asian_width = ucd_get_east_asian_width(code_point);
92 auto const break_class = ucd_get_line_break_class(code_point);
93 auto const general_category = ucd_get_general_category(code_point);
94 auto const grapheme_cluster_break = ucd_get_grapheme_cluster_break(code_point);
96 auto const resolved_break_class = [&]() {
97 switch (break_class) {
98 using enum unicode_line_break_class;
106 return is_Mn_or_Mc(general_category) ? CM : AL;
113 resolved_break_class,
114 general_category == unicode_general_category::Cn,
115 grapheme_cluster_break == unicode_grapheme_cluster_break::Extended_Pictographic,
122constexpr void unicode_LB2_3(unicode_break_vector& opportunities)
noexcept
124 hi_axiom(not opportunities.empty());
126 opportunities.front() = unicode_break_opportunity::no;
128 opportunities.back() = unicode_break_opportunity::mandatory;
131template<
typename MatchFunc>
132constexpr void unicode_LB_walk(
133 unicode_break_vector& opportunities,
135 MatchFunc match_func)
noexcept
143 auto cur = infos.begin();
144 auto const last = infos.end() - 1;
145 auto const last2 = infos.end();
146 auto opportunity = opportunities.begin() + 1;
148 auto cur_sp_class = XX;
149 auto cur_nu_class = XX;
150 auto prev_class = XX;
152 while (cur != last) {
153 auto const next = cur + 1;
158 if (cur_class != SP) {
159 cur_sp_class = cur_class;
163 if (cur_nu_class == CL) {
166 }
else if (cur_nu_class == NU) {
167 if (cur_class == CL or cur_class == CP) {
169 }
else if (cur_class != NU and cur_class != SY and cur_class != IS) {
172 }
else if (cur_class == NU) {
177 if (cur->original_class == RI) {
179 }
else if (*cur != RI) {
183 if (*opportunity == unicode_break_opportunity::unassigned) {
184 *opportunity = match_func(prev_class, cur, next, next2_class, cur_sp_class, cur_nu_class, num_ri);
187 prev_class = cur_class;
196 opportunities, infos, [](
auto const prev,
auto const cur,
auto const next,
auto const next2,
auto const cur_sp,
auto const cur_nu,
auto const num_ri) {
197 using enum unicode_break_opportunity;
201 }
else if (*cur == CR and *next == LF) {
203 }
else if (*cur == CR or *cur == LF or *cur == NL) {
205 }
else if (*next == BK or *next == CR or *next == LF or *next == NL) {
207 }
else if (*next == SP or *next == ZW) {
209 }
else if (cur_sp == ZW) {
211 }
else if (*cur == ZWJ) {
222 using enum unicode_break_opportunity;
228 auto cur = infos.begin();
229 auto const last = infos.end() - 1;
230 auto opportunity = opportunities.begin() + 1;
233 while (cur != last) {
234 auto const next = cur + 1;
236 if ((*cur == CM or *cur == ZWJ) and X != XX) {
244 if ((*cur != BK and *cur != CR and *cur != LF and *cur != NL and *cur != SP and *cur != ZW) and
245 (*next == CM or *next == ZWJ)) {
264 for (
auto& x : infos) {
265 if (x == CM or x == ZWJ) {
274 opportunities, infos, [&](
auto const prev,
auto const cur,
auto const next,
auto const next2,
auto const cur_sp,
auto const cur_nu,
auto const num_ri) {
275 using enum unicode_break_opportunity;
277 using enum unicode_east_asian_width;
279 if (*cur == WJ or *next == WJ) {
281 }
else if (*cur == GL) {
283 }
else if (*cur != SP and *cur != BA and *cur != HY and *next == GL) {
285 }
else if (*next == CL or *next == CP or *next == EX or *next == IS or *next == SY) {
287 }
else if (cur_sp == OP) {
289 }
else if (cur_sp == QU and *next == OP) {
291 }
else if ((cur_sp == CL or cur_sp == CP) and *next == NS) {
293 }
else if (cur_sp == B2 and *next == B2) {
295 }
else if (*cur == SP) {
297 }
else if (*cur == QU or *next == QU) {
299 }
else if (*cur == CB or *next == CB) {
301 }
else if (*cur == BB or *next == BA or *next == HY or *next == NS) {
303 }
else if (prev == HL and (*cur == HY or *cur == BA)) {
305 }
else if (*cur == SY and *next == HL) {
307 }
else if (*next == IN) {
309 }
else if ((*cur == AL or *cur == HL) and *next == NU) {
311 }
else if (*cur == NU and (*next == AL or *next == HL)) {
313 }
else if (*cur == PR and (*next == ID or *next == EB or *next == EM)) {
315 }
else if ((*cur == ID or *cur == EB or *cur == EM) and *next == PO) {
317 }
else if ((*cur == PR or *cur == PO) and (*next == AL or *next == HL)) {
319 }
else if ((*cur == AL or *cur == HL) and (*next == PR or *next == PO)) {
322 (*cur == PR or *cur == PO) and ((*next == OP and next2 == NU) or (*next == HY and next2 == NU) or *next == NU)) {
324 }
else if ((*cur == OP or *cur == HY) and *next == NU) {
326 }
else if (*cur == NU and (*next == NU or *next == SY or *next == IS)) {
328 }
else if (cur_nu == NU and (*next == NU or *next == SY or *next == IS or *next == CL or *next == CP)) {
330 }
else if ((cur_nu == NU or cur_nu == CL) and (*next == PO or *next == PR)) {
332 }
else if (*cur == JL and (*next == JL or *next == JV or *next == H2 or *next == H3)) {
334 }
else if ((*cur == JV or *cur == H2) and (*next == JV or *next == JT)) {
336 }
else if ((*cur == JT or *cur == H3) and *next == JT) {
338 }
else if ((*cur == JL or *cur == JV or *cur == JT or *cur == H2 or *cur == H3) and *next == PO) {
340 }
else if (*cur == PR and (*next == JL or *next == JV or *next == JT or *next == H2 or *next == H3)) {
342 }
else if ((*cur == AL or *cur == HL) and (*next == AL or *next == HL)) {
344 }
else if (*cur == IS and (*next == AL or *next == HL)) {
346 }
else if ((*cur == AL or *cur == HL or *cur == NU) and (*next == OP and *next != F and *next != W and *next != H)) {
348 }
else if ((*cur == CP and *cur != F and *cur != W and *cur != H) and (*next == AL or *next == HL or *next == NU)) {
350 }
else if (*cur == RI and *next == RI and (num_ri % 2) == 1) {
352 }
else if (*cur == EB and *next == EM) {
354 }
else if (cur->is_extended_pictographic and cur->is_Cn and *next == EM) {
368[[nodiscard]]
constexpr float
369unicode_LB_width(std::vector<float>::const_iterator first, std::vector<float>::const_iterator last)
noexcept
375 auto rfirst = std::make_reverse_iterator(last);
376 auto rlast = std::make_reverse_iterator(first);
378 auto it =
std::find_if(rfirst, rlast, [](
auto const& width) {
381 return std::accumulate(it, rlast, 0.0f, [](
float acc,
auto const& width) {
382 return acc + abs(width);
394 auto max_width = 0.0f;
395 auto it = widths.
begin();
396 for (
auto length : lengths) {
410[[nodiscard]]
constexpr bool
413 auto it = widths.begin();
414 for (
auto length : lengths) {
432 for (
auto it = opportunities.begin() + 1; it != opportunities.end(); ++it) {
434 if (*it == unicode_break_opportunity::mandatory) {
452 for (
auto it = opportunities.begin() + 1; it != opportunities.end(); ++it) {
454 if (*it != unicode_break_opportunity::no) {
463[[nodiscard]]
constexpr unicode_break_const_iterator unicode_LB_fast_fit_line(
464 unicode_break_const_iterator opportunity_it,
465 std::vector<float>::const_iterator width_it,
466 float maximum_line_width)
noexcept
468 using enum unicode_break_opportunity;
471 auto end_of_line = opportunity_it;
473 width += abs(*width_it);
474 if (width > maximum_line_width) {
478 }
else if (*opportunity_it == mandatory) {
480 return opportunity_it;
482 }
else if (*opportunity_it == yes) {
484 end_of_line = opportunity_it;
493[[nodiscard]]
constexpr unicode_break_const_iterator unicode_LB_slow_fit_line(
494 unicode_break_const_iterator first,
495 unicode_break_const_iterator end_of_line,
496 std::vector<float>::const_iterator first_width,
497 float maximum_line_width)
noexcept
499 using enum unicode_break_opportunity;
502 auto it = end_of_line;
505 auto const line_width =
unicode_LB_width(first_width, first_width + num_characters);
507 if (line_width <= maximum_line_width) {
508 if (*it == mandatory) {
512 }
else if (*it == yes) {
526[[nodiscard]]
constexpr unicode_break_const_iterator
527unicode_LB_finish_fit_line(unicode_break_const_iterator first, unicode_break_const_iterator end_of_line)
noexcept
529 if (first == end_of_line) {
531 while (*end_of_line == unicode_break_opportunity::no) {
537 return end_of_line + 1;
547 float maximum_line_width)
noexcept
549 using enum unicode_break_opportunity;
552 if (widths.empty()) {
556 auto opportunity_it = opportunities.
begin() + 1;
557 auto width_it = widths.begin();
558 while (width_it != widths.end()) {
560 auto opportunity_eol = unicode_LB_fast_fit_line(opportunity_it, width_it, maximum_line_width);
561 opportunity_eol = unicode_LB_slow_fit_line(opportunity_it, opportunity_eol, width_it, maximum_line_width);
562 opportunity_eol = unicode_LB_finish_fit_line(opportunity_it, opportunity_eol);
564 auto const num_characters =
std::distance(opportunity_it, opportunity_eol);
565 r.push_back(num_characters);
566 opportunity_it += num_characters;
567 width_it += num_characters;
584 auto line_lengths = detail::unicode_LB_mandatory_lines(opportunities);
585 auto const width = detail::unicode_LB_width(char_widths, line_lengths);
600 auto line_lengths = detail::unicode_LB_optional_lines(opportunities);
601 auto const width = detail::unicode_LB_width(char_widths, line_lengths);
617 auto line_lengths = detail::unicode_LB_fit_lines(opportunities, char_widths, maximum_line_width);
618 auto const width = detail::unicode_LB_width(char_widths, line_lengths);
631template<
typename It,
typename ItEnd,
typename CodePo
intFunc>
632[[nodiscard]]
inline unicode_break_vector
638 auto infos = detail::unicode_LB1(first, last, code_point_func);
639 detail::unicode_LB2_3(r);
640 detail::unicode_LB4_8a(r, infos);
641 detail::unicode_LB9(r, infos);
642 detail::unicode_LB10(infos);
643 detail::unicode_LB11_31(r, infos);
658 auto r = detail::unicode_LB_mandatory_lines(opportunities);
659 if (detail::unicode_LB_width_check(widths, r, maximum_line_width)) {
663 r = detail::unicode_LB_fit_lines(opportunities, widths, maximum_line_width);
664 hi_axiom(detail::unicode_LB_width_check(widths, r, maximum_line_width));
constexpr float unicode_LB_width(std::vector< float >::const_iterator first, std::vector< float >::const_iterator last) noexcept
Calculate the width of a line.
Definition unicode_line_break.hpp:369
constexpr std::vector< size_t > unicode_LB_mandatory_lines(unicode_break_vector const &opportunities) noexcept
Get the length of each line when broken with mandatory breaks.
Definition unicode_line_break.hpp:427
constexpr std::vector< size_t > unicode_LB_optional_lines(unicode_break_vector const &opportunities) noexcept
Get the length of each line when broken with mandatory and optional breaks.
Definition unicode_line_break.hpp:447
constexpr std::pair< float, std::vector< size_t > > unicode_LB_minimum_width(unicode_break_vector const &opportunities, std::vector< float > const &char_widths)
Get the minimum width of the text.
Definition unicode_line_break.hpp:598
constexpr std::pair< float, std::vector< size_t > > unicode_LB_maximum_width(unicode_break_vector const &opportunities, std::vector< float > const &char_widths)
Get the maximum width of the text.
Definition unicode_line_break.hpp:582
constexpr std::vector< size_t > unicode_LB_fit_lines(unicode_break_vector const &opportunities, std::vector< float > const &widths, float maximum_line_width) noexcept
Get the length of each line when broken after folding text to a maximum width.
Definition unicode_line_break.hpp:544
constexpr bool unicode_LB_width_check(std::vector< float > const &widths, std::vector< size_t > const &lengths, float maximum_line_width) noexcept
Check if all the lines in the text fit the maximum width.
Definition unicode_line_break.hpp:411
unicode_line_break_class
Unicode line break class.
Definition ucd_line_break_classes.hpp:1017
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
unicode_break_vector unicode_line_break(It first, ItEnd last, CodePointFunc const &code_point_func) noexcept
The unicode line break algorithm UAX #14.
Definition unicode_line_break.hpp:633
Combined unicode_line_break_class and unicode_line_break_opportunity.
Definition unicode_line_break.hpp:30