10#include "unicode_general_category.hpp"
11#include "unicode_grapheme_cluster_break.hpp"
13#include "unicode_break_opportunity.hpp"
16#include "../assert.hpp"
28namespace hi::inline v1 {
36enum class unicode_line_break_class : uint8_t {
100[[nodiscard]]
constexpr float
101unicode_line_break_width(std::vector<float>::const_iterator first, std::vector<float>::const_iterator last)
noexcept
107 auto rfirst = std::make_reverse_iterator(last);
108 auto rlast = std::make_reverse_iterator(first);
114 return acc + abs(width);
123 unicode_line_break_class original_class = unicode_line_break_class::XX;
124 unicode_line_break_class current_class = unicode_line_break_class::XX;
125 bool is_extended_pictographic =
false;
127 unicode_east_asian_width east_asian_width = unicode_east_asian_width::A;
136 unicode_line_break_class break_class,
138 bool is_extended_pictographic,
139 unicode_east_asian_width east_asian_width) noexcept :
140 original_class(break_class),
141 current_class(break_class),
143 is_extended_pictographic(is_extended_pictographic),
144 east_asian_width(east_asian_width)
148 constexpr explicit operator unicode_line_break_class()
const noexcept
150 return current_class;
159 [[nodiscard]]
constexpr bool operator==(unicode_line_break_class rhs)
const noexcept
161 return current_class == rhs;
164 [[nodiscard]]
constexpr bool operator==(unicode_east_asian_width rhs)
const noexcept
166 return east_asian_width == rhs;
171using unicode_line_break_info_iterator = unicode_line_break_info_vector::iterator;
172using unicode_line_break_info_const_iterator = unicode_line_break_info_vector::const_iterator;
174template<
typename It,
typename ItEnd,
typename DescriptionFunc>
176unicode_LB1(It first, ItEnd last, DescriptionFunc
const &description_func)
noexcept
181 for (
auto it = first; it != last; ++it) {
182 hilet &description = description_func(*it);
183 hilet break_class = description.line_break_class();
184 hilet general_category = description.general_category();
186 hilet resolved_break_class = [&]() {
187 switch (break_class) {
188 using enum unicode_line_break_class;
193 case SA:
return is_Mn_or_Mc(general_category) ? CM : AL;
194 default:
return break_class;
199 resolved_break_class,
200 general_category == unicode_general_category::Cn,
201 description.grapheme_cluster_break() == unicode_grapheme_cluster_break::Extended_Pictographic,
202 description.east_asian_width());
208[[nodiscard]]
constexpr void unicode_LB2_3(unicode_break_vector &opportunities)
noexcept
210 hi_axiom(not opportunities.empty());
212 opportunities.front() = unicode_break_opportunity::no;
214 opportunities.back() = unicode_break_opportunity::mandatory;
217template<
typename MatchFunc>
218constexpr void unicode_LB_walk(
219 unicode_break_vector &opportunities,
221 MatchFunc match_func)
noexcept
223 using enum unicode_line_break_class;
229 auto cur = infos.begin();
230 hilet last = infos.end() - 1;
231 hilet last2 = infos.end();
232 auto opportunity = opportunities.begin() + 1;
234 auto cur_sp_class = XX;
235 auto cur_nu_class = XX;
236 auto prev_class = XX;
238 while (cur != last) {
240 hilet cur_class = unicode_line_break_class{*cur};
241 hilet next2_class = cur + 2 == last2 ? XX : unicode_line_break_class{*(cur + 2)};
244 if (cur_class != SP) {
245 cur_sp_class = cur_class;
249 if (cur_nu_class == CL) {
252 }
else if (cur_nu_class == NU) {
253 if (cur_class == CL or cur_class == CP) {
255 }
else if (cur_class != NU and cur_class != SY and cur_class != IS) {
258 }
else if (cur_class == NU) {
263 if (cur->original_class == RI) {
265 }
else if (*cur != RI) {
269 if (*opportunity == unicode_break_opportunity::unassigned) {
270 *opportunity = match_func(prev_class, cur, next, next2_class, cur_sp_class, cur_nu_class, num_ri);
273 prev_class = cur_class;
283 using enum unicode_break_opportunity;
284 using enum unicode_line_break_class;
287 }
else if (*cur == CR and *next == LF) {
289 }
else if (*cur == CR or *cur == LF or *cur == NL) {
291 }
else if (*next == BK or *next == CR or *next == LF or *next == NL) {
293 }
else if (*next == SP or *next == ZW) {
295 }
else if (cur_sp == ZW) {
297 }
else if (*cur == ZWJ) {
307 using enum unicode_line_break_class;
308 using enum unicode_break_opportunity;
314 auto cur = infos.begin();
315 hilet last = infos.end() - 1;
316 auto opportunity = opportunities.begin() + 1;
319 while (cur != last) {
322 if ((*cur == CM or *cur == ZWJ) and X != XX) {
330 if ((*cur != BK and *cur != CR and *cur != LF and *cur != NL and *cur != SP and *cur != ZW) and
331 (*next == CM or *next == ZWJ)) {
337 X =
static_cast<unicode_line_break_class
>(*cur);
348 using enum unicode_line_break_class;
350 for (
auto &x : infos) {
351 if (x == CM or x == ZWJ) {
361 using enum unicode_break_opportunity;
362 using enum unicode_line_break_class;
363 using enum unicode_east_asian_width;
365 if (*cur == WJ or *next == WJ) {
367 }
else if (*cur == GL) {
369 }
else if (*cur != SP and *cur != BA and *cur != HY and *next == GL) {
371 }
else if (*next == CL or *next == CP or *next == EX or *next == IS or *next == SY) {
373 }
else if (cur_sp == OP) {
375 }
else if (cur_sp == QU and *next == OP) {
377 }
else if ((cur_sp == CL or cur_sp == CP) and *next == NS) {
379 }
else if (cur_sp == B2 and *next == B2) {
381 }
else if (*cur == SP) {
383 }
else if (*cur == QU or *next == QU) {
385 }
else if (*cur == CB or *next == CB) {
387 }
else if (*cur == BB or *next == BA or *next == HY or *next == NS) {
389 }
else if (prev == HL and (*cur == HY or *cur == BA)) {
391 }
else if (*cur == SY and *next == HL) {
393 }
else if (*next == IN) {
395 }
else if ((*cur == AL or *cur == HL) and *next == NU) {
397 }
else if (*cur == NU and (*next == AL or *next == HL)) {
399 }
else if (*cur == PR and (*next == ID or *next == EB or *next == EM)) {
401 }
else if ((*cur == ID or *cur == EB or *cur == EM) and *next == PO) {
403 }
else if ((*cur == PR or *cur == PO) and (*next == AL or *next == HL)) {
405 }
else if ((*cur == AL or *cur == HL) and (*next == PR or *next == PO)) {
408 (*cur == PR or *cur == PO) and ((*next == OP and next2 == NU) or (*next == HY and next2 == NU) or *next == NU)) {
410 }
else if ((*cur == OP or *cur == HY) and *next == NU) {
412 }
else if (*cur == NU and (*next == NU or *next == SY or *next == IS)) {
414 }
else if (cur_nu == NU and (*next == NU or *next == SY or *next == IS or *next == CL or *next == CP)) {
416 }
else if ((cur_nu == NU or cur_nu == CL) and (*next == PO or *next == PR)) {
418 }
else if (*cur == JL and (*next == JL or *next == JV or *next == H2 or *next == H3)) {
420 }
else if ((*cur == JV or *cur == H2) and (*next == JV or *next == JT)) {
422 }
else if ((*cur == JT or *cur == H3) and *next == JT) {
424 }
else if ((*cur == JL or *cur == JV or *cur == JT or *cur == H2 or *cur == H3) and *next == PO) {
426 }
else if (*cur == PR and (*next == JL or *next == JV or *next == JT or *next == H2 or *next == H3)) {
428 }
else if ((*cur == AL or *cur == HL) and (*next == AL or *next == HL)) {
430 }
else if (*cur == IS and (*next == AL or *next == HL)) {
432 }
else if ((*cur == AL or *cur == HL or *cur == NU) and (*next == OP and *next != F and *next != W and *next != H)) {
434 }
else if ((*cur == CP and *cur != F and *cur != W and *cur != H) and (*next == AL or *next == HL or *next == NU)) {
436 }
else if (*cur == RI and *next == RI and (num_ri % 2) == 1) {
438 }
else if (*cur == EB and *next == EM) {
440 }
else if (cur->is_extended_pictographic and cur->is_Cn and *next == EM) {
448[[nodiscard]]
constexpr bool unicode_LB_width_check(
451 float maximum_line_width)
noexcept
453 auto it = widths.begin();
454 for (
auto length : lengths) {
455 if (unicode_line_break_width(it, it + length) > maximum_line_width) {
467[[nodiscard]]
constexpr std::vector<size_t> unicode_LB_mandatory_lines(unicode_break_vector
const &opportunities)
noexcept
472 for (
auto it = opportunities.begin() + 1; it != opportunities.end(); ++it) {
474 if (*it == unicode_break_opportunity::mandatory) {
483[[nodiscard]]
constexpr unicode_break_const_iterator unicode_LB_fast_fit_line(
484 unicode_break_const_iterator opportunity_it,
485 std::vector<float>::const_iterator width_it,
486 float maximum_line_width)
noexcept
488 using enum unicode_break_opportunity;
491 auto end_of_line = opportunity_it;
493 width += abs(*width_it);
494 if (width > maximum_line_width) {
498 }
else if (*opportunity_it == mandatory) {
500 return opportunity_it;
502 }
else if (*opportunity_it == yes) {
504 end_of_line = opportunity_it;
513[[nodiscard]]
constexpr unicode_break_const_iterator unicode_LB_slow_fit_line(
514 unicode_break_const_iterator first,
515 unicode_break_const_iterator end_of_line,
516 std::vector<float>::const_iterator first_width,
517 float maximum_line_width)
noexcept
519 using enum unicode_break_opportunity;
522 auto it = end_of_line;
525 hilet line_width = unicode_line_break_width(first_width, first_width + num_characters);
527 if (line_width <= maximum_line_width) {
528 if (*it == mandatory) {
532 }
else if (*it == yes) {
546[[nodiscard]]
constexpr unicode_break_const_iterator
547unicode_LB_finish_fit_line(unicode_break_const_iterator first, unicode_break_const_iterator end_of_line)
noexcept
549 if (first == end_of_line) {
551 while (*end_of_line == unicode_break_opportunity::no) {
557 return end_of_line + 1;
565 unicode_break_vector
const &opportunities,
567 float maximum_line_width)
noexcept
569 using enum unicode_break_opportunity;
572 if (widths.empty()) {
576 auto opportunity_it = opportunities.
begin() + 1;
577 auto width_it = widths.begin();
578 while (width_it != widths.end()) {
580 auto opportunity_eol = unicode_LB_fast_fit_line(opportunity_it, width_it, maximum_line_width);
581 opportunity_eol = unicode_LB_slow_fit_line(opportunity_it, opportunity_eol, width_it, maximum_line_width);
582 opportunity_eol = unicode_LB_finish_fit_line(opportunity_it, opportunity_eol);
585 r.push_back(num_characters);
586 opportunity_it += num_characters;
587 width_it += num_characters;
602template<
typename It,
typename ItEnd,
typename DescriptionFunc>
603[[nodiscard]]
inline unicode_break_vector unicode_line_break(It first, ItEnd last, DescriptionFunc
const &description_func)
606 auto r = unicode_break_vector{size + 1, unicode_break_opportunity::unassigned};
608 auto infos = detail::unicode_LB1(first, last, description_func);
609 detail::unicode_LB2_3(r);
610 detail::unicode_LB4_8a(r, infos);
611 detail::unicode_LB9(r, infos);
612 detail::unicode_LB10(infos);
613 detail::unicode_LB11_31(r, infos);
623unicode_line_break(unicode_break_vector
const &opportunities,
std::vector<float> const &widths,
float maximum_line_width)
626 auto r = detail::unicode_LB_mandatory_lines(opportunities);
627 if (detail::unicode_LB_width_check(widths, r, maximum_line_width)) {
631 r = detail::unicode_LB_fit_lines(opportunities, widths, maximum_line_width);
632 hi_axiom(detail::unicode_LB_width_check(widths, r, maximum_line_width));
This file includes required definitions.
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
#define hi_unreachable()
Marker to tell the compiler that this line will never be executed.
Definition architecture.hpp:214
Combined unicode_line_break_class and unicode_line_break_opportunity.
Definition unicode_line_break.hpp:122