7#include "ucd_bidi_classes.hpp"
8#include "ucd_bidi_paired_bracket_types.hpp"
9#include "ucd_bidi_mirroring_glyphs.hpp"
10#include "ucd_decompositions.hpp"
11#include "ucd_general_categories.hpp"
12#include "../utility/module.hpp"
13#include "../stack.hpp"
14#include "../recursive_iterator.hpp"
16namespace hi::inline
v1 {
// Paragraph-direction policy. LTR / RTL make unicode_bidi_P2 return L / R without
// scanning the text; auto_LTR / auto_RTL select the value unicode_bidi_P2_default
// returns (L resp. R) when the scan yields no answer.
19 enum class mode_type : uint8_t { LTR, RTL, auto_LTR, auto_RTL };
// Active policy; defaults to auto_LTR.
21 mode_type direction_mode = mode_type::auto_LTR;
// Checked at the top of unicode_bidi_N0 (the branch taken when false is not
// visible in this listing; presumably bracket-pair resolution is skipped).
22 bool enable_mirrored_brackets =
true;
// When true, unicode_bidi_P1_paragraph ends a line after each code point whose
// general category is Zl.
23 bool enable_line_separator =
true;
33 if (text_direction == unicode_bidi_class::L) {
34 direction_mode = mode_type::auto_LTR;
35 }
else if (text_direction == unicode_bidi_class::R) {
36 direction_mode = mode_type::auto_RTL;
77 this->code_point = code_point;
78 this->embedding_level = 0;
79 this->direction = this->bidi_class = ucd_get_bidi_class(code_point);
80 this->bracket_type = ucd_get_bidi_paired_bracket_type(code_point);
88 code_point(U
'\ufffd'),
89 direction(bidi_class),
90 bidi_class(bidi_class),
91 bracket_type(unicode_bidi_paired_bracket_type::n),
// Mutable / const iterators over the per-character work vector; these are the
// `first` / `last` parameter types of the rule functions in this file.
98using unicode_bidi_char_info_iterator = unicode_bidi_char_info_vector::iterator;
99using unicode_bidi_char_info_const_iterator = unicode_bidi_char_info_vector::const_iterator;
106 template<
typename... Args>
107 constexpr void emplace_character(Args&&...args)
noexcept
// Forward declaration: unicode_bidi_X1 calls unicode_bidi_P2 before its
// definition further down. (One parameter line is missing from this listing;
// the definition also takes a `unicode_bidi_context const& context`.)
113[[nodiscard]]
constexpr unicode_bidi_class unicode_bidi_P2(
114 unicode_bidi_char_info_iterator first,
115 unicode_bidi_char_info_iterator last,
117 bool rule_X5c)
noexcept;
// Forward declaration: maps a paragraph bidi class to an embedding level;
// also used by unicode_bidi_X1 ahead of the definition.
119[[nodiscard]]
constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class)
noexcept;
122 int8_t embedding_level;
123 unicode_bidi_class override_status;
126 constexpr unicode_bidi_stack_element(int8_t embedding_level, unicode_bidi_class override_status,
bool isolate_status) noexcept
128 embedding_level(embedding_level), override_status(override_status), isolate_status(isolate_status)
135 using iterator = unicode_bidi_char_info_iterator;
136 using const_iterator = unicode_bidi_char_info_const_iterator;
141 [[nodiscard]]
constexpr iterator begin()
const noexcept
146 [[nodiscard]]
constexpr iterator end()
const noexcept
// Embedding level of this level run, read from its first character.
// NOTE(review): dereferences _begin unconditionally — assumes the run is never empty.
151 [[nodiscard]]
constexpr int8_t embedding_level()
const noexcept
154 return _begin->embedding_level;
// True when the last character of the run (the one just before _end) has
// direction LRI, RLI or FSI.
157 [[nodiscard]]
constexpr bool ends_with_isolate_initiator()
const noexcept
159 using enum unicode_bidi_class;
// NOTE(review): `_end - 1` assumes a non-empty run; any guard is not visible here.
162 hilet& last_char = *(_end - 1);
163 return last_char.direction == LRI || last_char.direction == RLI || last_char.direction == FSI;
// True when the first character of the run has direction PDI.
166 [[nodiscard]]
constexpr bool starts_with_PDI()
const noexcept
169 return _begin->direction == unicode_bidi_class::PDI;
// Iterators that flatten the container of runs into a single sequence of
// characters (recursive_iterator over the run container).
179 using iterator = recursive_iterator<run_container_type::iterator>;
180 using const_iterator = recursive_iterator<run_container_type::const_iterator>;
// Bidi classes used as the initial "previous" value (sos) and final comparison
// value (eos) by the W/N rule functions; assigned by unicode_bidi_X10.
183 unicode_bidi_class sos;
184 unicode_bidi_class eos;
// Constructor initialiser list (signature not visible in this listing): starts
// the sequence with the single run `rhs` and placeholder sos/eos of ON.
187 runs({rhs}), sos(unicode_bidi_class::ON), eos(unicode_bidi_class::ON)
// Mutable iterator to the first character across all runs of the sequence.
191 [[nodiscard]]
constexpr auto begin()
noexcept
193 return recursive_iterator_begin(runs);
// Mutable past-the-end iterator across all runs of the sequence.
196 [[nodiscard]]
constexpr auto end()
noexcept
198 return recursive_iterator_end(runs);
// Const overload of begin(): iterator to the first character across all runs.
201 [[nodiscard]]
constexpr auto begin()
const noexcept
203 return recursive_iterator_begin(runs);
// Const overload of end(): past-the-end iterator across all runs.
206 [[nodiscard]]
constexpr auto end()
const noexcept
208 return recursive_iterator_end(runs);
// Embedding level of the sequence, taken from its first level run.
// NOTE(review): runs.front() assumes at least one run; the visible constructor
// initialises `runs` with one.
236 [[nodiscard]]
constexpr int8_t embedding_level()
const noexcept
239 return runs.
front().embedding_level();
// Direction implied by the embedding level: even level -> L, odd level -> R.
242 [[nodiscard]]
constexpr unicode_bidi_class embedding_direction()
const noexcept
244 return (embedding_level() % 2) == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
// True when the last level run of the sequence ends with an isolate initiator
// (delegates to the run's own ends_with_isolate_initiator()).
247 [[nodiscard]]
constexpr bool ends_with_isolate_initiator()
const noexcept
250 return runs.
back().ends_with_isolate_initiator();
// Positions of the opening and closing bracket of one pair inside an isolated
// run sequence.
255 unicode_bidi_isolated_run_sequence::iterator open;
256 unicode_bidi_isolated_run_sequence::iterator close;
// Constructor parameters (constructor name and body are not visible in this listing).
259 unicode_bidi_isolated_run_sequence::iterator open,
260 unicode_bidi_isolated_run_sequence::iterator close) :
// Ordering compares only the position of the opening bracket (operator
// signature line is not visible in this listing).
265 [[nodiscard]]
constexpr friend auto
268 return lhs.open <=> rhs.open;
// Assigns explicit embedding levels and override directions to [first, last),
// using a directional-status stack seeded with the paragraph embedding level.
// Named after rule X1 of UAX #9. This listing omits the `context` parameter
// line, all `case` labels and most closing braces, so the notes on which
// character class each branch handles are inferred from the branch bodies.
272constexpr void unicode_bidi_X1(
273 unicode_bidi_char_info_iterator first,
274 unicode_bidi_char_info_iterator last,
275 int8_t paragraph_embedding_level,
278 using enum unicode_bidi_class;
// Highest embedding level that may be pushed on the stack.
280 constexpr int8_t max_depth = 125;
// Smallest even level strictly greater than x.
282 auto next_even = [](int8_t x) -> int8_t {
283 return (x % 2 == 0) ? x + 2 : x + 1;
// Smallest odd level strictly greater than x.
// NOTE(review): `x % 2 == 1` is false for negative odd x in C++; harmless only
// if levels are never negative — confirm.
286 auto next_odd = [](int8_t x) -> int8_t {
287 return (x % 2 == 1) ? x + 2 : x + 1;
// Counters for initiators that could not be pushed (overflow) and for
// isolates that were pushed (valid).
290 long long overflow_isolate_count = 0;
291 long long overflow_embedding_count = 0;
292 long long valid_isolate_count = 0;
// Initial entry: paragraph level, no override (ON), not an isolate.
295 auto stack = hi::stack<unicode_bidi_stack_element, max_depth + 2>{{paragraph_embedding_level, ON,
false}};
297 for (
auto it = first; it != last; ++it) {
// Snapshot the top of the stack before this character is processed.
298 hilet current_embedding_level = stack.back().embedding_level;
299 hilet current_override_status = stack.back().override_status;
300 hilet next_odd_embedding_level = next_odd(current_embedding_level);
301 hilet next_even_embedding_level = next_even(current_embedding_level);
// Shared handling for a right-to-left isolate: the initiator itself gets the
// current level (and override direction, if any); then an isolate entry at
// the next odd level is pushed when there is room and nothing has overflowed,
// otherwise the overflow-isolate counter is bumped (the `else` line is not
// visible here).
303 auto RLI_implementation = [&] {
304 it->embedding_level = current_embedding_level;
305 if (current_override_status != ON) {
306 it->direction = current_override_status;
309 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
310 ++valid_isolate_count;
311 stack.emplace_back(next_odd_embedding_level, ON,
true);
313 ++overflow_isolate_count;
// Same as RLI_implementation but pushes the next even level.
317 auto LRI_implementation = [&] {
318 it->embedding_level = current_embedding_level;
319 if (current_override_status != ON) {
320 it->direction = current_override_status;
323 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
324 ++valid_isolate_count;
325 stack.emplace_back(next_even_embedding_level, ON,
true);
327 ++overflow_isolate_count;
331 switch (it->direction) {
// Presumably `case RLE`: push next odd level, no override, not an isolate;
// on failure count an overflow embedding unless inside an overflowed isolate.
333 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
334 stack.emplace_back(next_odd_embedding_level, ON,
false);
335 }
else if (overflow_isolate_count == 0) {
336 ++overflow_embedding_count;
// Presumably `case LRE`: same with the next even level.
341 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
342 stack.emplace_back(next_even_embedding_level, ON,
false);
343 }
else if (overflow_isolate_count == 0) {
344 ++overflow_embedding_count;
// Presumably `case RLO`: next odd level with override direction R.
349 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
350 stack.emplace_back(next_odd_embedding_level, R,
false);
351 }
else if (overflow_isolate_count == 0) {
352 ++overflow_embedding_count;
// Presumably `case LRO`: next even level with override direction L.
357 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
358 stack.emplace_back(next_even_embedding_level, L,
false);
359 }
else if (overflow_isolate_count == 0) {
360 ++overflow_embedding_count;
// Presumably `case RLI` / `case LRI`.
365 RLI_implementation();
369 LRI_implementation();
// Presumably `case FSI`: determine the direction of the text that follows by
// running P2 (with rule_X5c = true and the mode forced to auto_LTR) and P3 on
// the remainder of the range, then treat the character as LRI when the
// resulting level is 0 and as RLI otherwise.
374 auto sub_context = context;
375 sub_context.direction_mode = unicode_bidi_context::mode_type::auto_LTR;
376 hilet sub_paragraph_bidi_class = unicode_bidi_P2(it + 1, last, sub_context,
true);
377 hilet sub_paragraph_embedding_level = unicode_bidi_P3(sub_paragraph_bidi_class);
378 if (sub_paragraph_embedding_level == 0) {
379 LRI_implementation();
381 RLI_implementation();
// Presumably `case PDI`: first consume an overflowed isolate if any; do
// nothing stack-wise when no valid isolate is open; otherwise clear the
// embedding overflow count, unwind non-isolate entries (the pop statements
// are not visible here) and close one valid isolate. Afterwards the
// character takes the level / override of the new stack top.
387 if (overflow_isolate_count > 0) {
388 --overflow_isolate_count;
389 }
else if (valid_isolate_count == 0) {
393 overflow_embedding_count = 0;
394 while (stack.back().isolate_status ==
false) {
398 --valid_isolate_count;
401 it->embedding_level = stack.back().embedding_level;
402 if (stack.back().override_status != ON) {
403 it->direction = stack.back().override_status;
// Presumably `case PDF`: ignored inside an overflowed isolate; otherwise
// cancels one overflowed embedding, or (body not visible) acts on the top
// entry when it is a non-isolate and at least two entries are on the stack.
408 if (overflow_isolate_count > 0) {
411 }
else if (overflow_embedding_count > 0) {
412 --overflow_embedding_count;
413 }
else if (stack.back().isolate_status ==
false && stack.size() >= 2) {
// Presumably the paragraph-separator case: the character gets the paragraph level.
421 it->embedding_level = paragraph_embedding_level;
// Presumably the default case: current level, plus override direction if one is active.
428 it->embedding_level = current_embedding_level;
429 if (current_override_status != ON) {
430 it->direction = current_override_status;
// Returns an iterator that the caller (unicode_bidi_P1_paragraph) uses as the
// new `last`. The visible predicate matches characters whose direction is RLE,
// LRE, RLO, LRO, PDF or BN; the algorithm applying it is not visible in this
// listing (presumably a remove_if-style compaction — confirm).
436[[nodiscard]]
constexpr unicode_bidi_char_info_iterator
437unicode_bidi_X9(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last)
noexcept
442 return character.direction == RLE || character.direction == LRE || character.direction == RLO ||
443 character.direction == LRO || character.direction == PDF || character.direction == BN;
// Resolves NSM characters in the sequence from the class of the preceding
// character, starting from `sos`. The `case` labels are not visible: one
// branch rewrites the NSM to ON, the other copies the previous class.
447constexpr void unicode_bidi_W1(unicode_bidi_isolated_run_sequence& sequence)
noexcept
451 auto previous_bidi_class = sequence.sos;
452 for (
auto& char_info : sequence) {
453 if (char_info.direction == NSM) {
454 switch (previous_bidi_class) {
459 char_info.direction = ON;
462 char_info.direction = previous_bidi_class;
// Remember this character's (possibly rewritten) direction for the next one.
467 previous_bidi_class = char_info.direction;
// Tracks the most recent strong direction (seeded with `sos`) and rewrites a
// character to AN while that direction is AL. The `case` labels selecting
// which characters update the tracker and which are rewritten are not visible
// in this listing.
471constexpr void unicode_bidi_W2(unicode_bidi_isolated_run_sequence& sequence)
noexcept
475 auto last_strong_direction = sequence.sos;
476 for (
auto& char_info : sequence) {
477 switch (char_info.direction) {
481 last_strong_direction = char_info.direction;
484 if (last_strong_direction == AL) {
485 char_info.direction = AN;
// Rewrites every character whose direction is AL to R.
493constexpr void unicode_bidi_W3(unicode_bidi_isolated_run_sequence& sequence)
noexcept
497 for (
auto& char_info : sequence) {
498 if (char_info.direction == AL) {
499 char_info.direction = R;
// Resolves a single separator sitting between two numbers of the same kind,
// using a sliding window of the two previous characters.
504constexpr void unicode_bidi_W4(unicode_bidi_isolated_run_sequence& sequence)
noexcept
// back1 = previous character, back2 = the one before it; null until seen.
508 unicode_bidi_char_info *back1 =
nullptr;
509 unicode_bidi_char_info *back2 =
nullptr;
510 for (
auto& char_info : sequence) {
// EN, (ES | CS), EN  ->  the middle character becomes EN.
511 if (char_info.direction == EN && back2 !=
nullptr && back2->direction == EN && back1 !=
nullptr &&
512 (back1->direction == ES || back1->direction == CS)) {
513 back1->direction = EN;
// AN, CS, AN  ->  the middle character becomes AN.
515 if (char_info.direction == AN && back2 !=
nullptr && back2->direction == AN && back1 !=
nullptr &&
516 back1->direction == CS) {
517 back1->direction = AN;
// Slide the window: old back1 becomes back2, current character becomes back1.
520 back2 = std::exchange(back1, &char_info);
// Handles runs of ET characters adjacent to EN characters. The `case` labels
// are not visible in this listing; the branch notes below are inferred.
524constexpr void unicode_bidi_W5(unicode_bidi_isolated_run_sequence& sequence)
noexcept
// Start of a pending run not yet known to touch an EN; end(sequence) = none.
528 auto ET_start =
end(sequence);
// True while the characters being visited directly follow an EN.
529 auto starts_with_EN =
false;
531 for (
auto it =
begin(sequence); it !=
end(sequence); ++it) {
532 auto& char_info = *it;
534 switch (char_info.direction) {
// Presumably `case ET`: directly after an EN it becomes EN; otherwise, if no
// pending run is recorded yet, one is started (the assignment is not visible).
536 if (starts_with_EN) {
537 char_info.direction = EN;
538 }
else if (ET_start ==
end(sequence)) {
// Presumably `case EN`: note that an EN was seen and walk the pending run
// [ET_start, it) (loop body not visible; presumably sets EN), then clear it.
544 starts_with_EN =
true;
545 if (ET_start !=
end(sequence)) {
546 for (
auto jt = ET_start; jt != it; ++jt) {
549 ET_start =
end(sequence);
// Presumably the default case: any other class resets both trackers.
554 starts_with_EN =
false;
555 ET_start =
end(sequence);
// Rewrites every remaining ET, ES or CS character to ON.
560constexpr void unicode_bidi_W6(unicode_bidi_isolated_run_sequence& sequence)
noexcept
564 for (
auto& char_info : sequence) {
565 if (char_info.direction == ET || char_info.direction == ES || char_info.direction == CS) {
566 char_info.direction = ON;
// Tracks the most recent strong direction (seeded with `sos`) and rewrites a
// character to L while that direction is L. The `case` labels are not visible
// in this listing, so which classes update the tracker and which are rewritten
// cannot be read from here.
571constexpr void unicode_bidi_W7(unicode_bidi_isolated_run_sequence& sequence)
noexcept
575 auto last_strong_direction = sequence.sos;
576 for (
auto& char_info : sequence) {
577 switch (char_info.direction) {
580 last_strong_direction = char_info.direction;
583 if (last_strong_direction == L) {
584 char_info.direction = L;
// Interior of the bracket-pair finder (called as unicode_bidi_BD16 from
// unicode_bidi_N0; its signature line is not visible in this listing).
// One open bracket waiting for its partner: where it is, and which code point
// would close it.
594 struct bracket_start {
595 unicode_bidi_isolated_run_sequence::iterator it;
596 char32_t mirrored_bracket;
598 bracket_start(unicode_bidi_isolated_run_sequence::iterator it,
char32_t mirrored_bracket) noexcept :
599 it(
std::move(it)), mirrored_bracket(mirrored_bracket)
// Fixed-capacity stack of pending open brackets (63 entries).
607 auto stack = hi::stack<bracket_start, 63>{};
609 for (
auto it =
begin(isolated_run_sequence); it !=
end(isolated_run_sequence); ++it) {
// Only characters whose current direction is ON are considered as brackets.
610 if (it->direction == ON) {
611 switch (it->bracket_type) {
// Opening bracket: push it together with its mirrored glyph (how
// `mirrored_glyph` and `canonical_equivalent` are computed is not visible).
612 case unicode_bidi_paired_bracket_type::o:
623 hi_axiom(ucd_get_bidi_paired_bracket_type(*canonical_equivalent) == unicode_bidi_paired_bracket_type::o);
628 stack.emplace_back(it, mirrored_glyph);
// Closing bracket: search the stack from the top down for an opener whose
// expected closer equals this code point or its canonical equivalent, and
// record the pair.
632 case unicode_bidi_paired_bracket_type::c:
// NOTE(review): when nothing matches, `--jt` steps to before stack.begin()
// ahead of the `jt >= stack.begin()` test; whether that is well-defined
// depends on hi::stack's iterator type — verify.
635 for (
auto jt = stack.end() - 1; jt >= stack.begin(); --jt) {
636 if (jt->mirrored_bracket == it->code_point or
637 (canonical_equivalent and jt->mirrored_bracket == *canonical_equivalent)) {
638 pairs.emplace_back(jt->it, it);
655[[nodiscard]]
constexpr unicode_bidi_class unicode_bidi_N0_strong(unicode_bidi_class direction)
// Parameters and body of the helper called as
// unicode_bidi_N0_preceding_strong_type from unicode_bidi_N0 (its return type
// and name line is not visible in this listing). Walks backwards from the open
// bracket towards the start of the sequence and returns the first strong
// direction found; falls back to `sos` when none is.
672 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
673 unicode_bidi_isolated_run_sequence::iterator
const& open_bracket)
noexcept
677 auto it = open_bracket;
678 while (it !=
begin(isolated_run_sequence)) {
// unicode_bidi_N0_strong yields ON for characters that are not strong
// (the iterator decrement and the return inside this `if` are not visible).
681 if (
hilet direction = unicode_bidi_N0_strong(it->direction); direction != ON) {
686 return isolated_run_sequence.sos;
// Scans the characters strictly between pair.open and pair.close. Each
// character's direction is reduced with unicode_bidi_N0_strong; ON results are
// skipped, a result equal to `embedding_direction` triggers a branch whose body
// is not visible (presumably an immediate return of that direction), and any
// other strong result is remembered. Returns the remembered opposite
// direction, or ON when no strong character was enclosed.
690unicode_bidi_N0_enclosed_strong_type(unicode_bidi_bracket_pair
const& pair, unicode_bidi_class embedding_direction)
noexcept
694 auto opposite_direction = ON;
695 for (
auto it = pair.open + 1; it != pair.close; ++it) {
696 hilet direction = unicode_bidi_N0_strong(it->direction);
697 if (direction == ON) {
700 if (direction == embedding_direction) {
703 opposite_direction = direction;
706 return opposite_direction;
// Resolves the direction of paired brackets in one isolated run sequence.
// Several closing braces / `continue` / `break` lines are not visible in this
// listing; the notes describe only what the visible statements do.
709constexpr void unicode_bidi_N0(unicode_bidi_isolated_run_sequence& isolated_run_sequence, unicode_bidi_context
const& context)
// Feature switch from the context (branch body not visible; presumably returns).
713 if (not context.enable_mirrored_brackets) {
717 auto bracket_pairs = unicode_bidi_BD16(isolated_run_sequence);
718 hilet embedding_direction = isolated_run_sequence.embedding_direction();
720 for (
auto& pair : bracket_pairs) {
// Strong type found inside the brackets (ON = none).
721 auto pair_direction = unicode_bidi_N0_enclosed_strong_type(pair, embedding_direction);
// No strong type inside: branch body not visible (presumably the pair is left alone).
723 if (pair_direction == ON) {
// Only the opposite direction was found inside: consult the strong type
// preceding the open bracket; if that matches the embedding direction or is
// ON, the embedding direction wins.
727 if (pair_direction != embedding_direction) {
728 pair_direction = unicode_bidi_N0_preceding_strong_type(isolated_run_sequence, pair.open);
730 if (pair_direction == embedding_direction || pair_direction == ON) {
731 pair_direction = embedding_direction;
// Apply the resolved direction to both brackets.
735 pair.open->direction = pair_direction;
736 pair.close->direction = pair_direction;
// Characters after the open bracket: tested on their ORIGINAL class being
// NSM (branch body not visible; presumably the loop stops at the first
// non-NSM), then given the pair direction.
738 for (
auto it = pair.open + 1; it != pair.close; ++it) {
739 if (it->bidi_class != NSM) {
742 it->direction = pair_direction;
// Same treatment for the characters following the closing bracket.
745 for (
auto it = pair.close + 1; it !=
end(isolated_run_sequence); ++it) {
746 if (it->bidi_class != NSM) {
749 it->direction = pair_direction;
// Resolves runs of NI characters (as classified by is_NI) that are surrounded
// by the same strong direction on both sides. EN and AN count as R when they
// border such a run; `sos` / `eos` stand in at the sequence boundaries.
754constexpr void unicode_bidi_N1(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
// Direction seen just before the current NI run; starts as sos.
758 auto direction_before_NI = isolated_run_sequence.sos;
// Start of the NI run being collected; end(...) means "not inside a run".
759 auto first_NI =
end(isolated_run_sequence);
761 for (
auto it =
begin(isolated_run_sequence); it !=
end(isolated_run_sequence); ++it) {
762 hilet& char_info = *it;
763 if (first_NI !=
end(isolated_run_sequence)) {
// Inside a run and the current character is not NI: the run ends here.
764 if (!is_NI(char_info.direction)) {
765 hilet direction_after_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
// Both sides are the same L or R: the whole run takes that direction.
767 if ((direction_before_NI == L || direction_before_NI == R) && direction_before_NI == direction_after_NI) {
768 std::for_each(first_NI, it, [direction_before_NI](
auto& item) {
769 item.direction = direction_before_NI;
773 first_NI =
end(isolated_run_sequence);
774 direction_before_NI = direction_after_NI;
// Not inside a run: an NI character starts one (assignment not visible);
// any other character updates the "before" direction.
777 }
else if (is_NI(char_info.direction)) {
780 direction_before_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
// A run still open at the end of the sequence is resolved against eos.
784 if (first_NI !=
end(isolated_run_sequence) && direction_before_NI == isolated_run_sequence.eos) {
785 std::for_each(first_NI,
end(isolated_run_sequence), [direction_before_NI](
auto& item) {
786 item.direction = direction_before_NI;
// Gives every character that is still NI (per is_NI) the embedding direction
// of the sequence.
791constexpr void unicode_bidi_N2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
793 hilet embedding_direction = isolated_run_sequence.embedding_direction();
795 for (
auto& char_info : isolated_run_sequence) {
796 if (is_NI(char_info.direction)) {
797 char_info.direction = embedding_direction;
// Raises each character's embedding level according to its resolved direction.
802constexpr void unicode_bidi_I1_I2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
806 for (
auto& char_info : isolated_run_sequence) {
// Even level: R goes up by one, AN / EN go up by two.
807 if ((char_info.embedding_level % 2) == 0) {
809 if (char_info.direction == R) {
810 char_info.embedding_level += 1;
811 }
else if (char_info.direction == AN || char_info.direction == EN) {
812 char_info.embedding_level += 2;
// Otherwise (the `else` line is not visible; presumably the odd-level
// branch): L, AN and EN go up by one.
816 if (char_info.direction == L || char_info.direction == AN || char_info.direction == EN) {
817 char_info.embedding_level += 1;
// Splits [first, last) into level runs: maximal stretches of characters with
// the same embedding level. The statements that append a run to the result and
// the return are not visible in this listing.
824unicode_bidi_BD7(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last)
noexcept
828 auto embedding_level = int8_t{0};
829 auto run_start = first;
830 for (
auto it = first; it != last; ++it) {
// First branch (condition not visible): adopt the character's level.
832 embedding_level = it->embedding_level;
// Level changed: a new run starts at this character.
834 }
else if (it->embedding_level != embedding_level) {
835 embedding_level = it->embedding_level;
// Trailing run, if the range was not empty.
841 if (run_start != last) {
// Interior of the function called as unicode_bidi_BD13 from unicode_bidi_X10
// (signature not visible in this listing). Consumes `level_runs` from the back,
// building one isolated run sequence per iteration.
854 while (!level_runs.
empty()) {
855 auto isolated_run_sequence = unicode_bidi_isolated_run_sequence(level_runs.
back());
// While the sequence ends with an isolate initiator, look for the level run
// that starts with the matching PDI and append it.
858 while (isolated_run_sequence.ends_with_isolate_initiator() && !level_runs.
empty()) {
// Nesting depth of isolates still to be closed during the search.
860 auto isolation_level = 1;
861 for (
auto it = std::rbegin(level_runs); it != std::rend(level_runs); ++it) {
// A PDI that brings the depth to zero is the match; it must share the
// sequence's embedding level.
862 if (it->starts_with_PDI() && --isolation_level == 0) {
863 hi_axiom(it->embedding_level() == isolated_run_sequence.embedding_level());
864 isolated_run_sequence.add_run(*it);
// A run ending with another initiator (body not visible; presumably
// increments isolation_level).
868 if (it->ends_with_isolate_initiator()) {
// No matching PDI found (body not visible; presumably leaves the inner loop).
873 if (isolation_level != 0) {
// Parameters and body of the helper called as unicode_bidi_X10_sos_eos; the
// caller std::tie's its result into (sos, eos). The return-type line and the
// function applied to each pair of levels below are not visible in this listing.
886 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
887 unicode_bidi_char_info_iterator first,
888 unicode_bidi_char_info_iterator last,
889 int8_t paragraph_embedding_level)
noexcept
// Only a non-empty sequence has boundaries to inspect.
891 if (
begin(isolated_run_sequence) !=
end(isolated_run_sequence)) {
// Positions in the underlying paragraph range: the sequence's first
// character, and one past its last character.
894 hilet first_char_it =
begin(isolated_run_sequence).child();
895 hilet last_char_it = (
end(isolated_run_sequence) - 1).child() + 1;
897 hilet has_char_before = first_char_it != first;
898 hilet has_char_after = last_char_it != last;
// Start side: the sequence level versus the level of the preceding
// character, or the paragraph level when there is none.
901 isolated_run_sequence.embedding_level(),
902 has_char_before ? (first_char_it - 1)->embedding_level : paragraph_embedding_level);
// End side: the sequence level versus the level of the following character;
// the paragraph level is used when there is none or when the sequence ends
// with an isolate initiator.
904 isolated_run_sequence.embedding_level(),
905 has_char_after && !isolated_run_sequence.ends_with_isolate_initiator() ? last_char_it->embedding_level :
906 paragraph_embedding_level);
// Builds the isolated run sequences of [first, last) and runs the weak (W1-W7),
// neutral (N0-N2) and implicit-level (I1/I2) steps on each, in that order.
918constexpr void unicode_bidi_X10(
919 unicode_bidi_char_info_iterator first,
920 unicode_bidi_char_info_iterator last,
921 int8_t paragraph_embedding_level,
922 unicode_bidi_context
const& context)
noexcept
// Level runs (BD7) grouped into isolated run sequences (BD13).
924 auto isolated_run_sequence_set = unicode_bidi_BD13(unicode_bidi_BD7(first, last));
// First pass: compute sos / eos for every sequence before any step below
// changes embedding levels.
929 for (
auto& isolated_run_sequence : isolated_run_sequence_set) {
930 std::tie(isolated_run_sequence.sos, isolated_run_sequence.eos) =
931 unicode_bidi_X10_sos_eos(isolated_run_sequence, first, last, paragraph_embedding_level);
// Second pass: apply the rules to each sequence.
934 for (
auto& isolated_run_sequence : isolated_run_sequence_set) {
935 unicode_bidi_W1(isolated_run_sequence);
936 unicode_bidi_W2(isolated_run_sequence);
937 unicode_bidi_W3(isolated_run_sequence);
938 unicode_bidi_W4(isolated_run_sequence);
939 unicode_bidi_W5(isolated_run_sequence);
940 unicode_bidi_W6(isolated_run_sequence);
941 unicode_bidi_W7(isolated_run_sequence);
942 unicode_bidi_N0(isolated_run_sequence, context);
943 unicode_bidi_N1(isolated_run_sequence);
944 unicode_bidi_N2(isolated_run_sequence);
945 unicode_bidi_I1_I2(isolated_run_sequence);
// Parameters and body of the function called as unicode_bidi_L1 from
// unicode_bidi_P1_line, which destructures the result into
// (lowest_odd, highest). Resets the level of separators and of the whitespace
// before them to the paragraph level, and collects the highest level and the
// lowest odd level of the line. The declarations of `it` and `lowest_odd` and
// several `else` / brace lines are not visible in this listing.
950 unicode_bidi_char_info_iterator first,
951 unicode_bidi_char_info_iterator last,
952 int8_t paragraph_embedding_level)
noexcept
957 auto highest = paragraph_embedding_level;
// True while only separators / resettable whitespace have been seen since the
// last separator (or since the end of the line, where the scan starts).
958 auto preceding_is_segment =
true;
// The loop runs until `it` reaches `first` (presumably walking backwards from `last`).
961 while (it != first) {
// Uses the ORIGINAL class of the character, not the resolved direction.
964 auto bidi_class = it->bidi_class;
// Separator (B or S): reset to the paragraph level.
966 if (bidi_class == B || bidi_class == S) {
967 it->embedding_level = paragraph_embedding_level;
968 preceding_is_segment =
true;
// Whitespace or isolate formatter in a stretch that leads up to a separator
// or the line end: reset as well and keep the stretch going.
970 }
else if (preceding_is_segment && (bidi_class == WS || is_isolate_formatter(bidi_class))) {
971 it->embedding_level = paragraph_embedding_level;
972 preceding_is_segment =
true;
// Any other character: track the level extremes and end the stretch.
975 highest =
std::max(highest, it->embedding_level);
976 if ((it->embedding_level % 2) == 1) {
977 lowest_odd =
std::min(lowest_odd, it->embedding_level);
980 preceding_is_segment =
false;
// The paragraph level itself takes part in the lowest-odd computation.
984 if ((paragraph_embedding_level % 2) == 1) {
985 lowest_odd =
std::min(lowest_odd, paragraph_embedding_level);
// lowest_odd above highest: fall back to `highest` when it is odd, else
// `highest - 1` (the `else` line is not visible here).
988 if (lowest_odd > highest) {
990 if (highest % 2 == 1) {
992 lowest_odd = highest;
996 lowest_odd = highest - 1;
1000 return {lowest_odd, highest};
// For each level from `highest` down to `lowest_odd`, finds every maximal
// stretch of characters whose embedding level is at least that level. What is
// done with each stretch (presumably a reversal) and the `lowest_odd` parameter
// line are not visible in this listing.
1003constexpr void unicode_bidi_L2(
1004 unicode_bidi_char_info_iterator first,
1005 unicode_bidi_char_info_iterator last,
1007 int8_t highest)
noexcept
1009 for (int8_t level = highest; level >= lowest_odd; --level) {
// `last` doubles as the "no stretch open" marker.
1010 auto sequence_start = last;
1011 for (
auto it = first; it != last; ++it) {
1012 if (sequence_start == last) {
// Not in a stretch: one starts at the first character at or above `level`.
1013 if (it->embedding_level >= level) {
1014 sequence_start = it;
// In a stretch: it ends at the first character below `level`.
1016 }
else if (it->embedding_level < level) {
1018 sequence_start = last;
// A stretch still open at the end of the range is handled here.
1021 if (sequence_start != last) {
// Intentionally empty: the body does nothing with [first, last). Kept so that
// unicode_bidi_P1_line can call the L1, L2, L3 steps in order.
1027constexpr void unicode_bidi_L3(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last)
noexcept {}
// Fallback paragraph class taken from the context: auto_LTR -> L,
// auto_RTL -> R. The branch for the remaining modes is not visible in this listing.
1029[[nodiscard]]
constexpr unicode_bidi_class unicode_bidi_P2_default(unicode_bidi_context
const& context)
noexcept
1031 if (context.direction_mode == unicode_bidi_context::mode_type::auto_LTR) {
1032 return unicode_bidi_class::L;
1033 }
else if (context.direction_mode == unicode_bidi_context::mode_type::auto_RTL) {
1034 return unicode_bidi_class::R;
// Parameters and body of unicode_bidi_P2 (its return-type and name line is not
// visible in this listing). Determines the paragraph bidi class of
// [first, last). `rule_X5c` is true when unicode_bidi_X1 calls this for the
// text after an isolate initiator.
1041 unicode_bidi_char_info_iterator first,
1042 unicode_bidi_char_info_iterator last,
1043 unicode_bidi_context
const& context,
1044 bool rule_X5c)
noexcept
// Explicit modes short-circuit the scan.
1048 if (context.direction_mode == unicode_bidi_context::mode_type::LTR) {
1049 return unicode_bidi_class::L;
1050 }
else if (context.direction_mode == unicode_bidi_context::mode_type::RTL) {
1051 return unicode_bidi_class::R;
// Isolate nesting depth while scanning.
1054 long long isolate_level = 0;
1055 for (
auto it = first; it != last; ++it) {
1056 switch (it->direction) {
// Case labels not visible; presumably the strong classes: outside any isolate
// the first one found is the answer.
1060 if (isolate_level == 0) {
1061 return it->direction;
// Case label not visible; presumably PDI: inside an isolate it closes one
// level (statement not visible); at depth zero with rule_X5c set the scan
// stops and the context default is returned.
1070 if (isolate_level > 0) {
1072 }
else if (rule_X5c) {
1074 return unicode_bidi_P2_default(context);
// Nothing decisive found in the whole range.
1080 return unicode_bidi_P2_default(context);
// Paragraph embedding level from the paragraph class: the boolean "class is AL
// or R" converted to int8_t by wide_cast (presumably 1 for true and 0 for false).
1083[[nodiscard]]
constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class)
noexcept
1085 return wide_cast<int8_t>(paragraph_bidi_class == unicode_bidi_class::AL or paragraph_bidi_class == unicode_bidi_class::R);
// Runs the per-line steps on [first, last): L1 yields the level bounds, L2
// uses them, then L3 (currently a no-op). `context` is not used by the visible
// statements.
1088constexpr void unicode_bidi_P1_line(
1089 unicode_bidi_char_info_iterator first,
1090 unicode_bidi_char_info_iterator last,
1091 int8_t paragraph_embedding_level,
1092 unicode_bidi_context
const& context)
noexcept
1094 hilet[lowest_odd, highest] = unicode_bidi_L1(first, last, paragraph_embedding_level);
1095 unicode_bidi_L2(first, last, lowest_odd, highest);
1096 unicode_bidi_L3(first, last);
// Parameters and body of the function called as unicode_bidi_P2_P3 (its
// return-type and name line is not visible in this listing). Returns
// {paragraph embedding level, paragraph direction}.
1101 unicode_bidi_char_info_iterator first,
1102 unicode_bidi_char_info_iterator last,
1103 unicode_bidi_context
const& context)
noexcept
// Top-level paragraph scan, so rule_X5c is false.
1105 hilet default_paragraph_direction = unicode_bidi_P2(first, last, context,
false);
1106 hilet paragraph_embedding_level = unicode_bidi_P3(default_paragraph_direction);
// Normalise to L or R from the level parity.
1107 hilet paragraph_direction = paragraph_embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1108 return {paragraph_embedding_level, paragraph_direction};
// Parameters and body of the function called as unicode_bidi_P1_paragraph (its
// return-type and name line is not visible in this listing). Processes one
// paragraph and returns {new end of the paragraph, paragraph direction}.
1112 unicode_bidi_char_info_iterator first,
1113 unicode_bidi_char_info_iterator last,
1114 unicode_bidi_context
const& context)
noexcept
1116 hilet[paragraph_embedding_level, paragraph_direction] = unicode_bidi_P2_P3(first, last, context);
1118 unicode_bidi_X1(first, last, paragraph_embedding_level, context);
// X9 returns a new end iterator; everything below works on the shortened range.
1119 last = unicode_bidi_X9(first, last);
1120 unicode_bidi_X10(first, last, paragraph_embedding_level, context);
// Split the paragraph into lines after each Zl code point (when enabled) and
// run the per-line steps on each line.
1122 auto line_begin = first;
1123 for (
auto it = first; it != last; ++it) {
1124 hilet general_category = ucd_get_general_category(it->code_point);
1125 if (context.enable_line_separator and general_category == unicode_general_category::Zl) {
// The separator itself belongs to the line it terminates.
1126 hilet line_end = it + 1;
1127 unicode_bidi_P1_line(line_begin, line_end, paragraph_embedding_level, context);
1128 line_begin = line_end;
// Remaining characters after the last separator form the final line.
1132 if (line_begin != last) {
1133 unicode_bidi_P1_line(line_begin, last, paragraph_embedding_level, context);
1136 return {last, paragraph_direction};
// Parameters and body of the function called as detail::unicode_bidi_P1 (its
// return-type and name line, and the declarations of `it` and
// `paragraph_directions`, are not visible in this listing). Splits
// [first, last) into paragraphs at characters of class B, processes each, and
// returns {new end of the whole range, one direction per paragraph}.
1140 unicode_bidi_char_info_iterator first,
1141 unicode_bidi_char_info_iterator last,
1142 unicode_bidi_context
const& context)
noexcept
1145 auto paragraph_begin = it;
1147 while (it != last) {
1148 if (it->direction == unicode_bidi_class::B) {
// The separator is part of the paragraph it ends.
1149 hilet paragraph_end = it + 1;
1150 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, paragraph_end, context);
1151 paragraph_directions.push_back(paragraph_bidi_class);
// The paragraph may have shrunk; rotate moves the leftover slots
// [new_paragraph_end, paragraph_end) behind the not-yet-processed text so
// the next paragraph follows directly (any adjustment of `last` after this
// is not visible here).
1154 std::rotate(new_paragraph_end, paragraph_end, last);
1157 paragraph_begin = it = new_paragraph_end;
// Text after the last separator forms a final paragraph.
1163 if (paragraph_begin != last) {
1164 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, last, context);
1165 paragraph_directions.push_back(paragraph_bidi_class);
1166 last = new_paragraph_end;
1169 return {last,
std::move(paragraph_directions)};
// Writes results back through the caller's callbacks: for each processed
// character, reports the text direction to the matching output element. The
// `output_it` parameter line and the body of the bracket branch are not
// visible in this listing. (The template parameter names appear split across
// lines here because of the listing's formatting.)
1172template<
typename OutputIt,
typename SetCodePo
int,
typename SetTextDirection>
1173constexpr void unicode_bidi_L4(
1174 unicode_bidi_char_info_iterator first,
1175 unicode_bidi_char_info_iterator last,
1177 SetCodePoint set_code_point,
1178 SetTextDirection set_text_direction)
noexcept
// Input and output advance in lock-step.
1180 for (
auto it = first; it != last; ++it, ++output_it) {
// Even embedding level -> L, odd -> R.
1181 hilet text_direction = it->embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1182 set_text_direction(*output_it, text_direction);
// Bracket characters resolved to R (branch body not visible; presumably the
// mirrored glyph is passed to set_code_point).
1183 if (it->direction == unicode_bidi_class::R and it->bracket_type != unicode_bidi_paired_bracket_type::n) {
// Public entry point `unicode_bidi(first, last, get_code_point,
// set_code_point, set_text_direction, context)`. The signature line, the
// `index` declaration and the step between P1 and L4 are not visible in this
// listing. (The template parameter names appear split across lines here
// because of the listing's formatting.)
1214template<
typename It,
typename GetCodePo
int,
typename SetCodePo
int,
typename SetTextDirection>
1218 GetCodePoint get_code_point,
1219 SetCodePoint set_code_point,
1220 SetTextDirection set_text_direction,
// Work vector: one entry per input element, holding its original index and code point.
1223 auto proxy = detail::unicode_bidi_char_info_vector{};
1227 for (
auto it = first; it != last; ++it) {
1228 proxy.emplace_back(index++, get_code_point(*it));
// Run the algorithm on the proxy; proxy_last is the end of the surviving characters.
1231 auto [proxy_last, paragraph_directions] = detail::unicode_bidi_P1(
begin(proxy),
end(proxy), context);
// Report directions / code points back through the caller's callbacks.
1236 detail::unicode_bidi_L4(
1240 std::forward<SetCodePoint>(set_code_point),
1241 std::forward<SetTextDirection>(set_text_direction));
// NOTE(review): `last` is presumably reassigned in a line not visible here;
// otherwise the returned iterator would ignore removed characters — confirm.
1242 return {last,
std::move(paragraph_directions)};
// Public entry point `unicode_bidi_direction(first, last, get_code_point,
// context)`: returns the paragraph direction computed by unicode_bidi_P2_P3.
// The signature line and the `index` declaration are not visible in this listing.
1253template<
typename It,
typename GetCodePo
int>
1257 auto proxy = detail::unicode_bidi_char_info_vector{};
1261 for (
auto it = first; it != last; ++it) {
1262 proxy.emplace_back(index++, get_code_point(*it));
// A paragraph separator was just appended (branch body not visible;
// presumably the loop stops so only the first paragraph is examined).
1263 if (proxy.back().direction == unicode_bidi_class::B) {
// .second of the {level, direction} pair is the direction.
1269 return detail::unicode_bidi_P2_P3(
begin(proxy),
end(proxy), context).second;
// Public entry point `unicode_bidi_control_filter(first, last,
// code_point_func)`. Only the predicate body is visible in this listing: an
// item matches when the bidi class of its code point satisfies is_control.
// The algorithm the predicate is passed to is not visible. (The template
// parameter name appears split across lines here because of the listing's
// formatting.)
1282template<
typename It,
typename EndIt,
typename CodePo
intFunc>
1286 hilet code_point = code_point_func(item);
1287 hilet bidi_class = ucd_get_bidi_class(code_point);
1288 return is_control(bidi_class);
#define hi_no_default(...)
This part of the code should not be reachable, unless a programming bug.
Definition assert.hpp:279
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
auto shuffle_by_index(auto first, auto last, auto indices_first, auto indices_last, auto index_op) noexcept
Shuffle a container based on a list of indices.
Definition algorithm.hpp:232
constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const &code_point_func)
Removes control characters which will not survive the bidi-algorithm.
Definition unicode_bidi.hpp:1283
constexpr std::pair< It, std::vector< unicode_bidi_class > > unicode_bidi(It first, It last, GetCodePoint get_code_point, SetCodePoint set_code_point, SetTextDirection set_text_direction, unicode_bidi_context const &context={})
Reorder a given range of characters based on the unicode_bidi algorithm.
Definition unicode_bidi.hpp:1215
constexpr unicode_bidi_class unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const &context={})
Get the unicode bidi direction for the first paragraph and context.
Definition unicode_bidi.hpp:1255
constexpr char32_t ucd_get_bidi_mirroring_glyph(char32_t code_point) noexcept
Get the bidi-mirroring-glyph for a code-point.
Definition ucd_bidi_mirroring_glyphs.hpp:171
constexpr ucd_decomposition_info ucd_get_decomposition(char32_t code_point) noexcept
Get the decomposition info of a code-point.
Definition ucd_decompositions.hpp:4798
unicode_bidi_class
Bidirectional class Unicode Standard Annex #9: https://unicode.org/reports/tr9/.
Definition ucd_bidi_classes.hpp:856
constexpr std::optional< char32_t > canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition ucd_decompositions.hpp:4785
Definition unicode_bidi.hpp:18
Definition unicode_bidi.hpp:45
unicode_bidi_class direction
Current computed direction of the code-point.
Definition unicode_bidi.hpp:63
constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept
Constructor for testing to bypass normal initialization.
Definition unicode_bidi.hpp:86
int8_t embedding_level
The embedding level.
Definition unicode_bidi.hpp:58
unicode_bidi_class bidi_class
The original bidi class of the code-point.
Definition unicode_bidi.hpp:68
unicode_bidi_paired_bracket_type bracket_type
The type of bidi-paired-bracket.
Definition unicode_bidi.hpp:72
std::size_t index
Index from the first character in the original list.
Definition unicode_bidi.hpp:48
char32_t code_point
The current code point.
Definition unicode_bidi.hpp:53
Definition unicode_bidi.hpp:101
Definition unicode_bidi.hpp:121
Definition unicode_bidi.hpp:133
Definition unicode_bidi.hpp:177
Definition unicode_bidi.hpp:254
Definition concepts.hpp:39
T emplace_back(T... args)