7#include "ucd_bidi_classes.hpp"
8#include "ucd_bidi_paired_bracket_types.hpp"
9#include "ucd_bidi_mirroring_glyphs.hpp"
10#include "ucd_decompositions.hpp"
11#include "ucd_general_categories.hpp"
12#include "../utility/utility.hpp"
13#include "../container/module.hpp"
14#include "../algorithm/module.hpp"
15#include "../macros.hpp"
// How the paragraph direction is chosen. In unicode_bidi_P2 the LTR and RTL
// modes return L or R outright; auto_LTR and auto_RTL only select the fallback
// (L or R respectively) that unicode_bidi_P2_default returns.
22 enum class mode_type : uint8_t { LTR, RTL, auto_LTR, auto_RTL };
// Direction mode read by unicode_bidi_P2 and unicode_bidi_P2_default.
24 mode_type direction_mode = mode_type::auto_LTR;
// Tested at the top of unicode_bidi_N0 before bracket pairs are collected;
// presumably N0 is skipped when false (the branch body is not visible here).
25 bool enable_mirrored_brackets =
true;
// When true, unicode_bidi_P1_paragraph closes a line after each character whose
// general category is Zl and runs unicode_bidi_P1_line on that line.
26 bool enable_line_separator =
true;
36 if (text_direction == unicode_bidi_class::L) {
37 direction_mode = mode_type::auto_LTR;
38 }
else if (text_direction == unicode_bidi_class::R) {
39 direction_mode = mode_type::auto_RTL;
80 this->code_point = code_point;
81 this->embedding_level = 0;
82 this->direction = this->bidi_class = ucd_get_bidi_class(code_point);
83 this->bracket_type = ucd_get_bidi_paired_bracket_type(code_point);
91 code_point(U
'\ufffd'),
92 direction(bidi_class),
93 bidi_class(bidi_class),
94 bracket_type(unicode_bidi_paired_bracket_type::n),
101using unicode_bidi_char_info_iterator = unicode_bidi_char_info_vector::iterator;
102using unicode_bidi_char_info_const_iterator = unicode_bidi_char_info_vector::const_iterator;
109 template<
typename... Args>
110 constexpr void emplace_character(Args&&...args)
noexcept
116[[nodiscard]]
constexpr unicode_bidi_class unicode_bidi_P2(
117 unicode_bidi_char_info_iterator first,
118 unicode_bidi_char_info_iterator last,
120 bool rule_X5c)
noexcept;
122[[nodiscard]]
constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class)
noexcept;
125 int8_t embedding_level;
126 unicode_bidi_class override_status;
129 constexpr unicode_bidi_stack_element(int8_t embedding_level, unicode_bidi_class override_status,
bool isolate_status) noexcept
131 embedding_level(embedding_level), override_status(override_status), isolate_status(isolate_status)
138 using iterator = unicode_bidi_char_info_iterator;
139 using const_iterator = unicode_bidi_char_info_const_iterator;
144 [[nodiscard]]
constexpr iterator begin()
const noexcept
149 [[nodiscard]]
constexpr iterator end()
const noexcept
/** Embedding level of this run, read from its first character.
 *
 * The run must not be empty; that precondition is stated with hi_axiom.
 */
154 [[nodiscard]]
constexpr int8_t embedding_level()
const noexcept
156 hi_axiom(_begin != _end);
157 return _begin->embedding_level;
/** Check whether the last character of this run is an isolate initiator.
 *
 * @return true when the current direction of the final character is LRI, RLI or FSI.
 */
160 [[nodiscard]]
constexpr bool ends_with_isolate_initiator()
const noexcept
162 using enum unicode_bidi_class;
// The run must not be empty, otherwise `_end - 1` below would not be a valid element.
164 hi_axiom(_begin != _end);
165 hilet& last_char = *(_end - 1);
166 return last_char.direction == LRI || last_char.direction == RLI || last_char.direction == FSI;
/** Check whether the first character of this run has direction PDI.
 *
 * The run must not be empty; that precondition is stated with hi_axiom.
 */
169 [[nodiscard]]
constexpr bool starts_with_PDI()
const noexcept
171 hi_axiom(_begin != _end);
172 return _begin->direction == unicode_bidi_class::PDI;
182 using iterator = recursive_iterator<run_container_type::iterator>;
183 using const_iterator = recursive_iterator<run_container_type::const_iterator>;
186 unicode_bidi_class sos;
187 unicode_bidi_class eos;
190 runs({rhs}), sos(unicode_bidi_class::ON), eos(unicode_bidi_class::ON)
194 [[nodiscard]]
constexpr auto begin()
noexcept
196 return recursive_iterator_begin(runs);
199 [[nodiscard]]
constexpr auto end()
noexcept
201 return recursive_iterator_end(runs);
204 [[nodiscard]]
constexpr auto begin()
const noexcept
206 return recursive_iterator_begin(runs);
209 [[nodiscard]]
constexpr auto end()
const noexcept
211 return recursive_iterator_end(runs);
/** Embedding level of the sequence, taken from its first level run.
 *
 * The sequence must contain at least one run (stated with hi_axiom).
 */
239 [[nodiscard]]
constexpr int8_t embedding_level()
const noexcept
241 hi_axiom(not runs.
empty());
242 return runs.
front().embedding_level();
/** Direction implied by the sequence's embedding level.
 *
 * @return L when embedding_level() is even, R otherwise.
 */
245 [[nodiscard]]
constexpr unicode_bidi_class embedding_direction()
const noexcept
247 return (embedding_level() % 2) == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
/** Check whether the last level run of the sequence ends with an isolate initiator.
 *
 * Forwards to ends_with_isolate_initiator() of the final run; the sequence must
 * contain at least one run (stated with hi_axiom).
 */
250 [[nodiscard]]
constexpr bool ends_with_isolate_initiator()
const noexcept
252 hi_axiom(not runs.
empty());
253 return runs.
back().ends_with_isolate_initiator();
258 unicode_bidi_isolated_run_sequence::iterator open;
259 unicode_bidi_isolated_run_sequence::iterator close;
262 unicode_bidi_isolated_run_sequence::iterator open,
263 unicode_bidi_isolated_run_sequence::iterator close) :
268 [[nodiscard]]
constexpr friend auto
271 return lhs.open <=> rhs.open;
/** Assign explicit embedding levels and override directions to [first, last).
 *
 * Walks the characters once while maintaining a directional-status stack of
 * (embedding level, override status, isolate status) entries and three counters.
 * NOTE(review): the `case` labels of the switch are not visible in this listing;
 * the rule names in the comments below are inferred from the pushed values and
 * from UAX #9 rules X1-X8 - confirm against the full source.
 *
 * @param first Iterator to the first character of the paragraph.
 * @param last Iterator one past the last character of the paragraph.
 * @param paragraph_embedding_level Level written to the character handled at the
 *        `it->embedding_level = paragraph_embedding_level` statement below.
 */
275constexpr void unicode_bidi_X1(
276 unicode_bidi_char_info_iterator first,
277 unicode_bidi_char_info_iterator last,
278 int8_t paragraph_embedding_level,
281 using enum unicode_bidi_class;
// Largest embedding level that may be pushed on the stack.
283 constexpr int8_t max_depth = 125;
// Smallest even level strictly greater than x.
285 auto next_even = [](int8_t x) -> int8_t {
286 return (x % 2 == 0) ? x + 2 : x + 1;
// Smallest odd level strictly greater than x (for non-negative x).
289 auto next_odd = [](int8_t x) -> int8_t {
290 return (x % 2 == 1) ? x + 2 : x + 1;
// Counters for isolates/embeddings that could not be pushed, and for isolates that were.
293 long long overflow_isolate_count = 0;
294 long long overflow_embedding_count = 0;
295 long long valid_isolate_count = 0;
300 for (
auto it = first; it != last; ++it) {
// Snapshot the top of the stack before this character possibly modifies it.
301 hilet current_embedding_level = stack.back().embedding_level;
302 hilet current_override_status = stack.back().override_status;
303 hilet next_odd_embedding_level = next_odd(current_embedding_level);
304 hilet next_even_embedding_level = next_even(current_embedding_level);
// Right-to-left isolate: the initiator itself keeps the current level (and
// override, if any); then an odd-level isolate entry is pushed when it fits.
306 auto RLI_implementation = [&] {
307 it->embedding_level = current_embedding_level;
308 if (current_override_status != ON) {
309 it->direction = current_override_status;
312 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
313 ++valid_isolate_count;
314 stack.emplace_back(next_odd_embedding_level, ON,
true);
// Presumably the else-branch: the isolate does not fit and is counted as overflow.
316 ++overflow_isolate_count;
// Left-to-right isolate: same as above but pushes the next even level.
320 auto LRI_implementation = [&] {
321 it->embedding_level = current_embedding_level;
322 if (current_override_status != ON) {
323 it->direction = current_override_status;
326 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
327 ++valid_isolate_count;
328 stack.emplace_back(next_even_embedding_level, ON,
true);
330 ++overflow_isolate_count;
334 switch (it->direction) {
// Odd-level embedding without override (presumably RLE).
336 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
337 stack.emplace_back(next_odd_embedding_level, ON,
false);
338 }
else if (overflow_isolate_count == 0) {
339 ++overflow_embedding_count;
// Even-level embedding without override (presumably LRE).
344 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
345 stack.emplace_back(next_even_embedding_level, ON,
false);
346 }
else if (overflow_isolate_count == 0) {
347 ++overflow_embedding_count;
// Odd-level embedding with override R (presumably RLO).
352 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
353 stack.emplace_back(next_odd_embedding_level, R,
false);
354 }
else if (overflow_isolate_count == 0) {
355 ++overflow_embedding_count;
// Even-level embedding with override L (presumably LRO).
360 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
361 stack.emplace_back(next_even_embedding_level, L,
false);
362 }
else if (overflow_isolate_count == 0) {
363 ++overflow_embedding_count;
368 RLI_implementation();
372 LRI_implementation();
// Presumably the FSI case: determine the direction of the text after the
// initiator with unicode_bidi_P2 (rule_X5c = true, mode forced to auto_LTR),
// then behave as LRI when the resulting level is 0 and as RLI otherwise.
377 auto sub_context = context;
378 sub_context.direction_mode = unicode_bidi_context::mode_type::auto_LTR;
379 hilet sub_paragraph_bidi_class = unicode_bidi_P2(it + 1, last, sub_context,
true);
380 hilet sub_paragraph_embedding_level = unicode_bidi_P3(sub_paragraph_bidi_class);
381 if (sub_paragraph_embedding_level == 0) {
382 LRI_implementation();
384 RLI_implementation();
// Presumably the PDI case: consume an overflow isolate first; otherwise (when a
// valid isolate exists) drop embedding entries until an isolate entry is on top.
390 if (overflow_isolate_count > 0) {
391 --overflow_isolate_count;
392 }
else if (valid_isolate_count == 0) {
396 overflow_embedding_count = 0;
397 while (stack.back().isolate_status ==
false) {
401 --valid_isolate_count;
// The character takes the level (and override, if any) of the new stack top.
404 it->embedding_level = stack.back().embedding_level;
405 if (stack.back().override_status != ON) {
406 it->direction = stack.back().override_status;
// Presumably the PDF case: overflow counters are consumed first; an entry is only
// considered for popping when it is not an isolate and the stack has at least two entries.
411 if (overflow_isolate_count > 0) {
414 }
else if (overflow_embedding_count > 0) {
415 --overflow_embedding_count;
416 }
else if (stack.back().isolate_status ==
false && stack.size() >= 2) {
// Presumably the paragraph-separator (B) case.
424 it->embedding_level = paragraph_embedding_level;
// Presumably the default case: ordinary characters take the current level and,
// when an override is active, the override direction.
431 it->embedding_level = current_embedding_level;
432 if (current_override_status != ON) {
433 it->direction = current_override_status;
439[[nodiscard]]
constexpr unicode_bidi_char_info_iterator
440unicode_bidi_X9(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last)
noexcept
445 return character.direction == RLE || character.direction == LRE || character.direction == RLO ||
446 character.direction == LRO || character.direction == PDF || character.direction == BN;
/** Resolve NSM characters from the class of the preceding character.
 *
 * Scans the sequence while remembering the direction of the previous character,
 * starting from sequence.sos. NOTE(review): the `case` labels of the switch are
 * not visible here, so which preceding classes map to ON cannot be confirmed.
 */
450constexpr void unicode_bidi_W1(unicode_bidi_isolated_run_sequence& sequence)
noexcept
454 auto previous_bidi_class = sequence.sos;
455 for (
auto& char_info : sequence) {
456 if (char_info.direction == NSM) {
457 switch (previous_bidi_class) {
// For the (hidden) listed classes the NSM becomes ON ...
462 char_info.direction = ON;
// ... otherwise it copies the class of the previous character.
465 char_info.direction = previous_bidi_class;
// Remember the (possibly rewritten) direction for the next iteration.
470 previous_bidi_class = char_info.direction;
/** Rewrite characters to AN when the last strong direction seen is AL.
 *
 * The last strong direction starts as sequence.sos. NOTE(review): the `case`
 * labels are not visible here; presumably strong classes update
 * last_strong_direction and EN is the class that is rewritten - confirm.
 */
474constexpr void unicode_bidi_W2(unicode_bidi_isolated_run_sequence& sequence)
noexcept
478 auto last_strong_direction = sequence.sos;
479 for (
auto& char_info : sequence) {
480 switch (char_info.direction) {
484 last_strong_direction = char_info.direction;
487 if (last_strong_direction == AL) {
488 char_info.direction = AN;
/** Replace every AL direction in the sequence with R. */
496constexpr void unicode_bidi_W3(unicode_bidi_isolated_run_sequence& sequence)
noexcept
500 for (
auto& char_info : sequence) {
501 if (char_info.direction == AL) {
502 char_info.direction = R;
/** Merge a single separator that sits between two numbers of the same kind.
 *
 * Keeps pointers to the previous two characters. When the current character and
 * the one two positions back are both EN and the middle one is ES or CS, the
 * middle one becomes EN; when both are AN and the middle one is CS, it becomes AN.
 */
507constexpr void unicode_bidi_W4(unicode_bidi_isolated_run_sequence& sequence)
noexcept
// back1 is the previous character, back2 the one before it; null until seen.
511 unicode_bidi_char_info *back1 =
nullptr;
512 unicode_bidi_char_info *back2 =
nullptr;
513 for (
auto& char_info : sequence) {
// EN, (ES|CS), EN -> the separator becomes EN.
514 if (char_info.direction == EN && back2 !=
nullptr && back2->direction == EN && back1 !=
nullptr &&
515 (back1->direction == ES || back1->direction == CS)) {
516 back1->direction = EN;
// AN, CS, AN -> the separator becomes AN.
518 if (char_info.direction == AN && back2 !=
nullptr && back2->direction == AN && back1 !=
nullptr &&
519 back1->direction == CS) {
520 back1->direction = AN;
// Shift the window: back2 <- back1, back1 <- current character.
523 back2 = std::exchange(back1, &char_info);
/** Rewrite terminator characters adjacent to EN as EN.
 *
 * Tracks the start of a pending run (ET_start, end(sequence) meaning "none") and
 * whether an EN immediately precedes the current position (starts_with_EN).
 * NOTE(review): the `case` labels and some statements are not visible here;
 * presumably the three groups below are the ET, EN and default cases - confirm.
 */
527constexpr void unicode_bidi_W5(unicode_bidi_isolated_run_sequence& sequence)
noexcept
531 auto ET_start =
end(sequence);
532 auto starts_with_EN =
false;
534 for (
auto it =
begin(sequence); it !=
end(sequence); ++it) {
535 auto& char_info = *it;
537 switch (char_info.direction) {
// First group: directly after an EN the character becomes EN; otherwise a
// pending run is (presumably) opened when none is open yet.
539 if (starts_with_EN) {
540 char_info.direction = EN;
541 }
else if (ET_start ==
end(sequence)) {
// Second group: an EN was found; a pending run, if any, is walked from
// ET_start up to the current position and then closed.
547 starts_with_EN =
true;
548 if (ET_start !=
end(sequence)) {
549 for (
auto jt = ET_start; jt != it; ++jt) {
552 ET_start =
end(sequence);
// Third group: any other class resets both trackers.
557 starts_with_EN =
false;
558 ET_start =
end(sequence);
/** Turn every remaining ET, ES and CS direction in the sequence into ON. */
563constexpr void unicode_bidi_W6(unicode_bidi_isolated_run_sequence& sequence)
noexcept
567 for (
auto& char_info : sequence) {
568 if (char_info.direction == ET || char_info.direction == ES || char_info.direction == CS) {
569 char_info.direction = ON;
/** Rewrite characters to L when the last strong direction seen is L.
 *
 * The last strong direction starts as sequence.sos. NOTE(review): the `case`
 * labels are not visible here; presumably strong classes update
 * last_strong_direction and EN is the class that is rewritten - confirm.
 */
574constexpr void unicode_bidi_W7(unicode_bidi_isolated_run_sequence& sequence)
noexcept
578 auto last_strong_direction = sequence.sos;
579 for (
auto& char_info : sequence) {
580 switch (char_info.direction) {
583 last_strong_direction = char_info.direction;
586 if (last_strong_direction == L) {
587 char_info.direction = L;
597 struct bracket_start {
598 unicode_bidi_isolated_run_sequence::iterator it;
599 char32_t mirrored_bracket;
601 bracket_start(unicode_bidi_isolated_run_sequence::iterator it,
char32_t mirrored_bracket) noexcept :
602 it(
std::move(it)), mirrored_bracket(mirrored_bracket)
// Scan the sequence for bracket characters whose direction is still ON.
612 for (
auto it =
begin(isolated_run_sequence); it !=
end(isolated_run_sequence); ++it) {
613 if (it->direction == ON) {
614 switch (it->bracket_type) {
// Opening bracket: remember its position together with a glyph used for matching.
615 case unicode_bidi_paired_bracket_type::o:
626 hi_axiom(ucd_get_bidi_paired_bracket_type(*canonical_equivalent) == unicode_bidi_paired_bracket_type::o);
631 stack.emplace_back(it, mirrored_glyph);
// Closing bracket: search the stack from the most recent opening bracket
// backwards for an entry whose mirrored bracket equals this code point or
// its canonical equivalent, and record the pair.
635 case unicode_bidi_paired_bracket_type::c:
// NOTE(review): when the stack is empty `stack.end() - 1` is before begin(),
// and when no entry matches, `--jt` steps before begin() as well. Both are
// undefined for standard iterators - verify the stack's iterator type
// tolerates this or guard the loop.
638 for (
auto jt = stack.end() - 1; jt >= stack.begin(); --jt) {
639 if (jt->mirrored_bracket == it->code_point or
640 (canonical_equivalent and jt->mirrored_bracket == *canonical_equivalent)) {
641 pairs.emplace_back(jt->it, it);
658[[nodiscard]]
constexpr unicode_bidi_class unicode_bidi_N0_strong(unicode_bidi_class direction)
675 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
676 unicode_bidi_isolated_run_sequence::iterator
const& open_bracket)
noexcept
680 auto it = open_bracket;
681 while (it !=
begin(isolated_run_sequence)) {
684 if (hilet direction = unicode_bidi_N0_strong(it->direction); direction != ON) {
689 return isolated_run_sequence.sos;
/** Determine the strong direction found between a bracket pair.
 *
 * Each character strictly between pair.open and pair.close is classified with
 * unicode_bidi_N0_strong. The value returned at the end is the last direction
 * seen that differs from embedding_direction, or ON when there was none.
 * NOTE(review): the bodies of the two `if` statements are not visible here;
 * presumably ON is skipped and a match with embedding_direction returns early.
 *
 * @param pair The bracket pair to inspect.
 * @param embedding_direction Direction of the enclosing isolated run sequence.
 */
693unicode_bidi_N0_enclosed_strong_type(unicode_bidi_bracket_pair
const& pair, unicode_bidi_class embedding_direction)
noexcept
697 auto opposite_direction = ON;
698 for (
auto it = pair.open + 1; it != pair.close; ++it) {
699 hilet direction = unicode_bidi_N0_strong(it->direction);
700 if (direction == ON) {
703 if (direction == embedding_direction) {
706 opposite_direction = direction;
709 return opposite_direction;
/** Resolve the direction of paired brackets in an isolated run sequence.
 *
 * For each pair from unicode_bidi_BD16 the direction is derived from the strong
 * types enclosed by the pair, falling back to the strong type preceding the
 * opening bracket and finally to the embedding direction. The result is written
 * to both brackets and propagated over following characters.
 * NOTE(review): several branch bodies are hidden in this listing; the comments
 * marked "presumably" need confirmation against the full source.
 *
 * @param isolated_run_sequence Sequence whose characters are updated in place.
 * @param context Supplies enable_mirrored_brackets.
 */
712constexpr void unicode_bidi_N0(unicode_bidi_isolated_run_sequence& isolated_run_sequence, unicode_bidi_context
const& context)
// Presumably returns early when bracket handling is disabled.
716 if (not context.enable_mirrored_brackets) {
720 auto bracket_pairs = unicode_bidi_BD16(isolated_run_sequence);
721 hilet embedding_direction = isolated_run_sequence.embedding_direction();
723 for (
auto& pair : bracket_pairs) {
724 auto pair_direction = unicode_bidi_N0_enclosed_strong_type(pair, embedding_direction);
// Presumably a pair enclosing no strong type is left untouched.
726 if (pair_direction == ON) {
// Only an opposite-direction result consults the context before the opening bracket.
730 if (pair_direction != embedding_direction) {
731 pair_direction = unicode_bidi_N0_preceding_strong_type(isolated_run_sequence, pair.open);
733 if (pair_direction == embedding_direction || pair_direction == ON) {
734 pair_direction = embedding_direction;
738 pair.open->direction = pair_direction;
739 pair.close->direction = pair_direction;
// Characters after each bracket are tested on their original class (bidi_class);
// presumably the hidden branch body stops the scan at the first non-NSM.
741 for (
auto it = pair.open + 1; it != pair.close; ++it) {
742 if (it->bidi_class != NSM) {
745 it->direction = pair_direction;
748 for (
auto it = pair.close + 1; it !=
end(isolated_run_sequence); ++it) {
749 if (it->bidi_class != NSM) {
752 it->direction = pair_direction;
/** Resolve runs of NI characters that are surrounded by the same strong direction.
 *
 * first_NI marks the start of the current NI run (end(sequence) meaning "none")
 * and direction_before_NI the direction preceding it, starting at sos. EN and AN
 * count as R when used as surrounding direction. A run that reaches the end of
 * the sequence is compared against eos.
 */
757constexpr void unicode_bidi_N1(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
761 auto direction_before_NI = isolated_run_sequence.sos;
762 auto first_NI =
end(isolated_run_sequence);
764 for (
auto it =
begin(isolated_run_sequence); it !=
end(isolated_run_sequence); ++it) {
765 hilet& char_info = *it;
// Inside an NI run: the first non-NI character terminates it.
766 if (first_NI !=
end(isolated_run_sequence)) {
767 if (!is_NI(char_info.direction)) {
768 hilet direction_after_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
// Only L..L or R..R surroundings rewrite the run.
770 if ((direction_before_NI == L || direction_before_NI == R) && direction_before_NI == direction_after_NI) {
771 std::for_each(first_NI, it, [direction_before_NI](
auto& item) {
772 item.direction = direction_before_NI;
776 first_NI =
end(isolated_run_sequence);
777 direction_before_NI = direction_after_NI;
// Outside an NI run: an NI character presumably opens a new run (statement hidden) ...
780 }
else if (is_NI(char_info.direction)) {
// ... and, presumably in the remaining else-branch, the preceding direction is updated.
783 direction_before_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
// A run still open at the end is resolved when the preceding direction equals eos.
787 if (first_NI !=
end(isolated_run_sequence) && direction_before_NI == isolated_run_sequence.eos) {
788 std::for_each(first_NI,
end(isolated_run_sequence), [direction_before_NI](
auto& item) {
789 item.direction = direction_before_NI;
/** Give every character for which is_NI() still holds the sequence's embedding direction. */
794constexpr void unicode_bidi_N2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
796 hilet embedding_direction = isolated_run_sequence.embedding_direction();
798 for (
auto& char_info : isolated_run_sequence) {
799 if (is_NI(char_info.direction)) {
800 char_info.direction = embedding_direction;
/** Raise embedding levels according to the resolved direction of each character.
 *
 * Even level: R goes up by one, AN and EN go up by two.
 * The last condition below (presumably the odd-level branch; its `else` is not
 * visible here) raises L, AN and EN by one.
 */
805constexpr void unicode_bidi_I1_I2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
809 for (
auto& char_info : isolated_run_sequence) {
810 if ((char_info.embedding_level % 2) == 0) {
812 if (char_info.direction == R) {
813 char_info.embedding_level += 1;
814 }
else if (char_info.direction == AN || char_info.direction == EN) {
815 char_info.embedding_level += 2;
819 if (char_info.direction == L || char_info.direction == AN || char_info.direction == EN) {
820 char_info.embedding_level += 1;
827unicode_bidi_BD7(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last)
noexcept
831 auto embedding_level = int8_t{0};
832 auto run_start = first;
833 for (
auto it = first; it != last; ++it) {
835 embedding_level = it->embedding_level;
837 }
else if (it->embedding_level != embedding_level) {
838 embedding_level = it->embedding_level;
844 if (run_start != last) {
857 while (!level_runs.
empty()) {
858 auto isolated_run_sequence = unicode_bidi_isolated_run_sequence(level_runs.
back());
861 while (isolated_run_sequence.ends_with_isolate_initiator() && !level_runs.
empty()) {
863 auto isolation_level = 1;
864 for (
auto it = std::rbegin(level_runs); it != std::rend(level_runs); ++it) {
865 if (it->starts_with_PDI() && --isolation_level == 0) {
866 hi_axiom(it->embedding_level() == isolated_run_sequence.embedding_level());
867 isolated_run_sequence.add_run(*it);
871 if (it->ends_with_isolate_initiator()) {
876 if (isolation_level != 0) {
889 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
890 unicode_bidi_char_info_iterator first,
891 unicode_bidi_char_info_iterator last,
892 int8_t paragraph_embedding_level)
noexcept
894 if (
begin(isolated_run_sequence) !=
end(isolated_run_sequence)) {
897 hilet first_char_it =
begin(isolated_run_sequence).child();
898 hilet last_char_it = (
end(isolated_run_sequence) - 1).child() + 1;
900 hilet has_char_before = first_char_it != first;
901 hilet has_char_after = last_char_it != last;
903 hilet start_embedding_level =
std::max(
904 isolated_run_sequence.embedding_level(),
905 has_char_before ? (first_char_it - 1)->embedding_level : paragraph_embedding_level);
906 hilet end_embedding_level =
std::max(
907 isolated_run_sequence.embedding_level(),
908 has_char_after && !isolated_run_sequence.ends_with_isolate_initiator() ? last_char_it->embedding_level :
909 paragraph_embedding_level);
/** Run the W, N and I rules on every isolated run sequence of a paragraph.
 *
 * The sequences are built with unicode_bidi_BD13(unicode_bidi_BD7(first, last)).
 * In a first pass each sequence receives its sos/eos from unicode_bidi_X10_sos_eos;
 * only after all of them are set does the second pass apply W1-W7, N0-N2 and
 * I1/I2 in that order, since those rules rewrite directions and embedding levels.
 *
 * @param first Iterator to the first character of the paragraph.
 * @param last Iterator one past the last character of the paragraph.
 * @param paragraph_embedding_level Forwarded to unicode_bidi_X10_sos_eos.
 * @param context Forwarded to unicode_bidi_N0.
 */
921constexpr void unicode_bidi_X10(
922 unicode_bidi_char_info_iterator first,
923 unicode_bidi_char_info_iterator last,
924 int8_t paragraph_embedding_level,
925 unicode_bidi_context
const& context)
noexcept
927 auto isolated_run_sequence_set = unicode_bidi_BD13(unicode_bidi_BD7(first, last));
// Pass 1: compute start-of-sequence / end-of-sequence directions.
932 for (
auto& isolated_run_sequence : isolated_run_sequence_set) {
933 std::tie(isolated_run_sequence.sos, isolated_run_sequence.eos) =
934 unicode_bidi_X10_sos_eos(isolated_run_sequence, first, last, paragraph_embedding_level);
// Pass 2: resolve weak types, neutral types and implicit levels.
937 for (
auto& isolated_run_sequence : isolated_run_sequence_set) {
938 unicode_bidi_W1(isolated_run_sequence);
939 unicode_bidi_W2(isolated_run_sequence);
940 unicode_bidi_W3(isolated_run_sequence);
941 unicode_bidi_W4(isolated_run_sequence);
942 unicode_bidi_W5(isolated_run_sequence);
943 unicode_bidi_W6(isolated_run_sequence);
944 unicode_bidi_W7(isolated_run_sequence);
945 unicode_bidi_N0(isolated_run_sequence, context);
946 unicode_bidi_N1(isolated_run_sequence);
947 unicode_bidi_N2(isolated_run_sequence);
948 unicode_bidi_I1_I2(isolated_run_sequence);
953 unicode_bidi_char_info_iterator first,
954 unicode_bidi_char_info_iterator last,
955 int8_t paragraph_embedding_level)
noexcept
960 auto highest = paragraph_embedding_level;
961 auto preceding_is_segment =
true;
964 while (it != first) {
967 auto bidi_class = it->bidi_class;
969 if (bidi_class == B || bidi_class == S) {
970 it->embedding_level = paragraph_embedding_level;
971 preceding_is_segment =
true;
973 }
else if (preceding_is_segment && (bidi_class == WS || is_isolate_formatter(bidi_class))) {
974 it->embedding_level = paragraph_embedding_level;
975 preceding_is_segment =
true;
978 highest =
std::max(highest, it->embedding_level);
979 if ((it->embedding_level % 2) == 1) {
980 lowest_odd =
std::min(lowest_odd, it->embedding_level);
983 preceding_is_segment =
false;
987 if ((paragraph_embedding_level % 2) == 1) {
988 lowest_odd =
std::min(lowest_odd, paragraph_embedding_level);
991 if (lowest_odd > highest) {
993 if (highest % 2 == 1) {
995 lowest_odd = highest;
999 lowest_odd = highest - 1;
1003 return {lowest_odd, highest};
/** Process contiguous sequences of characters per embedding level, highest first.
 *
 * For every level from `highest` down to `lowest_odd` the range is scanned for
 * maximal spans whose embedding level is at least that level. sequence_start
 * marks the beginning of the current span, with `last` meaning "no open span".
 * NOTE(review): the statements applied to each span and the `lowest_odd`
 * parameter line are not visible here; presumably each span is reversed - confirm.
 *
 * @param first Iterator to the first character of the line.
 * @param last Iterator one past the last character of the line.
 * @param highest Highest embedding level to process.
 */
1006constexpr void unicode_bidi_L2(
1007 unicode_bidi_char_info_iterator first,
1008 unicode_bidi_char_info_iterator last,
1010 int8_t highest)
noexcept
1012 for (int8_t level = highest; level >= lowest_odd; --level) {
1013 auto sequence_start = last;
1014 for (
auto it = first; it != last; ++it) {
// No open span: open one at the first character at or above this level.
1015 if (sequence_start == last) {
1016 if (it->embedding_level >= level) {
1017 sequence_start = it;
// Open span: a character below this level closes it.
1019 }
else if (it->embedding_level < level) {
1021 sequence_start = last;
// A span still open at the end of the line is handled here.
1024 if (sequence_start != last) {
/** Placeholder for rule L3; the body is empty, so the range is left unchanged. */
1030constexpr void unicode_bidi_L3(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last)
noexcept {}
/** Fallback paragraph direction taken from the context's direction mode.
 *
 * @param context Supplies direction_mode.
 * @return L for auto_LTR, R for auto_RTL. Handling of the remaining modes is not
 *         visible in this listing.
 */
1032[[nodiscard]]
constexpr unicode_bidi_class unicode_bidi_P2_default(unicode_bidi_context
const& context)
noexcept
1034 if (context.direction_mode == unicode_bidi_context::mode_type::auto_LTR) {
1035 return unicode_bidi_class::L;
1036 }
else if (context.direction_mode == unicode_bidi_context::mode_type::auto_RTL) {
1037 return unicode_bidi_class::R;
1044 unicode_bidi_char_info_iterator first,
1045 unicode_bidi_char_info_iterator last,
1046 unicode_bidi_context
const& context,
1047 bool rule_X5c)
noexcept
1051 if (context.direction_mode == unicode_bidi_context::mode_type::LTR) {
1052 return unicode_bidi_class::L;
1053 }
else if (context.direction_mode == unicode_bidi_context::mode_type::RTL) {
1054 return unicode_bidi_class::R;
1057 long long isolate_level = 0;
1058 for (
auto it = first; it != last; ++it) {
1059 switch (it->direction) {
1063 if (isolate_level == 0) {
1064 return it->direction;
1073 if (isolate_level > 0) {
1075 }
else if (rule_X5c) {
1077 return unicode_bidi_P2_default(context);
1083 return unicode_bidi_P2_default(context);
/** Convert a paragraph direction into the paragraph embedding level.
 *
 * @param paragraph_bidi_class Direction found for the paragraph.
 * @return The boolean "class is AL or R" converted with wide_cast<int8_t>;
 *         presumably 1 for AL/R and 0 for everything else.
 */
1086[[nodiscard]]
constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class)
noexcept
1088 return wide_cast<int8_t>(paragraph_bidi_class == unicode_bidi_class::AL or paragraph_bidi_class == unicode_bidi_class::R);
/** Apply the line-level rules L1, L2 and L3 to one line.
 *
 * unicode_bidi_L1 yields the lowest odd and the highest embedding level of the
 * line, which bound the level loop of unicode_bidi_L2.
 *
 * @param first Iterator to the first character of the line.
 * @param last Iterator one past the last character of the line.
 * @param paragraph_embedding_level Forwarded to unicode_bidi_L1.
 * @param context Not used by the statements visible here.
 */
1091constexpr void unicode_bidi_P1_line(
1092 unicode_bidi_char_info_iterator first,
1093 unicode_bidi_char_info_iterator last,
1094 int8_t paragraph_embedding_level,
1095 unicode_bidi_context
const& context)
noexcept
1097 hilet[lowest_odd, highest] = unicode_bidi_L1(first, last, paragraph_embedding_level);
1098 unicode_bidi_L2(first, last, lowest_odd, highest);
1099 unicode_bidi_L3(first, last);
1104 unicode_bidi_char_info_iterator first,
1105 unicode_bidi_char_info_iterator last,
1106 unicode_bidi_context
const& context)
noexcept
1108 hilet default_paragraph_direction = unicode_bidi_P2(first, last, context,
false);
1109 hilet paragraph_embedding_level = unicode_bidi_P3(default_paragraph_direction);
1110 hilet paragraph_direction = paragraph_embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1111 return {paragraph_embedding_level, paragraph_direction};
1115 unicode_bidi_char_info_iterator first,
1116 unicode_bidi_char_info_iterator last,
1117 unicode_bidi_context
const& context)
noexcept
1119 hilet[paragraph_embedding_level, paragraph_direction] = unicode_bidi_P2_P3(first, last, context);
1121 unicode_bidi_X1(first, last, paragraph_embedding_level, context);
1122 last = unicode_bidi_X9(first, last);
1123 unicode_bidi_X10(first, last, paragraph_embedding_level, context);
1125 auto line_begin = first;
1126 for (
auto it = first; it != last; ++it) {
1127 hilet general_category = ucd_get_general_category(it->code_point);
1128 if (context.enable_line_separator and general_category == unicode_general_category::Zl) {
1129 hilet line_end = it + 1;
1130 unicode_bidi_P1_line(line_begin, line_end, paragraph_embedding_level, context);
1131 line_begin = line_end;
1135 if (line_begin != last) {
1136 unicode_bidi_P1_line(line_begin, last, paragraph_embedding_level, context);
1139 return {last, paragraph_direction};
1143 unicode_bidi_char_info_iterator first,
1144 unicode_bidi_char_info_iterator last,
1145 unicode_bidi_context
const& context)
noexcept
1148 auto paragraph_begin = it;
1150 while (it != last) {
1151 if (it->direction == unicode_bidi_class::B) {
1152 hilet paragraph_end = it + 1;
1153 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, paragraph_end, context);
1154 paragraph_directions.push_back(paragraph_bidi_class);
1157 std::rotate(new_paragraph_end, paragraph_end, last);
1160 paragraph_begin = it = new_paragraph_end;
1166 if (paragraph_begin != last) {
1167 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, last, context);
1168 paragraph_directions.push_back(paragraph_bidi_class);
1169 last = new_paragraph_end;
1172 return {last,
std::move(paragraph_directions)};
1175template<
typename OutputIt,
typename SetCodePo
int,
typename SetTextDirection>
1176constexpr void unicode_bidi_L4(
1177 unicode_bidi_char_info_iterator first,
1178 unicode_bidi_char_info_iterator last,
1180 SetCodePoint set_code_point,
1181 SetTextDirection set_text_direction)
noexcept
1183 for (
auto it = first; it != last; ++it, ++output_it) {
1184 hilet text_direction = it->embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1185 set_text_direction(*output_it, text_direction);
1186 if (it->direction == unicode_bidi_class::R and it->bracket_type != unicode_bidi_paired_bracket_type::n) {
1217template<
typename It,
typename GetCodePo
int,
typename SetCodePo
int,
typename SetTextDirection>
1221 GetCodePoint get_code_point,
1222 SetCodePoint set_code_point,
1223 SetTextDirection set_text_direction,
1226 auto proxy = detail::unicode_bidi_char_info_vector{};
1230 for (
auto it = first; it != last; ++it) {
1231 proxy.emplace_back(index++, get_code_point(*it));
1234 auto [proxy_last, paragraph_directions] = detail::unicode_bidi_P1(
begin(proxy),
end(proxy), context);
1239 detail::unicode_bidi_L4(
1243 std::forward<SetCodePoint>(set_code_point),
1244 std::forward<SetTextDirection>(set_text_direction));
1245 return {last,
std::move(paragraph_directions)};
1256template<
typename It,
typename GetCodePo
int>
1260 auto proxy = detail::unicode_bidi_char_info_vector{};
1264 for (
auto it = first; it != last; ++it) {
1265 proxy.emplace_back(index++, get_code_point(*it));
1266 if (proxy.back().direction == unicode_bidi_class::B) {
1272 return detail::unicode_bidi_P2_P3(
begin(proxy),
end(proxy), context).second;
1285template<
typename It,
typename EndIt,
typename CodePo
intFunc>
1289 hilet code_point = code_point_func(item);
1290 hilet bidi_class = ucd_get_bidi_class(code_point);
1291 return is_control(bidi_class);
DOXYGEN BUG.
Definition algorithm.hpp:16
auto shuffle_by_index(auto first, auto last, auto indices_first, auto indices_last, auto index_op) noexcept
Shuffle a container based on a list of indices.
Definition algorithm.hpp:261
constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const &code_point_func)
Removes control characters which will not survive the bidi-algorithm.
Definition unicode_bidi.hpp:1286
constexpr std::pair< It, std::vector< unicode_bidi_class > > unicode_bidi(It first, It last, GetCodePoint get_code_point, SetCodePoint set_code_point, SetTextDirection set_text_direction, unicode_bidi_context const &context={})
Reorder a given range of characters based on the unicode_bidi algorithm.
Definition unicode_bidi.hpp:1218
constexpr unicode_bidi_class unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const &context={})
Get the unicode bidi direction for the first paragraph and context.
Definition unicode_bidi.hpp:1258
constexpr char32_t ucd_get_bidi_mirroring_glyph(char32_t code_point) noexcept
Get the bidi-mirroring-glyph for a code-point.
Definition ucd_bidi_mirroring_glyphs.hpp:173
constexpr ucd_decomposition_info ucd_get_decomposition(char32_t code_point) noexcept
Get the decomposition info of a code-point.
Definition ucd_decompositions.hpp:4800
unicode_bidi_class
Bidirectional class Unicode Standard Annex #9: https://unicode.org/reports/tr9/.
Definition ucd_bidi_classes.hpp:858
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
constexpr std::optional< char32_t > canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition ucd_decompositions.hpp:4787
Definition unicode_bidi.hpp:21
Definition unicode_bidi.hpp:48
unicode_bidi_class direction
Current computed direction of the code-point.
Definition unicode_bidi.hpp:66
constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept
Constructor for testing to bypass normal initialization.
Definition unicode_bidi.hpp:89
int8_t embedding_level
The embedding level.
Definition unicode_bidi.hpp:61
unicode_bidi_class bidi_class
The original bidi class of the code-point.
Definition unicode_bidi.hpp:71
unicode_bidi_paired_bracket_type bracket_type
The type of bidi-paired-bracket.
Definition unicode_bidi.hpp:75
std::size_t index
Index from the first character in the original list.
Definition unicode_bidi.hpp:51
char32_t code_point
The current code point.
Definition unicode_bidi.hpp:56
Definition unicode_bidi.hpp:104
Definition unicode_bidi.hpp:124
Definition unicode_bidi.hpp:136
Definition unicode_bidi.hpp:180
Definition unicode_bidi.hpp:257
T emplace_back(T... args)