HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_bidi.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "ucd_bidi_classes.hpp"
8#include "ucd_bidi_paired_bracket_types.hpp"
9#include "ucd_bidi_mirroring_glyphs.hpp"
10#include "ucd_decompositions.hpp"
11#include "ucd_general_categories.hpp"
12#include "../utility/utility.hpp"
13#include "../container/container.hpp"
14#include "../algorithm/algorithm.hpp"
15#include "../macros.hpp"
16#include <cstdint>
17#include <exception>
18#include <cstddef>
19#include <vector>
20#include <utility>
21#include <iterator>
22#include <algorithm>
23
24hi_export_module(hikogui.unicode.unicode_bidi);
25
26
27hi_export namespace hi::inline v1 {
28
30 enum class mode_type : uint8_t { LTR, RTL, auto_LTR, auto_RTL };
31
32 mode_type direction_mode = mode_type::auto_LTR;
33 bool enable_mirrored_brackets = true;
34 bool enable_line_separator = true;
35
36 constexpr unicode_bidi_context() noexcept = default;
37 constexpr unicode_bidi_context(unicode_bidi_context const&) noexcept = default;
38 constexpr unicode_bidi_context(unicode_bidi_context&&) noexcept = default;
39 constexpr unicode_bidi_context& operator=(unicode_bidi_context const&) noexcept = default;
40 constexpr unicode_bidi_context& operator=(unicode_bidi_context&&) noexcept = default;
41
42 constexpr unicode_bidi_context(unicode_bidi_class text_direction) noexcept
43 {
44 if (text_direction == unicode_bidi_class::L) {
45 direction_mode = mode_type::auto_LTR;
46 } else if (text_direction == unicode_bidi_class::R) {
47 direction_mode = mode_type::auto_RTL;
48 } else {
49 hi_no_default();
50 }
51 }
52};
53
54namespace detail {
55
60
64 char32_t code_point;
65
70
74 unicode_bidi_class direction;
75
79 unicode_bidi_class bidi_class;
80
83 unicode_bidi_paired_bracket_type bracket_type;
84
85 [[nodiscard]] constexpr unicode_bidi_char_info(std::size_t index, char32_t code_point) noexcept
86 {
87 this->index = index;
88 this->code_point = code_point;
89 this->embedding_level = 0;
90 this->direction = this->bidi_class = ucd_get_bidi_class(code_point);
91 this->bracket_type = ucd_get_bidi_paired_bracket_type(code_point);
92 }
93
97 [[nodiscard]] constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept :
98 index(index),
99 code_point(U'\ufffd'),
100 direction(bidi_class),
101 bidi_class(bidi_class),
102 bracket_type(unicode_bidi_paired_bracket_type::n),
103 embedding_level(0)
104 {
105 }
106};
107
108using unicode_bidi_char_info_vector = std::vector<unicode_bidi_char_info>;
109using unicode_bidi_char_info_iterator = unicode_bidi_char_info_vector::iterator;
110using unicode_bidi_char_info_const_iterator = unicode_bidi_char_info_vector::const_iterator;
111
114
115 characters_type characters;
116
117 template<typename... Args>
118 constexpr void emplace_character(Args&&...args) noexcept
119 {
120 characters.emplace_back(std::forward<Args>(args)...);
121 }
122};
123
124[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2(
125 unicode_bidi_char_info_iterator first,
126 unicode_bidi_char_info_iterator last,
127 unicode_bidi_context const& context,
128 bool rule_X5c) noexcept;
129
130[[nodiscard]] constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class) noexcept;
131
133 int8_t embedding_level;
134 unicode_bidi_class override_status;
135 bool isolate_status;
136};
137
139public:
140 using iterator = unicode_bidi_char_info_iterator;
141 using const_iterator = unicode_bidi_char_info_const_iterator;
143
144 constexpr unicode_bidi_level_run(iterator begin, iterator end) noexcept : _begin(begin), _end(end) {}
145
146 [[nodiscard]] constexpr iterator begin() const noexcept
147 {
148 return _begin;
149 }
150
151 [[nodiscard]] constexpr iterator end() const noexcept
152 {
153 return _end;
154 }
155
156 [[nodiscard]] constexpr int8_t embedding_level() const noexcept
157 {
158 hi_axiom(_begin != _end);
159 return _begin->embedding_level;
160 }
161
162 [[nodiscard]] constexpr bool ends_with_isolate_initiator() const noexcept
163 {
164 using enum unicode_bidi_class;
165
166 hi_axiom(_begin != _end);
167 auto const& last_char = *(_end - 1);
168 return last_char.direction == LRI || last_char.direction == RLI || last_char.direction == FSI;
169 }
170
171 [[nodiscard]] constexpr bool starts_with_PDI() const noexcept
172 {
173 hi_axiom(_begin != _end);
174 return _begin->direction == unicode_bidi_class::PDI;
175 }
176
177private:
178 iterator _begin;
179 iterator _end;
180};
181
184 using iterator = recursive_iterator<run_container_type::iterator>;
185 using const_iterator = recursive_iterator<run_container_type::const_iterator>;
186
188 unicode_bidi_class sos;
189 unicode_bidi_class eos;
190
191 constexpr unicode_bidi_isolated_run_sequence(unicode_bidi_level_run const& rhs) noexcept :
192 runs({rhs}), sos(unicode_bidi_class::ON), eos(unicode_bidi_class::ON)
193 {
194 }
195
196 [[nodiscard]] constexpr auto begin() noexcept
197 {
198 return recursive_iterator_begin(runs);
199 }
200
201 [[nodiscard]] constexpr auto end() noexcept
202 {
203 return recursive_iterator_end(runs);
204 }
205
206 [[nodiscard]] constexpr auto begin() const noexcept
207 {
208 return recursive_iterator_begin(runs);
209 }
210
211 [[nodiscard]] constexpr auto end() const noexcept
212 {
213 return recursive_iterator_end(runs);
214 }
215
216 [[nodiscard]] constexpr friend auto begin(unicode_bidi_isolated_run_sequence& rhs) noexcept
217 {
218 return rhs.begin();
219 }
220
221 [[nodiscard]] constexpr friend auto begin(unicode_bidi_isolated_run_sequence const& rhs) noexcept
222 {
223 return rhs.begin();
224 }
225
226 [[nodiscard]] constexpr friend auto end(unicode_bidi_isolated_run_sequence& rhs) noexcept
227 {
228 return rhs.end();
229 }
230
231 [[nodiscard]] constexpr friend auto end(unicode_bidi_isolated_run_sequence const& rhs) noexcept
232 {
233 return rhs.end();
234 }
235
236 constexpr void add_run(unicode_bidi_level_run const& run) noexcept
237 {
238 runs.push_back(run);
239 }
240
241 [[nodiscard]] constexpr int8_t embedding_level() const noexcept
242 {
243 hi_axiom(not runs.empty());
244 return runs.front().embedding_level();
245 }
246
247 [[nodiscard]] constexpr unicode_bidi_class embedding_direction() const noexcept
248 {
249 return (embedding_level() % 2) == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
250 }
251
252 [[nodiscard]] constexpr bool ends_with_isolate_initiator() const noexcept
253 {
254 hi_axiom(not runs.empty());
255 return runs.back().ends_with_isolate_initiator();
256 }
257};
258
260 unicode_bidi_isolated_run_sequence::iterator open;
261 unicode_bidi_isolated_run_sequence::iterator close;
262
264 unicode_bidi_isolated_run_sequence::iterator open,
265 unicode_bidi_isolated_run_sequence::iterator close) :
266 open(std::move(open)), close(std::move(close))
267 {
268 }
269
270 [[nodiscard]] constexpr friend auto
271 operator<=>(unicode_bidi_bracket_pair const& lhs, unicode_bidi_bracket_pair const& rhs) noexcept
272 {
273 return lhs.open <=> rhs.open;
274 }
275};
276
277constexpr void unicode_bidi_X1(
278 unicode_bidi_char_info_iterator first,
279 unicode_bidi_char_info_iterator last,
280 int8_t paragraph_embedding_level,
281 unicode_bidi_context const& context) noexcept
282{
283 using enum unicode_bidi_class;
284
285 constexpr int8_t max_depth = 125;
286
287 auto next_even = [](int8_t x) -> int8_t {
288 return (x % 2 == 0) ? x + 2 : x + 1;
289 };
290
291 auto next_odd = [](int8_t x) -> int8_t {
292 return (x % 2 == 1) ? x + 2 : x + 1;
293 };
294
295 long long overflow_isolate_count = 0;
296 long long overflow_embedding_count = 0;
297 long long valid_isolate_count = 0;
298
299 // X1.
300 auto stack = hi::stack<unicode_bidi_stack_element, max_depth + 2>{};
301 stack.emplace_back(paragraph_embedding_level, ON, false);
302
303 for (auto it = first; it != last; ++it) {
304 auto const current_embedding_level = stack.back().embedding_level;
305 auto const current_override_status = stack.back().override_status;
306 auto const next_odd_embedding_level = next_odd(current_embedding_level);
307 auto const next_even_embedding_level = next_even(current_embedding_level);
308
309 auto RLI_implementation = [&] {
310 it->embedding_level = current_embedding_level;
311 if (current_override_status != ON) {
312 it->direction = current_override_status;
313 }
314
315 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
316 ++valid_isolate_count;
317 stack.emplace_back(next_odd_embedding_level, ON, true);
318 } else {
319 ++overflow_isolate_count;
320 }
321 };
322
323 auto LRI_implementation = [&] {
324 it->embedding_level = current_embedding_level;
325 if (current_override_status != ON) {
326 it->direction = current_override_status;
327 }
328
329 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
330 ++valid_isolate_count;
331 stack.emplace_back(next_even_embedding_level, ON, true);
332 } else {
333 ++overflow_isolate_count;
334 }
335 };
336
337 switch (it->direction) {
338 case RLE: // X2. Explicit embeddings
339 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
340 stack.emplace_back(next_odd_embedding_level, ON, false);
341 } else if (overflow_isolate_count == 0) {
342 ++overflow_embedding_count;
343 }
344 break;
345
346 case LRE: // X3. Explicit embeddings
347 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
348 stack.emplace_back(next_even_embedding_level, ON, false);
349 } else if (overflow_isolate_count == 0) {
350 ++overflow_embedding_count;
351 }
352 break;
353
354 case RLO: // X4. Explicit overrides
355 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
356 stack.emplace_back(next_odd_embedding_level, R, false);
357 } else if (overflow_isolate_count == 0) {
358 ++overflow_embedding_count;
359 }
360 break;
361
362 case LRO: // X5. Explicit overrides
363 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
364 stack.emplace_back(next_even_embedding_level, L, false);
365 } else if (overflow_isolate_count == 0) {
366 ++overflow_embedding_count;
367 }
368 break;
369
370 case RLI: // X5a. Isolates
371 RLI_implementation();
372 break;
373
374 case LRI: // X5b. Isolates
375 LRI_implementation();
376 break;
377
378 case FSI:
379 { // X5c. Isolates
380 auto sub_context = context;
381 sub_context.direction_mode = unicode_bidi_context::mode_type::auto_LTR;
382 auto const sub_paragraph_bidi_class = unicode_bidi_P2(it + 1, last, sub_context, true);
383 auto const sub_paragraph_embedding_level = unicode_bidi_P3(sub_paragraph_bidi_class);
384 if (sub_paragraph_embedding_level == 0) {
385 LRI_implementation();
386 } else {
387 RLI_implementation();
388 }
389 }
390 break;
391
392 case PDI: // X6a. Terminating Isolates
393 if (overflow_isolate_count > 0) {
394 --overflow_isolate_count;
395 } else if (valid_isolate_count == 0) {
396 // Mismatched PDI, do nothing.
397 ;
398 } else {
399 overflow_embedding_count = 0;
400 while (stack.back().isolate_status == false) {
401 stack.pop_back();
402 }
403 stack.pop_back();
404 --valid_isolate_count;
405 }
406
407 it->embedding_level = stack.back().embedding_level;
408 if (stack.back().override_status != ON) {
409 it->direction = stack.back().override_status;
410 }
411 break;
412
413 case PDF: // X7. Terminating Embeddings and Overrides
414 if (overflow_isolate_count > 0) {
415 // PDF is in scope of isolate, wait until the isolate is terminated.
416 ;
417 } else if (overflow_embedding_count > 0) {
418 --overflow_embedding_count;
419 } else if (stack.back().isolate_status == false && stack.size() >= 2) {
420 stack.pop_back();
421 } else {
422 // PDF does not match embedding character.
423 }
424 break;
425
426 case B: // X8. End of Paragraph
427 it->embedding_level = paragraph_embedding_level;
428 return;
429
430 case BN: // X6. Ignore
431 break;
432
433 default: // X6
434 it->embedding_level = current_embedding_level;
435 if (current_override_status != ON) {
436 it->direction = current_override_status;
437 }
438 }
439 }
440}
441
442[[nodiscard]] constexpr unicode_bidi_char_info_iterator
443unicode_bidi_X9(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept
444{
445 return std::remove_if(first, last, [](auto const& character) {
446 using enum unicode_bidi_class;
447
448 return character.direction == RLE || character.direction == LRE || character.direction == RLO ||
449 character.direction == LRO || character.direction == PDF || character.direction == BN;
450 });
451}
452
453constexpr void unicode_bidi_W1(unicode_bidi_isolated_run_sequence& sequence) noexcept
454{
455 using enum unicode_bidi_class;
456
457 auto previous_bidi_class = sequence.sos;
458 for (auto& char_info : sequence) {
459 if (char_info.direction == NSM) {
460 switch (previous_bidi_class) {
461 case LRI:
462 case RLI:
463 case FSI:
464 case PDI:
465 char_info.direction = ON;
466 break;
467 default:
468 char_info.direction = previous_bidi_class;
469 break;
470 }
471 }
472
473 previous_bidi_class = char_info.direction;
474 }
475}
476
477constexpr void unicode_bidi_W2(unicode_bidi_isolated_run_sequence& sequence) noexcept
478{
479 using enum unicode_bidi_class;
480
481 auto last_strong_direction = sequence.sos;
482 for (auto& char_info : sequence) {
483 switch (char_info.direction) {
484 case R:
485 case L:
486 case AL:
487 last_strong_direction = char_info.direction;
488 break;
489 case EN:
490 if (last_strong_direction == AL) {
491 char_info.direction = AN;
492 }
493 break;
494 default:;
495 }
496 }
497}
498
499constexpr void unicode_bidi_W3(unicode_bidi_isolated_run_sequence& sequence) noexcept
500{
501 using enum unicode_bidi_class;
502
503 for (auto& char_info : sequence) {
504 if (char_info.direction == AL) {
505 char_info.direction = R;
506 }
507 }
508}
509
510constexpr void unicode_bidi_W4(unicode_bidi_isolated_run_sequence& sequence) noexcept
511{
512 using enum unicode_bidi_class;
513
514 unicode_bidi_char_info *back1 = nullptr;
515 unicode_bidi_char_info *back2 = nullptr;
516 for (auto& char_info : sequence) {
517 if (char_info.direction == EN && back2 != nullptr && back2->direction == EN && back1 != nullptr &&
518 (back1->direction == ES || back1->direction == CS)) {
519 back1->direction = EN;
520 }
521 if (char_info.direction == AN && back2 != nullptr && back2->direction == AN && back1 != nullptr &&
522 back1->direction == CS) {
523 back1->direction = AN;
524 }
525
526 back2 = std::exchange(back1, &char_info);
527 }
528}
529
530constexpr void unicode_bidi_W5(unicode_bidi_isolated_run_sequence& sequence) noexcept
531{
532 using enum unicode_bidi_class;
533
534 auto ET_start = end(sequence);
535 auto starts_with_EN = false;
536
537 for (auto it = begin(sequence); it != end(sequence); ++it) {
538 auto& char_info = *it;
539
540 switch (char_info.direction) {
541 case ET:
542 if (starts_with_EN) {
543 char_info.direction = EN;
544 } else if (ET_start == end(sequence)) {
545 ET_start = it;
546 }
547 break;
548
549 case EN:
550 starts_with_EN = true;
551 if (ET_start != end(sequence)) {
552 for (auto jt = ET_start; jt != it; ++jt) {
553 jt->direction = EN;
554 }
555 ET_start = end(sequence);
556 }
557 break;
558
559 default:
560 starts_with_EN = false;
561 ET_start = end(sequence);
562 }
563 }
564}
565
566constexpr void unicode_bidi_W6(unicode_bidi_isolated_run_sequence& sequence) noexcept
567{
568 using enum unicode_bidi_class;
569
570 for (auto& char_info : sequence) {
571 if (char_info.direction == ET || char_info.direction == ES || char_info.direction == CS) {
572 char_info.direction = ON;
573 }
574 }
575}
576
577constexpr void unicode_bidi_W7(unicode_bidi_isolated_run_sequence& sequence) noexcept
578{
579 using enum unicode_bidi_class;
580
581 auto last_strong_direction = sequence.sos;
582 for (auto& char_info : sequence) {
583 switch (char_info.direction) {
584 case R:
585 case L:
586 last_strong_direction = char_info.direction;
587 break;
588 case EN:
589 if (last_strong_direction == L) {
590 char_info.direction = L;
591 }
592 break;
593 default:;
594 }
595 }
596}
597
598constexpr std::vector<unicode_bidi_bracket_pair> unicode_bidi_BD16(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
599{
600 struct bracket_start {
601 unicode_bidi_isolated_run_sequence::iterator it;
602 char32_t mirrored_bracket;
603 };
604
605 using enum unicode_bidi_class;
606
608 auto stack = hi::stack<bracket_start, 63>{};
609
610 for (auto it = begin(isolated_run_sequence); it != end(isolated_run_sequence); ++it) {
611 if (it->direction == ON) {
612 switch (it->bracket_type) {
613 case unicode_bidi_paired_bracket_type::o:
614 if (stack.full()) {
615 // Stop processing
616 std::sort(pairs.begin(), pairs.end());
617 return pairs;
618
619 } else {
620 // If there is a canonical equivalent of the opening bracket, find it's mirrored glyph
621 // to compare with the closing bracket.
622 auto mirrored_glyph = ucd_get_bidi_mirroring_glyph(it->code_point);
623 if (auto const canonical_equivalent = ucd_get_decomposition(it->code_point).canonical_equivalent()) {
624 hi_axiom(ucd_get_bidi_paired_bracket_type(*canonical_equivalent) == unicode_bidi_paired_bracket_type::o);
625
626 mirrored_glyph = ucd_get_bidi_mirroring_glyph(*canonical_equivalent);
627 }
628
629 stack.emplace_back(it, mirrored_glyph);
630 }
631 break;
632
633 case unicode_bidi_paired_bracket_type::c:
634 {
635 auto const canonical_equivalent = ucd_get_decomposition(it->code_point).canonical_equivalent();
636 auto jt = stack.end();
637 while (jt != stack.begin()) {
638 --jt;
639
640 if (jt->mirrored_bracket == it->code_point or
641 (canonical_equivalent and jt->mirrored_bracket == *canonical_equivalent)) {
642 pairs.emplace_back(jt->it, it);
643 stack.pop_back(jt);
644 break;
645 }
646 }
647 }
648 break;
649
650 default:;
651 }
652 }
653 }
654
655 std::sort(pairs.begin(), pairs.end());
656 return pairs;
657}
658
659[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_N0_strong(unicode_bidi_class direction)
660{
661 using enum unicode_bidi_class;
662
663 switch (direction) {
664 case L:
665 return L;
666 case R:
667 case EN:
668 case AN:
669 return R;
670 default:
671 return ON;
672 }
673}
674
675[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_N0_preceding_strong_type(
676 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
677 unicode_bidi_isolated_run_sequence::iterator const& open_bracket) noexcept
678{
679 using enum unicode_bidi_class;
680
681 auto it = open_bracket;
682 while (it != begin(isolated_run_sequence)) {
683 --it;
684
685 if (auto const direction = unicode_bidi_N0_strong(it->direction); direction != ON) {
686 return direction;
687 }
688 }
689
690 return isolated_run_sequence.sos;
691}
692
693[[nodiscard]] constexpr unicode_bidi_class
694unicode_bidi_N0_enclosed_strong_type(unicode_bidi_bracket_pair const& pair, unicode_bidi_class embedding_direction) noexcept
695{
696 using enum unicode_bidi_class;
697
698 auto opposite_direction = ON;
699 for (auto it = pair.open + 1; it != pair.close; ++it) {
700 auto const direction = unicode_bidi_N0_strong(it->direction);
701 if (direction == ON) {
702 continue;
703 }
704 if (direction == embedding_direction) {
705 return direction;
706 }
707 opposite_direction = direction;
708 }
709
710 return opposite_direction;
711}
712
713constexpr void unicode_bidi_N0(unicode_bidi_isolated_run_sequence& isolated_run_sequence, unicode_bidi_context const& context)
714{
715 using enum unicode_bidi_class;
716
717 if (not context.enable_mirrored_brackets) {
718 return;
719 }
720
721 auto bracket_pairs = unicode_bidi_BD16(isolated_run_sequence);
722 auto const embedding_direction = isolated_run_sequence.embedding_direction();
723
724 for (auto& pair : bracket_pairs) {
725 auto pair_direction = unicode_bidi_N0_enclosed_strong_type(pair, embedding_direction);
726
727 if (pair_direction == ON) {
728 continue;
729 }
730
731 if (pair_direction != embedding_direction) {
732 pair_direction = unicode_bidi_N0_preceding_strong_type(isolated_run_sequence, pair.open);
733
734 if (pair_direction == embedding_direction || pair_direction == ON) {
735 pair_direction = embedding_direction;
736 }
737 }
738
739 pair.open->direction = pair_direction;
740 pair.close->direction = pair_direction;
741
742 for (auto it = pair.open + 1; it != pair.close; ++it) {
743 if (it->bidi_class != NSM) {
744 break;
745 }
746 it->direction = pair_direction;
747 }
748
749 for (auto it = pair.close + 1; it != end(isolated_run_sequence); ++it) {
750 if (it->bidi_class != NSM) {
751 break;
752 }
753 it->direction = pair_direction;
754 }
755 }
756}
757
758constexpr void unicode_bidi_N1(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
759{
760 using enum unicode_bidi_class;
761
762 auto direction_before_NI = isolated_run_sequence.sos;
763 auto first_NI = end(isolated_run_sequence);
764
765 for (auto it = begin(isolated_run_sequence); it != end(isolated_run_sequence); ++it) {
766 auto const& char_info = *it;
767 if (first_NI != end(isolated_run_sequence)) {
768 if (!is_NI(char_info.direction)) {
769 auto const direction_after_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
770
771 if ((direction_before_NI == L || direction_before_NI == R) && direction_before_NI == direction_after_NI) {
772 std::for_each(first_NI, it, [direction_before_NI](auto& item) {
773 item.direction = direction_before_NI;
774 });
775 }
776
777 first_NI = end(isolated_run_sequence);
778 direction_before_NI = direction_after_NI;
779 }
780
781 } else if (is_NI(char_info.direction)) {
782 first_NI = it;
783 } else {
784 direction_before_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
785 }
786 }
787
788 if (first_NI != end(isolated_run_sequence) && direction_before_NI == isolated_run_sequence.eos) {
789 std::for_each(first_NI, end(isolated_run_sequence), [direction_before_NI](auto& item) {
790 item.direction = direction_before_NI;
791 });
792 }
793}
794
795constexpr void unicode_bidi_N2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
796{
797 auto const embedding_direction = isolated_run_sequence.embedding_direction();
798
799 for (auto& char_info : isolated_run_sequence) {
800 if (is_NI(char_info.direction)) {
801 char_info.direction = embedding_direction;
802 }
803 }
804}
805
806constexpr void unicode_bidi_I1_I2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
807{
808 using enum unicode_bidi_class;
809
810 for (auto& char_info : isolated_run_sequence) {
811 if ((char_info.embedding_level % 2) == 0) {
812 // I1
813 if (char_info.direction == R) {
814 char_info.embedding_level += 1;
815 } else if (char_info.direction == AN || char_info.direction == EN) {
816 char_info.embedding_level += 2;
817 }
818 } else {
819 // I2
820 if (char_info.direction == L || char_info.direction == AN || char_info.direction == EN) {
821 char_info.embedding_level += 1;
822 }
823 }
824 }
825}
826
828unicode_bidi_BD7(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept
829{
831
832 auto embedding_level = int8_t{0};
833 auto run_start = first;
834 for (auto it = first; it != last; ++it) {
835 if (it == first) {
836 embedding_level = it->embedding_level;
837
838 } else if (it->embedding_level != embedding_level) {
839 embedding_level = it->embedding_level;
840
841 level_runs.emplace_back(run_start, it);
842 run_start = it;
843 }
844 }
845 if (run_start != last) {
846 level_runs.emplace_back(run_start, last);
847 }
848
849 return level_runs;
850}
851
853unicode_bidi_BD13(std::vector<unicode_bidi_level_run> level_runs) noexcept
854{
856
857 std::reverse(begin(level_runs), end(level_runs));
858 while (!level_runs.empty()) {
859 auto isolated_run_sequence = unicode_bidi_isolated_run_sequence(level_runs.back());
860 level_runs.pop_back();
861
862 while (isolated_run_sequence.ends_with_isolate_initiator() && !level_runs.empty()) {
863 // Search for matching PDI in the run_levels. This should have the same embedding level.
864 auto isolation_level = 1;
865 for (auto it = std::rbegin(level_runs); it != std::rend(level_runs); ++it) {
866 if (it->starts_with_PDI() && --isolation_level == 0) {
867 hi_axiom(it->embedding_level() == isolated_run_sequence.embedding_level());
868 isolated_run_sequence.add_run(*it);
869 level_runs.erase(std::next(it).base());
870 break;
871 }
872 if (it->ends_with_isolate_initiator()) {
873 ++isolation_level;
874 }
875 }
876
877 if (isolation_level != 0) {
878 // No PDI that matches the isolate initiator of this isolated run sequence.
879 break;
880 }
881 }
882
883 r.push_back(std::move(isolated_run_sequence));
884 }
885
886 return r;
887}
888
889[[nodiscard]] constexpr std::pair<unicode_bidi_class, unicode_bidi_class> unicode_bidi_X10_sos_eos(
890 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
891 unicode_bidi_char_info_iterator first,
892 unicode_bidi_char_info_iterator last,
893 int8_t paragraph_embedding_level) noexcept
894{
895 if (begin(isolated_run_sequence) != end(isolated_run_sequence)) {
896 // The calculations on the iterator for last_char_it is required because
897 // calling child() on an end iterator is undefined behavior.
898 auto const first_char_it = begin(isolated_run_sequence).child();
899 auto const last_char_it = (end(isolated_run_sequence) - 1).child() + 1;
900
901 auto const has_char_before = first_char_it != first;
902 auto const has_char_after = last_char_it != last;
903
904 auto const start_embedding_level = std::max(
905 isolated_run_sequence.embedding_level(),
906 has_char_before ? (first_char_it - 1)->embedding_level : paragraph_embedding_level);
907 auto const end_embedding_level = std::max(
908 isolated_run_sequence.embedding_level(),
909 has_char_after && !isolated_run_sequence.ends_with_isolate_initiator() ? last_char_it->embedding_level :
910 paragraph_embedding_level);
911
912 return {
913 (start_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L,
914 (end_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L};
915 } else {
916 return {
917 (paragraph_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L,
918 (paragraph_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L};
919 }
920}
921
922constexpr void unicode_bidi_X10(
923 unicode_bidi_char_info_iterator first,
924 unicode_bidi_char_info_iterator last,
925 int8_t paragraph_embedding_level,
926 unicode_bidi_context const& context) noexcept
927{
928 auto isolated_run_sequence_set = unicode_bidi_BD13(unicode_bidi_BD7(first, last));
929
930 // All sos and eos calculations must be done before W*, N*, I* parts are executed,
931 // since those will change the embedding levels of the characters outside of the
932 // current isolated_run_sequence that the unicode_bidi_X10_sos_eos() depends on.
933 for (auto& isolated_run_sequence : isolated_run_sequence_set) {
934 std::tie(isolated_run_sequence.sos, isolated_run_sequence.eos) =
935 unicode_bidi_X10_sos_eos(isolated_run_sequence, first, last, paragraph_embedding_level);
936 }
937
938 for (auto& isolated_run_sequence : isolated_run_sequence_set) {
939 unicode_bidi_W1(isolated_run_sequence);
940 unicode_bidi_W2(isolated_run_sequence);
941 unicode_bidi_W3(isolated_run_sequence);
942 unicode_bidi_W4(isolated_run_sequence);
943 unicode_bidi_W5(isolated_run_sequence);
944 unicode_bidi_W6(isolated_run_sequence);
945 unicode_bidi_W7(isolated_run_sequence);
946 unicode_bidi_N0(isolated_run_sequence, context);
947 unicode_bidi_N1(isolated_run_sequence);
948 unicode_bidi_N2(isolated_run_sequence);
949 unicode_bidi_I1_I2(isolated_run_sequence);
950 }
951}
952
953[[nodiscard]] constexpr std::pair<int8_t, int8_t> unicode_bidi_L1(
954 unicode_bidi_char_info_iterator first,
955 unicode_bidi_char_info_iterator last,
956 int8_t paragraph_embedding_level) noexcept
957{
958 using enum unicode_bidi_class;
959
960 auto lowest_odd = std::numeric_limits<int8_t>::max();
961 auto highest = paragraph_embedding_level;
962 auto preceding_is_segment = true;
963
964 auto it = last;
965 while (it != first) {
966 --it;
967
968 auto bidi_class = it->bidi_class;
969
970 if (bidi_class == B || bidi_class == S) {
971 it->embedding_level = paragraph_embedding_level;
972 preceding_is_segment = true;
973
974 } else if (preceding_is_segment && (bidi_class == WS || is_isolate_formatter(bidi_class))) {
975 it->embedding_level = paragraph_embedding_level;
976 preceding_is_segment = true;
977
978 } else {
979 highest = std::max(highest, it->embedding_level);
980 if ((it->embedding_level % 2) == 1) {
981 lowest_odd = std::min(lowest_odd, it->embedding_level);
982 }
983
984 preceding_is_segment = false;
985 }
986 }
987
988 if ((paragraph_embedding_level % 2) == 1) {
989 lowest_odd = std::min(lowest_odd, paragraph_embedding_level);
990 }
991
992 if (lowest_odd > highest) {
993 // If there where no odd levels below the highest level
994 if (highest % 2 == 1) {
995 // We need to reverse at least once if the highest was odd.
996 lowest_odd = highest;
997 } else {
998 // We need to reverse at least twice if the highest was even.
999 // This may yield a negative lowest_odd.
1000 lowest_odd = highest - 1;
1001 }
1002 }
1003
1004 return {lowest_odd, highest};
1005}
1006
1007constexpr void unicode_bidi_L2(
1008 unicode_bidi_char_info_iterator first,
1009 unicode_bidi_char_info_iterator last,
1010 int8_t lowest_odd,
1011 int8_t highest) noexcept
1012{
1013 for (int8_t level = highest; level >= lowest_odd; --level) {
1014 auto sequence_start = last;
1015 for (auto it = first; it != last; ++it) {
1016 if (sequence_start == last) {
1017 if (it->embedding_level >= level) {
1018 sequence_start = it;
1019 }
1020 } else if (it->embedding_level < level) {
1021 std::reverse(sequence_start, it);
1022 sequence_start = last;
1023 }
1024 }
1025 if (sequence_start != last) {
1026 std::reverse(sequence_start, last);
1027 }
1028 }
1029}
1030
1031constexpr void unicode_bidi_L3(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept {}
1032
1033[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2_default(unicode_bidi_context const& context) noexcept
1034{
1035 if (context.direction_mode == unicode_bidi_context::mode_type::auto_LTR) {
1036 return unicode_bidi_class::L;
1037 } else if (context.direction_mode == unicode_bidi_context::mode_type::auto_RTL) {
1038 return unicode_bidi_class::R;
1039 } else {
1040 hi_no_default();
1041 }
1042}
1043
1044[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2(
1045 unicode_bidi_char_info_iterator first,
1046 unicode_bidi_char_info_iterator last,
1047 unicode_bidi_context const& context,
1048 bool rule_X5c) noexcept
1049{
1050 using enum unicode_bidi_class;
1051
1052 if (context.direction_mode == unicode_bidi_context::mode_type::LTR) {
1053 return unicode_bidi_class::L;
1054 } else if (context.direction_mode == unicode_bidi_context::mode_type::RTL) {
1055 return unicode_bidi_class::R;
1056 }
1057
1058 long long isolate_level = 0;
1059 for (auto it = first; it != last; ++it) {
1060 switch (it->direction) {
1061 case L:
1062 case AL:
1063 case R:
1064 if (isolate_level == 0) {
1065 return it->direction;
1066 }
1067 break;
1068 case LRI:
1069 case RLI:
1070 case FSI:
1071 ++isolate_level;
1072 break;
1073 case PDI:
1074 if (isolate_level > 0) {
1075 --isolate_level;
1076 } else if (rule_X5c) {
1077 // End at the matching PDI, when recursing for rule X5c.
1078 return unicode_bidi_P2_default(context);
1079 }
1080 break;
1081 default:;
1082 }
1083 }
1084 return unicode_bidi_P2_default(context);
1085}
1086
1087[[nodiscard]] constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class) noexcept
1088{
1089 return wide_cast<int8_t>(paragraph_bidi_class == unicode_bidi_class::AL or paragraph_bidi_class == unicode_bidi_class::R);
1090}
1091
1092constexpr void unicode_bidi_P1_line(
1093 unicode_bidi_char_info_iterator first,
1094 unicode_bidi_char_info_iterator last,
1095 int8_t paragraph_embedding_level,
1096 unicode_bidi_context const& context) noexcept
1097{
1098 auto const[lowest_odd, highest] = unicode_bidi_L1(first, last, paragraph_embedding_level);
1099 unicode_bidi_L2(first, last, lowest_odd, highest);
1100 unicode_bidi_L3(first, last);
1101 // L4 is delayed after the original array has been shuffled.
1102}
1103
1104[[nodiscard]] constexpr std::pair<int8_t, unicode_bidi_class> unicode_bidi_P2_P3(
1105 unicode_bidi_char_info_iterator first,
1106 unicode_bidi_char_info_iterator last,
1107 unicode_bidi_context const& context) noexcept
1108{
1109 auto const default_paragraph_direction = unicode_bidi_P2(first, last, context, false);
1110 auto const paragraph_embedding_level = unicode_bidi_P3(default_paragraph_direction);
1111 auto const paragraph_direction = paragraph_embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1112 return {paragraph_embedding_level, paragraph_direction};
1113}
1114
1115[[nodiscard]] constexpr std::pair<unicode_bidi_char_info_iterator, unicode_bidi_class> unicode_bidi_P1_paragraph(
1116 unicode_bidi_char_info_iterator first,
1117 unicode_bidi_char_info_iterator last,
1118 unicode_bidi_context const& context) noexcept
1119{
1120 auto const[paragraph_embedding_level, paragraph_direction] = unicode_bidi_P2_P3(first, last, context);
1121
1122 unicode_bidi_X1(first, last, paragraph_embedding_level, context);
1123 last = unicode_bidi_X9(first, last);
1124 unicode_bidi_X10(first, last, paragraph_embedding_level, context);
1125
1126 auto line_begin = first;
1127 for (auto it = first; it != last; ++it) {
1128 auto const general_category = ucd_get_general_category(it->code_point);
1129 if (context.enable_line_separator and general_category == unicode_general_category::Zl) {
1130 auto const line_end = it + 1;
1131 unicode_bidi_P1_line(line_begin, line_end, paragraph_embedding_level, context);
1132 line_begin = line_end;
1133 }
1134 }
1135
1136 if (line_begin != last) {
1137 unicode_bidi_P1_line(line_begin, last, paragraph_embedding_level, context);
1138 }
1139
1140 return {last, paragraph_direction};
1141}
1142
1144 unicode_bidi_char_info_iterator first,
1145 unicode_bidi_char_info_iterator last,
1146 unicode_bidi_context const& context) noexcept
1147{
1148 auto it = first;
1149 auto paragraph_begin = it;
1150 auto paragraph_directions = std::vector<unicode_bidi_class>{};
1151 while (it != last) {
1152 if (it->direction == unicode_bidi_class::B) {
1153 auto const paragraph_end = it + 1;
1154 auto const[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, paragraph_end, context);
1155 paragraph_directions.push_back(paragraph_bidi_class);
1156
1157 // Move the removed items of the paragraph to the end of the text.
1158 std::rotate(new_paragraph_end, paragraph_end, last);
1159 last -= std::distance(new_paragraph_end, paragraph_end);
1160
1161 paragraph_begin = it = new_paragraph_end;
1162 } else {
1163 ++it;
1164 }
1165 }
1166
1167 if (paragraph_begin != last) {
1168 auto const[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, last, context);
1169 paragraph_directions.push_back(paragraph_bidi_class);
1170 last = new_paragraph_end;
1171 }
1172
1173 return {last, std::move(paragraph_directions)};
1174}
1175
1176template<typename OutputIt, typename SetCodePoint, typename SetTextDirection>
1177constexpr void unicode_bidi_L4(
1178 unicode_bidi_char_info_iterator first,
1179 unicode_bidi_char_info_iterator last,
1180 OutputIt output_it,
1181 SetCodePoint set_code_point,
1182 SetTextDirection set_text_direction) noexcept
1183{
1184 for (auto it = first; it != last; ++it, ++output_it) {
1185 auto const text_direction = it->embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1186 set_text_direction(*output_it, text_direction);
1187 if (it->direction == unicode_bidi_class::R and it->bracket_type != unicode_bidi_paired_bracket_type::n) {
1188 set_code_point(*output_it, ucd_get_bidi_mirroring_glyph(it->code_point));
1189 }
1190 }
1191}
1192
1193} // namespace detail
1194
1218template<typename It, typename GetCodePoint, typename SetCodePoint, typename SetTextDirection>
1220 It first,
1221 It last,
1222 GetCodePoint get_code_point,
1223 SetCodePoint set_code_point,
1224 SetTextDirection set_text_direction,
1225 unicode_bidi_context const& context = {})
1226{
1227 auto proxy = detail::unicode_bidi_char_info_vector{};
1228 proxy.reserve(std::distance(first, last));
1229
1230 std::size_t index = 0;
1231 for (auto it = first; it != last; ++it) {
1232 proxy.emplace_back(index++, get_code_point(*it));
1233 }
1234
1235 auto [proxy_last, paragraph_directions] = detail::unicode_bidi_P1(begin(proxy), end(proxy), context);
1236 last = shuffle_by_index(first, last, begin(proxy), proxy_last, [](auto const& item) {
1237 return item.index;
1238 });
1239
1240 detail::unicode_bidi_L4(
1241 begin(proxy),
1242 proxy_last,
1243 first,
1244 std::forward<SetCodePoint>(set_code_point),
1245 std::forward<SetTextDirection>(set_text_direction));
1246 return {last, std::move(paragraph_directions)};
1247}
1248
1257template<typename It, typename GetCodePoint>
1258[[nodiscard]] constexpr unicode_bidi_class
1259unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const& context = {})
1260{
1261 auto proxy = detail::unicode_bidi_char_info_vector{};
1262 proxy.reserve(std::distance(first, last));
1263
1264 std::size_t index = 0;
1265 for (auto it = first; it != last; ++it) {
1266 proxy.emplace_back(index++, get_code_point(*it));
1267 if (proxy.back().direction == unicode_bidi_class::B) {
1268 // Break early when end-of-paragraph symbol is found.
1269 break;
1270 }
1271 }
1272
1273 return detail::unicode_bidi_P2_P3(begin(proxy), end(proxy), context).second;
1274}
1275
1286template<typename It, typename EndIt, typename CodePointFunc>
1287constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const& code_point_func)
1288{
1289 return std::remove_if(first, last, [&](auto const& item) {
1290 auto const code_point = code_point_func(item);
1291 auto const bidi_class = ucd_get_bidi_class(code_point);
1292 return is_control(bidi_class);
1293 });
1294}
1295
1296} // namespace hi::inline v1
constexpr char32_t ucd_get_bidi_mirroring_glyph(char32_t code_point) noexcept
Get the bidi-mirroring-glyph for a code-point.
Definition ucd_bidi_mirroring_glyphs.hpp:176
constexpr ucd_decomposition_info ucd_get_decomposition(char32_t code_point) noexcept
Get the decomposition info of a code-point.
Definition ucd_decompositions.hpp:4803
unicode_bidi_class
Bidirectional class Unicode Standard Annex #9: https://unicode.org/reports/tr9/.
Definition ucd_bidi_classes.hpp:861
@ level
The child widget stays at the same elevation and layer.
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
auto shuffle_by_index(auto first, auto last, auto indices_first, auto indices_last, auto index_op) noexcept
Shuffle a container based on a list of indices.
Definition algorithm_misc.hpp:265
constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const &code_point_func)
Removes control characters which will not survive the bidi-algorithm.
Definition unicode_bidi.hpp:1287
constexpr std::pair< It, std::vector< unicode_bidi_class > > unicode_bidi(It first, It last, GetCodePoint get_code_point, SetCodePoint set_code_point, SetTextDirection set_text_direction, unicode_bidi_context const &context={})
Reorder a given range of characters based on the unicode_bidi algorithm.
Definition unicode_bidi.hpp:1219
constexpr unicode_bidi_class unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const &context={})
Get the unicode bidi direction for the first paragraph and context.
Definition unicode_bidi.hpp:1259
constexpr std::optional< char32_t > canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition ucd_decompositions.hpp:4790
Definition unicode_bidi.hpp:29
Definition unicode_bidi.hpp:56
unicode_bidi_class direction
Current computed direction of the code-point.
Definition unicode_bidi.hpp:74
constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept
Constructor for testing to bypass normal initialization.
Definition unicode_bidi.hpp:97
int8_t embedding_level
The embedding level.
Definition unicode_bidi.hpp:69
unicode_bidi_class bidi_class
The original bidi class of the code-point.
Definition unicode_bidi.hpp:79
unicode_bidi_paired_bracket_type bracket_type
The type of bidi-paired-bracket.
Definition unicode_bidi.hpp:83
std::size_t index
Index from the first character in the original list.
Definition unicode_bidi.hpp:59
char32_t code_point
The current code point.
Definition unicode_bidi.hpp:64
Definition unicode_bidi.hpp:112
Definition unicode_bidi.hpp:132
Definition unicode_bidi.hpp:138
Definition unicode_bidi.hpp:182
Definition unicode_bidi.hpp:259
T back(T... args)
T begin(T... args)
T distance(T... args)
T emplace_back(T... args)
T empty(T... args)
T end(T... args)
T erase(T... args)
T for_each(T... args)
T front(T... args)
T max(T... args)
T min(T... args)
T move(T... args)
T next(T... args)
T pop_back(T... args)
T push_back(T... args)
T remove_if(T... args)
T reverse(T... args)
T rotate(T... args)
T sort(T... args)
T tie(T... args)