HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_bidi.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "ucd_bidi_classes.hpp"
8#include "ucd_bidi_paired_bracket_types.hpp"
9#include "ucd_bidi_mirroring_glyphs.hpp"
10#include "ucd_decompositions.hpp"
11#include "ucd_general_categories.hpp"
12#include "../utility/module.hpp"
13#include "../stack.hpp"
14#include "../recursive_iterator.hpp"
15
16namespace hi::inline v1 {
17
19 enum class mode_type : uint8_t { LTR, RTL, auto_LTR, auto_RTL };
20
21 mode_type direction_mode = mode_type::auto_LTR;
22 bool enable_mirrored_brackets = true;
23 bool enable_line_separator = true;
24
25 constexpr unicode_bidi_context() noexcept = default;
26 constexpr unicode_bidi_context(unicode_bidi_context const&) noexcept = default;
27 constexpr unicode_bidi_context(unicode_bidi_context&&) noexcept = default;
28 constexpr unicode_bidi_context& operator=(unicode_bidi_context const&) noexcept = default;
29 constexpr unicode_bidi_context& operator=(unicode_bidi_context&&) noexcept = default;
30
31 constexpr unicode_bidi_context(unicode_bidi_class text_direction) noexcept
32 {
33 if (text_direction == unicode_bidi_class::L) {
34 direction_mode = mode_type::auto_LTR;
35 } else if (text_direction == unicode_bidi_class::R) {
36 direction_mode = mode_type::auto_RTL;
37 } else {
39 }
40 }
41};
42
43namespace detail {
44
49
53 char32_t code_point;
54
59
63 unicode_bidi_class direction;
64
68 unicode_bidi_class bidi_class;
69
72 unicode_bidi_paired_bracket_type bracket_type;
73
74 [[nodiscard]] constexpr unicode_bidi_char_info(std::size_t index, char32_t code_point) noexcept
75 {
76 this->index = index;
77 this->code_point = code_point;
78 this->embedding_level = 0;
79 this->direction = this->bidi_class = ucd_get_bidi_class(code_point);
80 this->bracket_type = ucd_get_bidi_paired_bracket_type(code_point);
81 }
82
86 [[nodiscard]] constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept :
87 index(index),
88 code_point(U'\ufffd'),
89 direction(bidi_class),
90 bidi_class(bidi_class),
91 bracket_type(unicode_bidi_paired_bracket_type::n),
92 embedding_level(0)
93 {
94 }
95};
96
97using unicode_bidi_char_info_vector = std::vector<unicode_bidi_char_info>;
98using unicode_bidi_char_info_iterator = unicode_bidi_char_info_vector::iterator;
99using unicode_bidi_char_info_const_iterator = unicode_bidi_char_info_vector::const_iterator;
100
103
104 characters_type characters;
105
106 template<typename... Args>
107 constexpr void emplace_character(Args&&...args) noexcept
108 {
109 characters.emplace_back(std::forward<Args>(args)...);
110 }
111};
112
113[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2(
114 unicode_bidi_char_info_iterator first,
115 unicode_bidi_char_info_iterator last,
116 unicode_bidi_context const& context,
117 bool rule_X5c) noexcept;
118
119[[nodiscard]] constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class) noexcept;
120
122 int8_t embedding_level;
123 unicode_bidi_class override_status;
124 bool isolate_status;
125
126 constexpr unicode_bidi_stack_element(int8_t embedding_level, unicode_bidi_class override_status, bool isolate_status) noexcept
127 :
128 embedding_level(embedding_level), override_status(override_status), isolate_status(isolate_status)
129 {
130 }
131};
132
134public:
135 using iterator = unicode_bidi_char_info_iterator;
136 using const_iterator = unicode_bidi_char_info_const_iterator;
138
139 constexpr unicode_bidi_level_run(iterator begin, iterator end) noexcept : _begin(begin), _end(end) {}
140
141 [[nodiscard]] constexpr iterator begin() const noexcept
142 {
143 return _begin;
144 }
145
146 [[nodiscard]] constexpr iterator end() const noexcept
147 {
148 return _end;
149 }
150
151 [[nodiscard]] constexpr int8_t embedding_level() const noexcept
152 {
153 hi_axiom(_begin != _end);
154 return _begin->embedding_level;
155 }
156
157 [[nodiscard]] constexpr bool ends_with_isolate_initiator() const noexcept
158 {
159 using enum unicode_bidi_class;
160
161 hi_axiom(_begin != _end);
162 hilet& last_char = *(_end - 1);
163 return last_char.direction == LRI || last_char.direction == RLI || last_char.direction == FSI;
164 }
165
166 [[nodiscard]] constexpr bool starts_with_PDI() const noexcept
167 {
168 hi_axiom(_begin != _end);
169 return _begin->direction == unicode_bidi_class::PDI;
170 }
171
172private:
173 iterator _begin;
174 iterator _end;
175};
176
179 using iterator = recursive_iterator<run_container_type::iterator>;
180 using const_iterator = recursive_iterator<run_container_type::const_iterator>;
181
183 unicode_bidi_class sos;
184 unicode_bidi_class eos;
185
186 constexpr unicode_bidi_isolated_run_sequence(unicode_bidi_level_run const& rhs) noexcept :
187 runs({rhs}), sos(unicode_bidi_class::ON), eos(unicode_bidi_class::ON)
188 {
189 }
190
191 [[nodiscard]] constexpr auto begin() noexcept
192 {
193 return recursive_iterator_begin(runs);
194 }
195
196 [[nodiscard]] constexpr auto end() noexcept
197 {
198 return recursive_iterator_end(runs);
199 }
200
201 [[nodiscard]] constexpr auto begin() const noexcept
202 {
203 return recursive_iterator_begin(runs);
204 }
205
206 [[nodiscard]] constexpr auto end() const noexcept
207 {
208 return recursive_iterator_end(runs);
209 }
210
211 [[nodiscard]] constexpr friend auto begin(unicode_bidi_isolated_run_sequence& rhs) noexcept
212 {
213 return rhs.begin();
214 }
215
216 [[nodiscard]] constexpr friend auto begin(unicode_bidi_isolated_run_sequence const& rhs) noexcept
217 {
218 return rhs.begin();
219 }
220
221 [[nodiscard]] constexpr friend auto end(unicode_bidi_isolated_run_sequence& rhs) noexcept
222 {
223 return rhs.end();
224 }
225
226 [[nodiscard]] constexpr friend auto end(unicode_bidi_isolated_run_sequence const& rhs) noexcept
227 {
228 return rhs.end();
229 }
230
231 constexpr void add_run(unicode_bidi_level_run const& run) noexcept
232 {
233 runs.push_back(run);
234 }
235
236 [[nodiscard]] constexpr int8_t embedding_level() const noexcept
237 {
238 hi_axiom(not runs.empty());
239 return runs.front().embedding_level();
240 }
241
242 [[nodiscard]] constexpr unicode_bidi_class embedding_direction() const noexcept
243 {
244 return (embedding_level() % 2) == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
245 }
246
247 [[nodiscard]] constexpr bool ends_with_isolate_initiator() const noexcept
248 {
249 hi_axiom(not runs.empty());
250 return runs.back().ends_with_isolate_initiator();
251 }
252};
253
255 unicode_bidi_isolated_run_sequence::iterator open;
256 unicode_bidi_isolated_run_sequence::iterator close;
257
259 unicode_bidi_isolated_run_sequence::iterator open,
260 unicode_bidi_isolated_run_sequence::iterator close) :
261 open(std::move(open)), close(std::move(close))
262 {
263 }
264
265 [[nodiscard]] constexpr friend auto
266 operator<=>(unicode_bidi_bracket_pair const& lhs, unicode_bidi_bracket_pair const& rhs) noexcept
267 {
268 return lhs.open <=> rhs.open;
269 }
270};
271
272constexpr void unicode_bidi_X1(
273 unicode_bidi_char_info_iterator first,
274 unicode_bidi_char_info_iterator last,
275 int8_t paragraph_embedding_level,
276 unicode_bidi_context const& context) noexcept
277{
278 using enum unicode_bidi_class;
279
280 constexpr int8_t max_depth = 125;
281
282 auto next_even = [](int8_t x) -> int8_t {
283 return (x % 2 == 0) ? x + 2 : x + 1;
284 };
285
286 auto next_odd = [](int8_t x) -> int8_t {
287 return (x % 2 == 1) ? x + 2 : x + 1;
288 };
289
290 long long overflow_isolate_count = 0;
291 long long overflow_embedding_count = 0;
292 long long valid_isolate_count = 0;
293
294 // X1.
295 auto stack = hi::stack<unicode_bidi_stack_element, max_depth + 2>{{paragraph_embedding_level, ON, false}};
296
297 for (auto it = first; it != last; ++it) {
298 hilet current_embedding_level = stack.back().embedding_level;
299 hilet current_override_status = stack.back().override_status;
300 hilet next_odd_embedding_level = next_odd(current_embedding_level);
301 hilet next_even_embedding_level = next_even(current_embedding_level);
302
303 auto RLI_implementation = [&] {
304 it->embedding_level = current_embedding_level;
305 if (current_override_status != ON) {
306 it->direction = current_override_status;
307 }
308
309 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
310 ++valid_isolate_count;
311 stack.emplace_back(next_odd_embedding_level, ON, true);
312 } else {
313 ++overflow_isolate_count;
314 }
315 };
316
317 auto LRI_implementation = [&] {
318 it->embedding_level = current_embedding_level;
319 if (current_override_status != ON) {
320 it->direction = current_override_status;
321 }
322
323 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
324 ++valid_isolate_count;
325 stack.emplace_back(next_even_embedding_level, ON, true);
326 } else {
327 ++overflow_isolate_count;
328 }
329 };
330
331 switch (it->direction) {
332 case RLE: // X2. Explicit embeddings
333 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
334 stack.emplace_back(next_odd_embedding_level, ON, false);
335 } else if (overflow_isolate_count == 0) {
336 ++overflow_embedding_count;
337 }
338 break;
339
340 case LRE: // X3. Explicit embeddings
341 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
342 stack.emplace_back(next_even_embedding_level, ON, false);
343 } else if (overflow_isolate_count == 0) {
344 ++overflow_embedding_count;
345 }
346 break;
347
348 case RLO: // X4. Explicit overrides
349 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
350 stack.emplace_back(next_odd_embedding_level, R, false);
351 } else if (overflow_isolate_count == 0) {
352 ++overflow_embedding_count;
353 }
354 break;
355
356 case LRO: // X5. Explicit overrides
357 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
358 stack.emplace_back(next_even_embedding_level, L, false);
359 } else if (overflow_isolate_count == 0) {
360 ++overflow_embedding_count;
361 }
362 break;
363
364 case RLI: // X5a. Isolates
365 RLI_implementation();
366 break;
367
368 case LRI: // X5b. Isolates
369 LRI_implementation();
370 break;
371
372 case FSI:
373 { // X5c. Isolates
374 auto sub_context = context;
375 sub_context.direction_mode = unicode_bidi_context::mode_type::auto_LTR;
376 hilet sub_paragraph_bidi_class = unicode_bidi_P2(it + 1, last, sub_context, true);
377 hilet sub_paragraph_embedding_level = unicode_bidi_P3(sub_paragraph_bidi_class);
378 if (sub_paragraph_embedding_level == 0) {
379 LRI_implementation();
380 } else {
381 RLI_implementation();
382 }
383 }
384 break;
385
386 case PDI: // X6a. Terminating Isolates
387 if (overflow_isolate_count > 0) {
388 --overflow_isolate_count;
389 } else if (valid_isolate_count == 0) {
390 // Mismatched PDI, do nothing.
391 ;
392 } else {
393 overflow_embedding_count = 0;
394 while (stack.back().isolate_status == false) {
395 stack.pop_back();
396 }
397 stack.pop_back();
398 --valid_isolate_count;
399 }
400
401 it->embedding_level = stack.back().embedding_level;
402 if (stack.back().override_status != ON) {
403 it->direction = stack.back().override_status;
404 }
405 break;
406
407 case PDF: // X7. Terminating Embeddings and Overrides
408 if (overflow_isolate_count > 0) {
409 // PDF is in scope of isolate, wait until the isolate is terminated.
410 ;
411 } else if (overflow_embedding_count > 0) {
412 --overflow_embedding_count;
413 } else if (stack.back().isolate_status == false && stack.size() >= 2) {
414 stack.pop_back();
415 } else {
416 // PDF does not match embedding character.
417 }
418 break;
419
420 case B: // X8. End of Paragraph
421 it->embedding_level = paragraph_embedding_level;
422 return;
423
424 case BN: // X6. Ignore
425 break;
426
427 default: // X6
428 it->embedding_level = current_embedding_level;
429 if (current_override_status != ON) {
430 it->direction = current_override_status;
431 }
432 }
433 }
434}
435
436[[nodiscard]] constexpr unicode_bidi_char_info_iterator
437unicode_bidi_X9(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept
438{
439 return std::remove_if(first, last, [](hilet& character) {
440 using enum unicode_bidi_class;
441
442 return character.direction == RLE || character.direction == LRE || character.direction == RLO ||
443 character.direction == LRO || character.direction == PDF || character.direction == BN;
444 });
445}
446
447constexpr void unicode_bidi_W1(unicode_bidi_isolated_run_sequence& sequence) noexcept
448{
449 using enum unicode_bidi_class;
450
451 auto previous_bidi_class = sequence.sos;
452 for (auto& char_info : sequence) {
453 if (char_info.direction == NSM) {
454 switch (previous_bidi_class) {
455 case LRI:
456 case RLI:
457 case FSI:
458 case PDI:
459 char_info.direction = ON;
460 break;
461 default:
462 char_info.direction = previous_bidi_class;
463 break;
464 }
465 }
466
467 previous_bidi_class = char_info.direction;
468 }
469}
470
471constexpr void unicode_bidi_W2(unicode_bidi_isolated_run_sequence& sequence) noexcept
472{
473 using enum unicode_bidi_class;
474
475 auto last_strong_direction = sequence.sos;
476 for (auto& char_info : sequence) {
477 switch (char_info.direction) {
478 case R:
479 case L:
480 case AL:
481 last_strong_direction = char_info.direction;
482 break;
483 case EN:
484 if (last_strong_direction == AL) {
485 char_info.direction = AN;
486 }
487 break;
488 default:;
489 }
490 }
491}
492
493constexpr void unicode_bidi_W3(unicode_bidi_isolated_run_sequence& sequence) noexcept
494{
495 using enum unicode_bidi_class;
496
497 for (auto& char_info : sequence) {
498 if (char_info.direction == AL) {
499 char_info.direction = R;
500 }
501 }
502}
503
504constexpr void unicode_bidi_W4(unicode_bidi_isolated_run_sequence& sequence) noexcept
505{
506 using enum unicode_bidi_class;
507
508 unicode_bidi_char_info *back1 = nullptr;
509 unicode_bidi_char_info *back2 = nullptr;
510 for (auto& char_info : sequence) {
511 if (char_info.direction == EN && back2 != nullptr && back2->direction == EN && back1 != nullptr &&
512 (back1->direction == ES || back1->direction == CS)) {
513 back1->direction = EN;
514 }
515 if (char_info.direction == AN && back2 != nullptr && back2->direction == AN && back1 != nullptr &&
516 back1->direction == CS) {
517 back1->direction = AN;
518 }
519
520 back2 = std::exchange(back1, &char_info);
521 }
522}
523
524constexpr void unicode_bidi_W5(unicode_bidi_isolated_run_sequence& sequence) noexcept
525{
526 using enum unicode_bidi_class;
527
528 auto ET_start = end(sequence);
529 auto starts_with_EN = false;
530
531 for (auto it = begin(sequence); it != end(sequence); ++it) {
532 auto& char_info = *it;
533
534 switch (char_info.direction) {
535 case ET:
536 if (starts_with_EN) {
537 char_info.direction = EN;
538 } else if (ET_start == end(sequence)) {
539 ET_start = it;
540 }
541 break;
542
543 case EN:
544 starts_with_EN = true;
545 if (ET_start != end(sequence)) {
546 for (auto jt = ET_start; jt != it; ++jt) {
547 jt->direction = EN;
548 }
549 ET_start = end(sequence);
550 }
551 break;
552
553 default:
554 starts_with_EN = false;
555 ET_start = end(sequence);
556 }
557 }
558}
559
560constexpr void unicode_bidi_W6(unicode_bidi_isolated_run_sequence& sequence) noexcept
561{
562 using enum unicode_bidi_class;
563
564 for (auto& char_info : sequence) {
565 if (char_info.direction == ET || char_info.direction == ES || char_info.direction == CS) {
566 char_info.direction = ON;
567 }
568 }
569}
570
571constexpr void unicode_bidi_W7(unicode_bidi_isolated_run_sequence& sequence) noexcept
572{
573 using enum unicode_bidi_class;
574
575 auto last_strong_direction = sequence.sos;
576 for (auto& char_info : sequence) {
577 switch (char_info.direction) {
578 case R:
579 case L:
580 last_strong_direction = char_info.direction;
581 break;
582 case EN:
583 if (last_strong_direction == L) {
584 char_info.direction = L;
585 }
586 break;
587 default:;
588 }
589 }
590}
591
592constexpr std::vector<unicode_bidi_bracket_pair> unicode_bidi_BD16(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
593{
594 struct bracket_start {
595 unicode_bidi_isolated_run_sequence::iterator it;
596 char32_t mirrored_bracket;
597
598 bracket_start(unicode_bidi_isolated_run_sequence::iterator it, char32_t mirrored_bracket) noexcept :
599 it(std::move(it)), mirrored_bracket(mirrored_bracket)
600 {
601 }
602 };
603
604 using enum unicode_bidi_class;
605
607 auto stack = hi::stack<bracket_start, 63>{};
608
609 for (auto it = begin(isolated_run_sequence); it != end(isolated_run_sequence); ++it) {
610 if (it->direction == ON) {
611 switch (it->bracket_type) {
612 case unicode_bidi_paired_bracket_type::o:
613 if (stack.full()) {
614 // Stop processing
615 std::sort(pairs.begin(), pairs.end());
616 return pairs;
617
618 } else {
619 // If there is a canonical equivalent of the opening bracket, find it's mirrored glyph
620 // to compare with the closing bracket.
621 auto mirrored_glyph = ucd_get_bidi_mirroring_glyph(it->code_point);
622 if (hilet canonical_equivalent = ucd_get_decomposition(it->code_point).canonical_equivalent()) {
623 hi_axiom(ucd_get_bidi_paired_bracket_type(*canonical_equivalent) == unicode_bidi_paired_bracket_type::o);
624
625 mirrored_glyph = ucd_get_bidi_mirroring_glyph(*canonical_equivalent);
626 }
627
628 stack.emplace_back(it, mirrored_glyph);
629 }
630 break;
631
632 case unicode_bidi_paired_bracket_type::c:
633 {
634 hilet canonical_equivalent = ucd_get_decomposition(it->code_point).canonical_equivalent();
635 for (auto jt = stack.end() - 1; jt >= stack.begin(); --jt) {
636 if (jt->mirrored_bracket == it->code_point or
637 (canonical_equivalent and jt->mirrored_bracket == *canonical_equivalent)) {
638 pairs.emplace_back(jt->it, it);
639 stack.pop_back(jt);
640 break;
641 }
642 }
643 }
644 break;
645
646 default:;
647 }
648 }
649 }
650
651 std::sort(pairs.begin(), pairs.end());
652 return pairs;
653}
654
655[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_N0_strong(unicode_bidi_class direction)
656{
657 using enum unicode_bidi_class;
658
659 switch (direction) {
660 case L:
661 return L;
662 case R:
663 case EN:
664 case AN:
665 return R;
666 default:
667 return ON;
668 }
669}
670
671[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_N0_preceding_strong_type(
672 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
673 unicode_bidi_isolated_run_sequence::iterator const& open_bracket) noexcept
674{
675 using enum unicode_bidi_class;
676
677 auto it = open_bracket;
678 while (it != begin(isolated_run_sequence)) {
679 --it;
680
681 if (hilet direction = unicode_bidi_N0_strong(it->direction); direction != ON) {
682 return direction;
683 }
684 }
685
686 return isolated_run_sequence.sos;
687}
688
689[[nodiscard]] constexpr unicode_bidi_class
690unicode_bidi_N0_enclosed_strong_type(unicode_bidi_bracket_pair const& pair, unicode_bidi_class embedding_direction) noexcept
691{
692 using enum unicode_bidi_class;
693
694 auto opposite_direction = ON;
695 for (auto it = pair.open + 1; it != pair.close; ++it) {
696 hilet direction = unicode_bidi_N0_strong(it->direction);
697 if (direction == ON) {
698 continue;
699 }
700 if (direction == embedding_direction) {
701 return direction;
702 }
703 opposite_direction = direction;
704 }
705
706 return opposite_direction;
707}
708
709constexpr void unicode_bidi_N0(unicode_bidi_isolated_run_sequence& isolated_run_sequence, unicode_bidi_context const& context)
710{
711 using enum unicode_bidi_class;
712
713 if (not context.enable_mirrored_brackets) {
714 return;
715 }
716
717 auto bracket_pairs = unicode_bidi_BD16(isolated_run_sequence);
718 hilet embedding_direction = isolated_run_sequence.embedding_direction();
719
720 for (auto& pair : bracket_pairs) {
721 auto pair_direction = unicode_bidi_N0_enclosed_strong_type(pair, embedding_direction);
722
723 if (pair_direction == ON) {
724 continue;
725 }
726
727 if (pair_direction != embedding_direction) {
728 pair_direction = unicode_bidi_N0_preceding_strong_type(isolated_run_sequence, pair.open);
729
730 if (pair_direction == embedding_direction || pair_direction == ON) {
731 pair_direction = embedding_direction;
732 }
733 }
734
735 pair.open->direction = pair_direction;
736 pair.close->direction = pair_direction;
737
738 for (auto it = pair.open + 1; it != pair.close; ++it) {
739 if (it->bidi_class != NSM) {
740 break;
741 }
742 it->direction = pair_direction;
743 }
744
745 for (auto it = pair.close + 1; it != end(isolated_run_sequence); ++it) {
746 if (it->bidi_class != NSM) {
747 break;
748 }
749 it->direction = pair_direction;
750 }
751 }
752}
753
754constexpr void unicode_bidi_N1(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
755{
756 using enum unicode_bidi_class;
757
758 auto direction_before_NI = isolated_run_sequence.sos;
759 auto first_NI = end(isolated_run_sequence);
760
761 for (auto it = begin(isolated_run_sequence); it != end(isolated_run_sequence); ++it) {
762 hilet& char_info = *it;
763 if (first_NI != end(isolated_run_sequence)) {
764 if (!is_NI(char_info.direction)) {
765 hilet direction_after_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
766
767 if ((direction_before_NI == L || direction_before_NI == R) && direction_before_NI == direction_after_NI) {
768 std::for_each(first_NI, it, [direction_before_NI](auto& item) {
769 item.direction = direction_before_NI;
770 });
771 }
772
773 first_NI = end(isolated_run_sequence);
774 direction_before_NI = direction_after_NI;
775 }
776
777 } else if (is_NI(char_info.direction)) {
778 first_NI = it;
779 } else {
780 direction_before_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
781 }
782 }
783
784 if (first_NI != end(isolated_run_sequence) && direction_before_NI == isolated_run_sequence.eos) {
785 std::for_each(first_NI, end(isolated_run_sequence), [direction_before_NI](auto& item) {
786 item.direction = direction_before_NI;
787 });
788 }
789}
790
791constexpr void unicode_bidi_N2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
792{
793 hilet embedding_direction = isolated_run_sequence.embedding_direction();
794
795 for (auto& char_info : isolated_run_sequence) {
796 if (is_NI(char_info.direction)) {
797 char_info.direction = embedding_direction;
798 }
799 }
800}
801
802constexpr void unicode_bidi_I1_I2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
803{
804 using enum unicode_bidi_class;
805
806 for (auto& char_info : isolated_run_sequence) {
807 if ((char_info.embedding_level % 2) == 0) {
808 // I1
809 if (char_info.direction == R) {
810 char_info.embedding_level += 1;
811 } else if (char_info.direction == AN || char_info.direction == EN) {
812 char_info.embedding_level += 2;
813 }
814 } else {
815 // I2
816 if (char_info.direction == L || char_info.direction == AN || char_info.direction == EN) {
817 char_info.embedding_level += 1;
818 }
819 }
820 }
821}
822
824unicode_bidi_BD7(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept
825{
827
828 auto embedding_level = int8_t{0};
829 auto run_start = first;
830 for (auto it = first; it != last; ++it) {
831 if (it == first) {
832 embedding_level = it->embedding_level;
833
834 } else if (it->embedding_level != embedding_level) {
835 embedding_level = it->embedding_level;
836
837 level_runs.emplace_back(run_start, it);
838 run_start = it;
839 }
840 }
841 if (run_start != last) {
842 level_runs.emplace_back(run_start, last);
843 }
844
845 return level_runs;
846}
847
849unicode_bidi_BD13(std::vector<unicode_bidi_level_run> level_runs) noexcept
850{
852
853 std::reverse(begin(level_runs), end(level_runs));
854 while (!level_runs.empty()) {
855 auto isolated_run_sequence = unicode_bidi_isolated_run_sequence(level_runs.back());
856 level_runs.pop_back();
857
858 while (isolated_run_sequence.ends_with_isolate_initiator() && !level_runs.empty()) {
859 // Search for matching PDI in the run_levels. This should have the same embedding level.
860 auto isolation_level = 1;
861 for (auto it = std::rbegin(level_runs); it != std::rend(level_runs); ++it) {
862 if (it->starts_with_PDI() && --isolation_level == 0) {
863 hi_axiom(it->embedding_level() == isolated_run_sequence.embedding_level());
864 isolated_run_sequence.add_run(*it);
865 level_runs.erase(std::next(it).base());
866 break;
867 }
868 if (it->ends_with_isolate_initiator()) {
869 ++isolation_level;
870 }
871 }
872
873 if (isolation_level != 0) {
874 // No PDI that matches the isolate initiator of this isolated run sequence.
875 break;
876 }
877 }
878
879 r.push_back(std::move(isolated_run_sequence));
880 }
881
882 return r;
883}
884
885[[nodiscard]] constexpr std::pair<unicode_bidi_class, unicode_bidi_class> unicode_bidi_X10_sos_eos(
886 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
887 unicode_bidi_char_info_iterator first,
888 unicode_bidi_char_info_iterator last,
889 int8_t paragraph_embedding_level) noexcept
890{
891 if (begin(isolated_run_sequence) != end(isolated_run_sequence)) {
892 // The calculations on the iterator for last_char_it is required because
893 // calling child() on an end iterator is undefined behavior.
894 hilet first_char_it = begin(isolated_run_sequence).child();
895 hilet last_char_it = (end(isolated_run_sequence) - 1).child() + 1;
896
897 hilet has_char_before = first_char_it != first;
898 hilet has_char_after = last_char_it != last;
899
900 hilet start_embedding_level = std::max(
901 isolated_run_sequence.embedding_level(),
902 has_char_before ? (first_char_it - 1)->embedding_level : paragraph_embedding_level);
903 hilet end_embedding_level = std::max(
904 isolated_run_sequence.embedding_level(),
905 has_char_after && !isolated_run_sequence.ends_with_isolate_initiator() ? last_char_it->embedding_level :
906 paragraph_embedding_level);
907
908 return {
909 (start_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L,
910 (end_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L};
911 } else {
912 return {
913 (paragraph_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L,
914 (paragraph_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L};
915 }
916}
917
918constexpr void unicode_bidi_X10(
919 unicode_bidi_char_info_iterator first,
920 unicode_bidi_char_info_iterator last,
921 int8_t paragraph_embedding_level,
922 unicode_bidi_context const& context) noexcept
923{
924 auto isolated_run_sequence_set = unicode_bidi_BD13(unicode_bidi_BD7(first, last));
925
926 // All sos and eos calculations must be done before W*, N*, I* parts are executed,
927 // since those will change the embedding levels of the characters outside of the
928 // current isolated_run_sequence that the unicode_bidi_X10_sos_eos() depends on.
929 for (auto& isolated_run_sequence : isolated_run_sequence_set) {
930 std::tie(isolated_run_sequence.sos, isolated_run_sequence.eos) =
931 unicode_bidi_X10_sos_eos(isolated_run_sequence, first, last, paragraph_embedding_level);
932 }
933
934 for (auto& isolated_run_sequence : isolated_run_sequence_set) {
935 unicode_bidi_W1(isolated_run_sequence);
936 unicode_bidi_W2(isolated_run_sequence);
937 unicode_bidi_W3(isolated_run_sequence);
938 unicode_bidi_W4(isolated_run_sequence);
939 unicode_bidi_W5(isolated_run_sequence);
940 unicode_bidi_W6(isolated_run_sequence);
941 unicode_bidi_W7(isolated_run_sequence);
942 unicode_bidi_N0(isolated_run_sequence, context);
943 unicode_bidi_N1(isolated_run_sequence);
944 unicode_bidi_N2(isolated_run_sequence);
945 unicode_bidi_I1_I2(isolated_run_sequence);
946 }
947}
948
949[[nodiscard]] constexpr std::pair<int8_t, int8_t> unicode_bidi_L1(
950 unicode_bidi_char_info_iterator first,
951 unicode_bidi_char_info_iterator last,
952 int8_t paragraph_embedding_level) noexcept
953{
954 using enum unicode_bidi_class;
955
956 auto lowest_odd = std::numeric_limits<int8_t>::max();
957 auto highest = paragraph_embedding_level;
958 auto preceding_is_segment = true;
959
960 auto it = last;
961 while (it != first) {
962 --it;
963
964 auto bidi_class = it->bidi_class;
965
966 if (bidi_class == B || bidi_class == S) {
967 it->embedding_level = paragraph_embedding_level;
968 preceding_is_segment = true;
969
970 } else if (preceding_is_segment && (bidi_class == WS || is_isolate_formatter(bidi_class))) {
971 it->embedding_level = paragraph_embedding_level;
972 preceding_is_segment = true;
973
974 } else {
975 highest = std::max(highest, it->embedding_level);
976 if ((it->embedding_level % 2) == 1) {
977 lowest_odd = std::min(lowest_odd, it->embedding_level);
978 }
979
980 preceding_is_segment = false;
981 }
982 }
983
984 if ((paragraph_embedding_level % 2) == 1) {
985 lowest_odd = std::min(lowest_odd, paragraph_embedding_level);
986 }
987
988 if (lowest_odd > highest) {
989 // If there where no odd levels below the highest level
990 if (highest % 2 == 1) {
991 // We need to reverse at least once if the highest was odd.
992 lowest_odd = highest;
993 } else {
994 // We need to reverse at least twice if the highest was even.
995 // This may yield a negative lowest_odd.
996 lowest_odd = highest - 1;
997 }
998 }
999
1000 return {lowest_odd, highest};
1001}
1002
1003constexpr void unicode_bidi_L2(
1004 unicode_bidi_char_info_iterator first,
1005 unicode_bidi_char_info_iterator last,
1006 int8_t lowest_odd,
1007 int8_t highest) noexcept
1008{
1009 for (int8_t level = highest; level >= lowest_odd; --level) {
1010 auto sequence_start = last;
1011 for (auto it = first; it != last; ++it) {
1012 if (sequence_start == last) {
1013 if (it->embedding_level >= level) {
1014 sequence_start = it;
1015 }
1016 } else if (it->embedding_level < level) {
1017 std::reverse(sequence_start, it);
1018 sequence_start = last;
1019 }
1020 }
1021 if (sequence_start != last) {
1022 std::reverse(sequence_start, last);
1023 }
1024 }
1025}
1026
1027constexpr void unicode_bidi_L3(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept {}
1028
1029[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2_default(unicode_bidi_context const& context) noexcept
1030{
1031 if (context.direction_mode == unicode_bidi_context::mode_type::auto_LTR) {
1032 return unicode_bidi_class::L;
1033 } else if (context.direction_mode == unicode_bidi_context::mode_type::auto_RTL) {
1034 return unicode_bidi_class::R;
1035 } else {
1036 hi_no_default();
1037 }
1038}
1039
1040[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2(
1041 unicode_bidi_char_info_iterator first,
1042 unicode_bidi_char_info_iterator last,
1043 unicode_bidi_context const& context,
1044 bool rule_X5c) noexcept
1045{
1046 using enum unicode_bidi_class;
1047
1048 if (context.direction_mode == unicode_bidi_context::mode_type::LTR) {
1049 return unicode_bidi_class::L;
1050 } else if (context.direction_mode == unicode_bidi_context::mode_type::RTL) {
1051 return unicode_bidi_class::R;
1052 }
1053
1054 long long isolate_level = 0;
1055 for (auto it = first; it != last; ++it) {
1056 switch (it->direction) {
1057 case L:
1058 case AL:
1059 case R:
1060 if (isolate_level == 0) {
1061 return it->direction;
1062 }
1063 break;
1064 case LRI:
1065 case RLI:
1066 case FSI:
1067 ++isolate_level;
1068 break;
1069 case PDI:
1070 if (isolate_level > 0) {
1071 --isolate_level;
1072 } else if (rule_X5c) {
1073 // End at the matching PDI, when recursing for rule X5c.
1074 return unicode_bidi_P2_default(context);
1075 }
1076 break;
1077 default:;
1078 }
1079 }
1080 return unicode_bidi_P2_default(context);
1081}
1082
1083[[nodiscard]] constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class) noexcept
1084{
1085 return wide_cast<int8_t>(paragraph_bidi_class == unicode_bidi_class::AL or paragraph_bidi_class == unicode_bidi_class::R);
1086}
1087
1088constexpr void unicode_bidi_P1_line(
1089 unicode_bidi_char_info_iterator first,
1090 unicode_bidi_char_info_iterator last,
1091 int8_t paragraph_embedding_level,
1092 unicode_bidi_context const& context) noexcept
1093{
1094 hilet[lowest_odd, highest] = unicode_bidi_L1(first, last, paragraph_embedding_level);
1095 unicode_bidi_L2(first, last, lowest_odd, highest);
1096 unicode_bidi_L3(first, last);
1097 // L4 is delayed after the original array has been shuffled.
1098}
1099
1100[[nodiscard]] constexpr std::pair<int8_t, unicode_bidi_class> unicode_bidi_P2_P3(
1101 unicode_bidi_char_info_iterator first,
1102 unicode_bidi_char_info_iterator last,
1103 unicode_bidi_context const& context) noexcept
1104{
1105 hilet default_paragraph_direction = unicode_bidi_P2(first, last, context, false);
1106 hilet paragraph_embedding_level = unicode_bidi_P3(default_paragraph_direction);
1107 hilet paragraph_direction = paragraph_embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1108 return {paragraph_embedding_level, paragraph_direction};
1109}
1110
1111[[nodiscard]] constexpr std::pair<unicode_bidi_char_info_iterator, unicode_bidi_class> unicode_bidi_P1_paragraph(
1112 unicode_bidi_char_info_iterator first,
1113 unicode_bidi_char_info_iterator last,
1114 unicode_bidi_context const& context) noexcept
1115{
1116 hilet[paragraph_embedding_level, paragraph_direction] = unicode_bidi_P2_P3(first, last, context);
1117
1118 unicode_bidi_X1(first, last, paragraph_embedding_level, context);
1119 last = unicode_bidi_X9(first, last);
1120 unicode_bidi_X10(first, last, paragraph_embedding_level, context);
1121
1122 auto line_begin = first;
1123 for (auto it = first; it != last; ++it) {
1124 hilet general_category = ucd_get_general_category(it->code_point);
1125 if (context.enable_line_separator and general_category == unicode_general_category::Zl) {
1126 hilet line_end = it + 1;
1127 unicode_bidi_P1_line(line_begin, line_end, paragraph_embedding_level, context);
1128 line_begin = line_end;
1129 }
1130 }
1131
1132 if (line_begin != last) {
1133 unicode_bidi_P1_line(line_begin, last, paragraph_embedding_level, context);
1134 }
1135
1136 return {last, paragraph_direction};
1137}
1138
1140 unicode_bidi_char_info_iterator first,
1141 unicode_bidi_char_info_iterator last,
1142 unicode_bidi_context const& context) noexcept
1143{
1144 auto it = first;
1145 auto paragraph_begin = it;
1146 auto paragraph_directions = std::vector<unicode_bidi_class>{};
1147 while (it != last) {
1148 if (it->direction == unicode_bidi_class::B) {
1149 hilet paragraph_end = it + 1;
1150 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, paragraph_end, context);
1151 paragraph_directions.push_back(paragraph_bidi_class);
1152
1153 // Move the removed items of the paragraph to the end of the text.
1154 std::rotate(new_paragraph_end, paragraph_end, last);
1155 last -= std::distance(new_paragraph_end, paragraph_end);
1156
1157 paragraph_begin = it = new_paragraph_end;
1158 } else {
1159 ++it;
1160 }
1161 }
1162
1163 if (paragraph_begin != last) {
1164 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, last, context);
1165 paragraph_directions.push_back(paragraph_bidi_class);
1166 last = new_paragraph_end;
1167 }
1168
1169 return {last, std::move(paragraph_directions)};
1170}
1171
1172template<typename OutputIt, typename SetCodePoint, typename SetTextDirection>
1173constexpr void unicode_bidi_L4(
1174 unicode_bidi_char_info_iterator first,
1175 unicode_bidi_char_info_iterator last,
1176 OutputIt output_it,
1177 SetCodePoint set_code_point,
1178 SetTextDirection set_text_direction) noexcept
1179{
1180 for (auto it = first; it != last; ++it, ++output_it) {
1181 hilet text_direction = it->embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1182 set_text_direction(*output_it, text_direction);
1183 if (it->direction == unicode_bidi_class::R and it->bracket_type != unicode_bidi_paired_bracket_type::n) {
1184 set_code_point(*output_it, ucd_get_bidi_mirroring_glyph(it->code_point));
1185 }
1186 }
1187}
1188
1189} // namespace detail
1190
1214template<typename It, typename GetCodePoint, typename SetCodePoint, typename SetTextDirection>
1216 It first,
1217 It last,
1218 GetCodePoint get_code_point,
1219 SetCodePoint set_code_point,
1220 SetTextDirection set_text_direction,
1221 unicode_bidi_context const& context = {})
1222{
1223 auto proxy = detail::unicode_bidi_char_info_vector{};
1224 proxy.reserve(std::distance(first, last));
1225
1226 std::size_t index = 0;
1227 for (auto it = first; it != last; ++it) {
1228 proxy.emplace_back(index++, get_code_point(*it));
1229 }
1230
1231 auto [proxy_last, paragraph_directions] = detail::unicode_bidi_P1(begin(proxy), end(proxy), context);
1232 last = shuffle_by_index(first, last, begin(proxy), proxy_last, [](hilet& item) {
1233 return item.index;
1234 });
1235
1236 detail::unicode_bidi_L4(
1237 begin(proxy),
1238 proxy_last,
1239 first,
1240 std::forward<SetCodePoint>(set_code_point),
1241 std::forward<SetTextDirection>(set_text_direction));
1242 return {last, std::move(paragraph_directions)};
1243}
1244
1253template<typename It, typename GetCodePoint>
1254[[nodiscard]] constexpr unicode_bidi_class
1255unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const& context = {})
1256{
1257 auto proxy = detail::unicode_bidi_char_info_vector{};
1258 proxy.reserve(std::distance(first, last));
1259
1260 std::size_t index = 0;
1261 for (auto it = first; it != last; ++it) {
1262 proxy.emplace_back(index++, get_code_point(*it));
1263 if (proxy.back().direction == unicode_bidi_class::B) {
1264 // Break early when end-of-paragraph symbol is found.
1265 break;
1266 }
1267 }
1268
1269 return detail::unicode_bidi_P2_P3(begin(proxy), end(proxy), context).second;
1270}
1271
1282template<typename It, typename EndIt, typename CodePointFunc>
1283constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const& code_point_func)
1284{
1285 return std::remove_if(first, last, [&](hilet& item) {
1286 hilet code_point = code_point_func(item);
1287 hilet bidi_class = ucd_get_bidi_class(code_point);
1288 return is_control(bidi_class);
1289 });
1290}
1291
1292} // namespace hi::inline v1
#define hi_no_default(...)
This part of the code should not be reachable, unless there is a programming bug.
Definition assert.hpp:279
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
auto shuffle_by_index(auto first, auto last, auto indices_first, auto indices_last, auto index_op) noexcept
Shuffle a container based on a list of indices.
Definition algorithm.hpp:232
constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const &code_point_func)
Removes control characters which will not survive the bidi-algorithm.
Definition unicode_bidi.hpp:1283
constexpr std::pair< It, std::vector< unicode_bidi_class > > unicode_bidi(It first, It last, GetCodePoint get_code_point, SetCodePoint set_code_point, SetTextDirection set_text_direction, unicode_bidi_context const &context={})
Reorder a given range of characters based on the unicode_bidi algorithm.
Definition unicode_bidi.hpp:1215
constexpr unicode_bidi_class unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const &context={})
Get the unicode bidi direction for the first paragraph and context.
Definition unicode_bidi.hpp:1255
constexpr char32_t ucd_get_bidi_mirroring_glyph(char32_t code_point) noexcept
Get the bidi-mirroring-glyph for a code-point.
Definition ucd_bidi_mirroring_glyphs.hpp:171
constexpr ucd_decomposition_info ucd_get_decomposition(char32_t code_point) noexcept
Get the decomposition info of a code-point.
Definition ucd_decompositions.hpp:4798
unicode_bidi_class
Bidirectional class Unicode Standard Annex #9: https://unicode.org/reports/tr9/.
Definition ucd_bidi_classes.hpp:856
constexpr std::optional< char32_t > canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition ucd_decompositions.hpp:4785
Definition unicode_bidi.hpp:18
Definition unicode_bidi.hpp:45
unicode_bidi_class direction
Current computed direction of the code-point.
Definition unicode_bidi.hpp:63
constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept
Constructor for testing to bypass normal initialization.
Definition unicode_bidi.hpp:86
int8_t embedding_level
The embedding level.
Definition unicode_bidi.hpp:58
unicode_bidi_class bidi_class
The original bidi class of the code-point.
Definition unicode_bidi.hpp:68
unicode_bidi_paired_bracket_type bracket_type
The type of bidi-paired-bracket.
Definition unicode_bidi.hpp:72
std::size_t index
Index from the first character in the original list.
Definition unicode_bidi.hpp:48
char32_t code_point
The current code point.
Definition unicode_bidi.hpp:53
Definition unicode_bidi.hpp:101
Definition unicode_bidi.hpp:121
Definition unicode_bidi.hpp:133
Definition unicode_bidi.hpp:177
Definition unicode_bidi.hpp:254
Definition concepts.hpp:39
T back(T... args)
T begin(T... args)
T distance(T... args)
T emplace_back(T... args)
T empty(T... args)
T end(T... args)
T erase(T... args)
T for_each(T... args)
T front(T... args)
T max(T... args)
T min(T... args)
T move(T... args)
T next(T... args)
T pop_back(T... args)
T push_back(T... args)
T remove_if(T... args)
T reverse(T... args)
T rotate(T... args)
T sort(T... args)
T tie(T... args)