HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_bidi.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "ucd_bidi_classes.hpp"
8#include "ucd_bidi_paired_bracket_types.hpp"
9#include "ucd_bidi_mirroring_glyphs.hpp"
10#include "ucd_decompositions.hpp"
11#include "ucd_general_categories.hpp"
12#include "../utility/utility.hpp"
13#include "../container/module.hpp"
14#include "../algorithm/module.hpp"
15#include "../macros.hpp"
16
17
18
19namespace hi::inline v1 {
20
22 enum class mode_type : uint8_t { LTR, RTL, auto_LTR, auto_RTL };
23
24 mode_type direction_mode = mode_type::auto_LTR;
25 bool enable_mirrored_brackets = true;
26 bool enable_line_separator = true;
27
28 constexpr unicode_bidi_context() noexcept = default;
29 constexpr unicode_bidi_context(unicode_bidi_context const&) noexcept = default;
30 constexpr unicode_bidi_context(unicode_bidi_context&&) noexcept = default;
31 constexpr unicode_bidi_context& operator=(unicode_bidi_context const&) noexcept = default;
32 constexpr unicode_bidi_context& operator=(unicode_bidi_context&&) noexcept = default;
33
34 constexpr unicode_bidi_context(unicode_bidi_class text_direction) noexcept
35 {
36 if (text_direction == unicode_bidi_class::L) {
37 direction_mode = mode_type::auto_LTR;
38 } else if (text_direction == unicode_bidi_class::R) {
39 direction_mode = mode_type::auto_RTL;
40 } else {
41 hi_no_default();
42 }
43 }
44};
45
46namespace detail {
47
52
56 char32_t code_point;
57
62
66 unicode_bidi_class direction;
67
71 unicode_bidi_class bidi_class;
72
75 unicode_bidi_paired_bracket_type bracket_type;
76
77 [[nodiscard]] constexpr unicode_bidi_char_info(std::size_t index, char32_t code_point) noexcept
78 {
79 this->index = index;
80 this->code_point = code_point;
81 this->embedding_level = 0;
82 this->direction = this->bidi_class = ucd_get_bidi_class(code_point);
83 this->bracket_type = ucd_get_bidi_paired_bracket_type(code_point);
84 }
85
89 [[nodiscard]] constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept :
90 index(index),
91 code_point(U'\ufffd'),
92 direction(bidi_class),
93 bidi_class(bidi_class),
94 bracket_type(unicode_bidi_paired_bracket_type::n),
95 embedding_level(0)
96 {
97 }
98};
99
100using unicode_bidi_char_info_vector = std::vector<unicode_bidi_char_info>;
101using unicode_bidi_char_info_iterator = unicode_bidi_char_info_vector::iterator;
102using unicode_bidi_char_info_const_iterator = unicode_bidi_char_info_vector::const_iterator;
103
106
107 characters_type characters;
108
109 template<typename... Args>
110 constexpr void emplace_character(Args&&...args) noexcept
111 {
112 characters.emplace_back(std::forward<Args>(args)...);
113 }
114};
115
116[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2(
117 unicode_bidi_char_info_iterator first,
118 unicode_bidi_char_info_iterator last,
119 unicode_bidi_context const& context,
120 bool rule_X5c) noexcept;
121
122[[nodiscard]] constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class) noexcept;
123
125 int8_t embedding_level;
126 unicode_bidi_class override_status;
127 bool isolate_status;
128
129 constexpr unicode_bidi_stack_element(int8_t embedding_level, unicode_bidi_class override_status, bool isolate_status) noexcept
130 :
131 embedding_level(embedding_level), override_status(override_status), isolate_status(isolate_status)
132 {
133 }
134};
135
137public:
138 using iterator = unicode_bidi_char_info_iterator;
139 using const_iterator = unicode_bidi_char_info_const_iterator;
141
142 constexpr unicode_bidi_level_run(iterator begin, iterator end) noexcept : _begin(begin), _end(end) {}
143
144 [[nodiscard]] constexpr iterator begin() const noexcept
145 {
146 return _begin;
147 }
148
149 [[nodiscard]] constexpr iterator end() const noexcept
150 {
151 return _end;
152 }
153
154 [[nodiscard]] constexpr int8_t embedding_level() const noexcept
155 {
156 hi_axiom(_begin != _end);
157 return _begin->embedding_level;
158 }
159
160 [[nodiscard]] constexpr bool ends_with_isolate_initiator() const noexcept
161 {
162 using enum unicode_bidi_class;
163
164 hi_axiom(_begin != _end);
165 hilet& last_char = *(_end - 1);
166 return last_char.direction == LRI || last_char.direction == RLI || last_char.direction == FSI;
167 }
168
169 [[nodiscard]] constexpr bool starts_with_PDI() const noexcept
170 {
171 hi_axiom(_begin != _end);
172 return _begin->direction == unicode_bidi_class::PDI;
173 }
174
175private:
176 iterator _begin;
177 iterator _end;
178};
179
182 using iterator = recursive_iterator<run_container_type::iterator>;
183 using const_iterator = recursive_iterator<run_container_type::const_iterator>;
184
186 unicode_bidi_class sos;
187 unicode_bidi_class eos;
188
189 constexpr unicode_bidi_isolated_run_sequence(unicode_bidi_level_run const& rhs) noexcept :
190 runs({rhs}), sos(unicode_bidi_class::ON), eos(unicode_bidi_class::ON)
191 {
192 }
193
194 [[nodiscard]] constexpr auto begin() noexcept
195 {
196 return recursive_iterator_begin(runs);
197 }
198
199 [[nodiscard]] constexpr auto end() noexcept
200 {
201 return recursive_iterator_end(runs);
202 }
203
204 [[nodiscard]] constexpr auto begin() const noexcept
205 {
206 return recursive_iterator_begin(runs);
207 }
208
209 [[nodiscard]] constexpr auto end() const noexcept
210 {
211 return recursive_iterator_end(runs);
212 }
213
214 [[nodiscard]] constexpr friend auto begin(unicode_bidi_isolated_run_sequence& rhs) noexcept
215 {
216 return rhs.begin();
217 }
218
219 [[nodiscard]] constexpr friend auto begin(unicode_bidi_isolated_run_sequence const& rhs) noexcept
220 {
221 return rhs.begin();
222 }
223
224 [[nodiscard]] constexpr friend auto end(unicode_bidi_isolated_run_sequence& rhs) noexcept
225 {
226 return rhs.end();
227 }
228
229 [[nodiscard]] constexpr friend auto end(unicode_bidi_isolated_run_sequence const& rhs) noexcept
230 {
231 return rhs.end();
232 }
233
234 constexpr void add_run(unicode_bidi_level_run const& run) noexcept
235 {
236 runs.push_back(run);
237 }
238
239 [[nodiscard]] constexpr int8_t embedding_level() const noexcept
240 {
241 hi_axiom(not runs.empty());
242 return runs.front().embedding_level();
243 }
244
245 [[nodiscard]] constexpr unicode_bidi_class embedding_direction() const noexcept
246 {
247 return (embedding_level() % 2) == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
248 }
249
250 [[nodiscard]] constexpr bool ends_with_isolate_initiator() const noexcept
251 {
252 hi_axiom(not runs.empty());
253 return runs.back().ends_with_isolate_initiator();
254 }
255};
256
258 unicode_bidi_isolated_run_sequence::iterator open;
259 unicode_bidi_isolated_run_sequence::iterator close;
260
262 unicode_bidi_isolated_run_sequence::iterator open,
263 unicode_bidi_isolated_run_sequence::iterator close) :
264 open(std::move(open)), close(std::move(close))
265 {
266 }
267
268 [[nodiscard]] constexpr friend auto
269 operator<=>(unicode_bidi_bracket_pair const& lhs, unicode_bidi_bracket_pair const& rhs) noexcept
270 {
271 return lhs.open <=> rhs.open;
272 }
273};
274
275constexpr void unicode_bidi_X1(
276 unicode_bidi_char_info_iterator first,
277 unicode_bidi_char_info_iterator last,
278 int8_t paragraph_embedding_level,
279 unicode_bidi_context const& context) noexcept
280{
281 using enum unicode_bidi_class;
282
283 constexpr int8_t max_depth = 125;
284
285 auto next_even = [](int8_t x) -> int8_t {
286 return (x % 2 == 0) ? x + 2 : x + 1;
287 };
288
289 auto next_odd = [](int8_t x) -> int8_t {
290 return (x % 2 == 1) ? x + 2 : x + 1;
291 };
292
293 long long overflow_isolate_count = 0;
294 long long overflow_embedding_count = 0;
295 long long valid_isolate_count = 0;
296
297 // X1.
298 auto stack = hi::stack<unicode_bidi_stack_element, max_depth + 2>{{paragraph_embedding_level, ON, false}};
299
300 for (auto it = first; it != last; ++it) {
301 hilet current_embedding_level = stack.back().embedding_level;
302 hilet current_override_status = stack.back().override_status;
303 hilet next_odd_embedding_level = next_odd(current_embedding_level);
304 hilet next_even_embedding_level = next_even(current_embedding_level);
305
306 auto RLI_implementation = [&] {
307 it->embedding_level = current_embedding_level;
308 if (current_override_status != ON) {
309 it->direction = current_override_status;
310 }
311
312 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
313 ++valid_isolate_count;
314 stack.emplace_back(next_odd_embedding_level, ON, true);
315 } else {
316 ++overflow_isolate_count;
317 }
318 };
319
320 auto LRI_implementation = [&] {
321 it->embedding_level = current_embedding_level;
322 if (current_override_status != ON) {
323 it->direction = current_override_status;
324 }
325
326 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
327 ++valid_isolate_count;
328 stack.emplace_back(next_even_embedding_level, ON, true);
329 } else {
330 ++overflow_isolate_count;
331 }
332 };
333
334 switch (it->direction) {
335 case RLE: // X2. Explicit embeddings
336 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
337 stack.emplace_back(next_odd_embedding_level, ON, false);
338 } else if (overflow_isolate_count == 0) {
339 ++overflow_embedding_count;
340 }
341 break;
342
343 case LRE: // X3. Explicit embeddings
344 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
345 stack.emplace_back(next_even_embedding_level, ON, false);
346 } else if (overflow_isolate_count == 0) {
347 ++overflow_embedding_count;
348 }
349 break;
350
351 case RLO: // X4. Explicit overrides
352 if (next_odd_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
353 stack.emplace_back(next_odd_embedding_level, R, false);
354 } else if (overflow_isolate_count == 0) {
355 ++overflow_embedding_count;
356 }
357 break;
358
359 case LRO: // X5. Explicit overrides
360 if (next_even_embedding_level <= max_depth && overflow_isolate_count == 0 && overflow_embedding_count == 0) {
361 stack.emplace_back(next_even_embedding_level, L, false);
362 } else if (overflow_isolate_count == 0) {
363 ++overflow_embedding_count;
364 }
365 break;
366
367 case RLI: // X5a. Isolates
368 RLI_implementation();
369 break;
370
371 case LRI: // X5b. Isolates
372 LRI_implementation();
373 break;
374
375 case FSI:
376 { // X5c. Isolates
377 auto sub_context = context;
378 sub_context.direction_mode = unicode_bidi_context::mode_type::auto_LTR;
379 hilet sub_paragraph_bidi_class = unicode_bidi_P2(it + 1, last, sub_context, true);
380 hilet sub_paragraph_embedding_level = unicode_bidi_P3(sub_paragraph_bidi_class);
381 if (sub_paragraph_embedding_level == 0) {
382 LRI_implementation();
383 } else {
384 RLI_implementation();
385 }
386 }
387 break;
388
389 case PDI: // X6a. Terminating Isolates
390 if (overflow_isolate_count > 0) {
391 --overflow_isolate_count;
392 } else if (valid_isolate_count == 0) {
393 // Mismatched PDI, do nothing.
394 ;
395 } else {
396 overflow_embedding_count = 0;
397 while (stack.back().isolate_status == false) {
398 stack.pop_back();
399 }
400 stack.pop_back();
401 --valid_isolate_count;
402 }
403
404 it->embedding_level = stack.back().embedding_level;
405 if (stack.back().override_status != ON) {
406 it->direction = stack.back().override_status;
407 }
408 break;
409
410 case PDF: // X7. Terminating Embeddings and Overrides
411 if (overflow_isolate_count > 0) {
412 // PDF is in scope of isolate, wait until the isolate is terminated.
413 ;
414 } else if (overflow_embedding_count > 0) {
415 --overflow_embedding_count;
416 } else if (stack.back().isolate_status == false && stack.size() >= 2) {
417 stack.pop_back();
418 } else {
419 // PDF does not match embedding character.
420 }
421 break;
422
423 case B: // X8. End of Paragraph
424 it->embedding_level = paragraph_embedding_level;
425 return;
426
427 case BN: // X6. Ignore
428 break;
429
430 default: // X6
431 it->embedding_level = current_embedding_level;
432 if (current_override_status != ON) {
433 it->direction = current_override_status;
434 }
435 }
436 }
437}
438
439[[nodiscard]] constexpr unicode_bidi_char_info_iterator
440unicode_bidi_X9(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept
441{
442 return std::remove_if(first, last, [](hilet& character) {
443 using enum unicode_bidi_class;
444
445 return character.direction == RLE || character.direction == LRE || character.direction == RLO ||
446 character.direction == LRO || character.direction == PDF || character.direction == BN;
447 });
448}
449
450constexpr void unicode_bidi_W1(unicode_bidi_isolated_run_sequence& sequence) noexcept
451{
452 using enum unicode_bidi_class;
453
454 auto previous_bidi_class = sequence.sos;
455 for (auto& char_info : sequence) {
456 if (char_info.direction == NSM) {
457 switch (previous_bidi_class) {
458 case LRI:
459 case RLI:
460 case FSI:
461 case PDI:
462 char_info.direction = ON;
463 break;
464 default:
465 char_info.direction = previous_bidi_class;
466 break;
467 }
468 }
469
470 previous_bidi_class = char_info.direction;
471 }
472}
473
474constexpr void unicode_bidi_W2(unicode_bidi_isolated_run_sequence& sequence) noexcept
475{
476 using enum unicode_bidi_class;
477
478 auto last_strong_direction = sequence.sos;
479 for (auto& char_info : sequence) {
480 switch (char_info.direction) {
481 case R:
482 case L:
483 case AL:
484 last_strong_direction = char_info.direction;
485 break;
486 case EN:
487 if (last_strong_direction == AL) {
488 char_info.direction = AN;
489 }
490 break;
491 default:;
492 }
493 }
494}
495
496constexpr void unicode_bidi_W3(unicode_bidi_isolated_run_sequence& sequence) noexcept
497{
498 using enum unicode_bidi_class;
499
500 for (auto& char_info : sequence) {
501 if (char_info.direction == AL) {
502 char_info.direction = R;
503 }
504 }
505}
506
507constexpr void unicode_bidi_W4(unicode_bidi_isolated_run_sequence& sequence) noexcept
508{
509 using enum unicode_bidi_class;
510
511 unicode_bidi_char_info *back1 = nullptr;
512 unicode_bidi_char_info *back2 = nullptr;
513 for (auto& char_info : sequence) {
514 if (char_info.direction == EN && back2 != nullptr && back2->direction == EN && back1 != nullptr &&
515 (back1->direction == ES || back1->direction == CS)) {
516 back1->direction = EN;
517 }
518 if (char_info.direction == AN && back2 != nullptr && back2->direction == AN && back1 != nullptr &&
519 back1->direction == CS) {
520 back1->direction = AN;
521 }
522
523 back2 = std::exchange(back1, &char_info);
524 }
525}
526
527constexpr void unicode_bidi_W5(unicode_bidi_isolated_run_sequence& sequence) noexcept
528{
529 using enum unicode_bidi_class;
530
531 auto ET_start = end(sequence);
532 auto starts_with_EN = false;
533
534 for (auto it = begin(sequence); it != end(sequence); ++it) {
535 auto& char_info = *it;
536
537 switch (char_info.direction) {
538 case ET:
539 if (starts_with_EN) {
540 char_info.direction = EN;
541 } else if (ET_start == end(sequence)) {
542 ET_start = it;
543 }
544 break;
545
546 case EN:
547 starts_with_EN = true;
548 if (ET_start != end(sequence)) {
549 for (auto jt = ET_start; jt != it; ++jt) {
550 jt->direction = EN;
551 }
552 ET_start = end(sequence);
553 }
554 break;
555
556 default:
557 starts_with_EN = false;
558 ET_start = end(sequence);
559 }
560 }
561}
562
563constexpr void unicode_bidi_W6(unicode_bidi_isolated_run_sequence& sequence) noexcept
564{
565 using enum unicode_bidi_class;
566
567 for (auto& char_info : sequence) {
568 if (char_info.direction == ET || char_info.direction == ES || char_info.direction == CS) {
569 char_info.direction = ON;
570 }
571 }
572}
573
574constexpr void unicode_bidi_W7(unicode_bidi_isolated_run_sequence& sequence) noexcept
575{
576 using enum unicode_bidi_class;
577
578 auto last_strong_direction = sequence.sos;
579 for (auto& char_info : sequence) {
580 switch (char_info.direction) {
581 case R:
582 case L:
583 last_strong_direction = char_info.direction;
584 break;
585 case EN:
586 if (last_strong_direction == L) {
587 char_info.direction = L;
588 }
589 break;
590 default:;
591 }
592 }
593}
594
595constexpr std::vector<unicode_bidi_bracket_pair> unicode_bidi_BD16(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
596{
597 struct bracket_start {
598 unicode_bidi_isolated_run_sequence::iterator it;
599 char32_t mirrored_bracket;
600
601 bracket_start(unicode_bidi_isolated_run_sequence::iterator it, char32_t mirrored_bracket) noexcept :
602 it(std::move(it)), mirrored_bracket(mirrored_bracket)
603 {
604 }
605 };
606
607 using enum unicode_bidi_class;
608
610 auto stack = hi::stack<bracket_start, 63>{};
611
612 for (auto it = begin(isolated_run_sequence); it != end(isolated_run_sequence); ++it) {
613 if (it->direction == ON) {
614 switch (it->bracket_type) {
615 case unicode_bidi_paired_bracket_type::o:
616 if (stack.full()) {
617 // Stop processing
618 std::sort(pairs.begin(), pairs.end());
619 return pairs;
620
621 } else {
622 // If there is a canonical equivalent of the opening bracket, find it's mirrored glyph
623 // to compare with the closing bracket.
624 auto mirrored_glyph = ucd_get_bidi_mirroring_glyph(it->code_point);
625 if (hilet canonical_equivalent = ucd_get_decomposition(it->code_point).canonical_equivalent()) {
626 hi_axiom(ucd_get_bidi_paired_bracket_type(*canonical_equivalent) == unicode_bidi_paired_bracket_type::o);
627
628 mirrored_glyph = ucd_get_bidi_mirroring_glyph(*canonical_equivalent);
629 }
630
631 stack.emplace_back(it, mirrored_glyph);
632 }
633 break;
634
635 case unicode_bidi_paired_bracket_type::c:
636 {
637 hilet canonical_equivalent = ucd_get_decomposition(it->code_point).canonical_equivalent();
638 for (auto jt = stack.end() - 1; jt >= stack.begin(); --jt) {
639 if (jt->mirrored_bracket == it->code_point or
640 (canonical_equivalent and jt->mirrored_bracket == *canonical_equivalent)) {
641 pairs.emplace_back(jt->it, it);
642 stack.pop_back(jt);
643 break;
644 }
645 }
646 }
647 break;
648
649 default:;
650 }
651 }
652 }
653
654 std::sort(pairs.begin(), pairs.end());
655 return pairs;
656}
657
658[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_N0_strong(unicode_bidi_class direction)
659{
660 using enum unicode_bidi_class;
661
662 switch (direction) {
663 case L:
664 return L;
665 case R:
666 case EN:
667 case AN:
668 return R;
669 default:
670 return ON;
671 }
672}
673
674[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_N0_preceding_strong_type(
675 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
676 unicode_bidi_isolated_run_sequence::iterator const& open_bracket) noexcept
677{
678 using enum unicode_bidi_class;
679
680 auto it = open_bracket;
681 while (it != begin(isolated_run_sequence)) {
682 --it;
683
684 if (hilet direction = unicode_bidi_N0_strong(it->direction); direction != ON) {
685 return direction;
686 }
687 }
688
689 return isolated_run_sequence.sos;
690}
691
692[[nodiscard]] constexpr unicode_bidi_class
693unicode_bidi_N0_enclosed_strong_type(unicode_bidi_bracket_pair const& pair, unicode_bidi_class embedding_direction) noexcept
694{
695 using enum unicode_bidi_class;
696
697 auto opposite_direction = ON;
698 for (auto it = pair.open + 1; it != pair.close; ++it) {
699 hilet direction = unicode_bidi_N0_strong(it->direction);
700 if (direction == ON) {
701 continue;
702 }
703 if (direction == embedding_direction) {
704 return direction;
705 }
706 opposite_direction = direction;
707 }
708
709 return opposite_direction;
710}
711
712constexpr void unicode_bidi_N0(unicode_bidi_isolated_run_sequence& isolated_run_sequence, unicode_bidi_context const& context)
713{
714 using enum unicode_bidi_class;
715
716 if (not context.enable_mirrored_brackets) {
717 return;
718 }
719
720 auto bracket_pairs = unicode_bidi_BD16(isolated_run_sequence);
721 hilet embedding_direction = isolated_run_sequence.embedding_direction();
722
723 for (auto& pair : bracket_pairs) {
724 auto pair_direction = unicode_bidi_N0_enclosed_strong_type(pair, embedding_direction);
725
726 if (pair_direction == ON) {
727 continue;
728 }
729
730 if (pair_direction != embedding_direction) {
731 pair_direction = unicode_bidi_N0_preceding_strong_type(isolated_run_sequence, pair.open);
732
733 if (pair_direction == embedding_direction || pair_direction == ON) {
734 pair_direction = embedding_direction;
735 }
736 }
737
738 pair.open->direction = pair_direction;
739 pair.close->direction = pair_direction;
740
741 for (auto it = pair.open + 1; it != pair.close; ++it) {
742 if (it->bidi_class != NSM) {
743 break;
744 }
745 it->direction = pair_direction;
746 }
747
748 for (auto it = pair.close + 1; it != end(isolated_run_sequence); ++it) {
749 if (it->bidi_class != NSM) {
750 break;
751 }
752 it->direction = pair_direction;
753 }
754 }
755}
756
757constexpr void unicode_bidi_N1(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
758{
759 using enum unicode_bidi_class;
760
761 auto direction_before_NI = isolated_run_sequence.sos;
762 auto first_NI = end(isolated_run_sequence);
763
764 for (auto it = begin(isolated_run_sequence); it != end(isolated_run_sequence); ++it) {
765 hilet& char_info = *it;
766 if (first_NI != end(isolated_run_sequence)) {
767 if (!is_NI(char_info.direction)) {
768 hilet direction_after_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
769
770 if ((direction_before_NI == L || direction_before_NI == R) && direction_before_NI == direction_after_NI) {
771 std::for_each(first_NI, it, [direction_before_NI](auto& item) {
772 item.direction = direction_before_NI;
773 });
774 }
775
776 first_NI = end(isolated_run_sequence);
777 direction_before_NI = direction_after_NI;
778 }
779
780 } else if (is_NI(char_info.direction)) {
781 first_NI = it;
782 } else {
783 direction_before_NI = (it->direction == EN || it->direction == AN) ? R : it->direction;
784 }
785 }
786
787 if (first_NI != end(isolated_run_sequence) && direction_before_NI == isolated_run_sequence.eos) {
788 std::for_each(first_NI, end(isolated_run_sequence), [direction_before_NI](auto& item) {
789 item.direction = direction_before_NI;
790 });
791 }
792}
793
794constexpr void unicode_bidi_N2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
795{
796 hilet embedding_direction = isolated_run_sequence.embedding_direction();
797
798 for (auto& char_info : isolated_run_sequence) {
799 if (is_NI(char_info.direction)) {
800 char_info.direction = embedding_direction;
801 }
802 }
803}
804
805constexpr void unicode_bidi_I1_I2(unicode_bidi_isolated_run_sequence& isolated_run_sequence)
806{
807 using enum unicode_bidi_class;
808
809 for (auto& char_info : isolated_run_sequence) {
810 if ((char_info.embedding_level % 2) == 0) {
811 // I1
812 if (char_info.direction == R) {
813 char_info.embedding_level += 1;
814 } else if (char_info.direction == AN || char_info.direction == EN) {
815 char_info.embedding_level += 2;
816 }
817 } else {
818 // I2
819 if (char_info.direction == L || char_info.direction == AN || char_info.direction == EN) {
820 char_info.embedding_level += 1;
821 }
822 }
823 }
824}
825
827unicode_bidi_BD7(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept
828{
830
831 auto embedding_level = int8_t{0};
832 auto run_start = first;
833 for (auto it = first; it != last; ++it) {
834 if (it == first) {
835 embedding_level = it->embedding_level;
836
837 } else if (it->embedding_level != embedding_level) {
838 embedding_level = it->embedding_level;
839
840 level_runs.emplace_back(run_start, it);
841 run_start = it;
842 }
843 }
844 if (run_start != last) {
845 level_runs.emplace_back(run_start, last);
846 }
847
848 return level_runs;
849}
850
852unicode_bidi_BD13(std::vector<unicode_bidi_level_run> level_runs) noexcept
853{
855
856 std::reverse(begin(level_runs), end(level_runs));
857 while (!level_runs.empty()) {
858 auto isolated_run_sequence = unicode_bidi_isolated_run_sequence(level_runs.back());
859 level_runs.pop_back();
860
861 while (isolated_run_sequence.ends_with_isolate_initiator() && !level_runs.empty()) {
862 // Search for matching PDI in the run_levels. This should have the same embedding level.
863 auto isolation_level = 1;
864 for (auto it = std::rbegin(level_runs); it != std::rend(level_runs); ++it) {
865 if (it->starts_with_PDI() && --isolation_level == 0) {
866 hi_axiom(it->embedding_level() == isolated_run_sequence.embedding_level());
867 isolated_run_sequence.add_run(*it);
868 level_runs.erase(std::next(it).base());
869 break;
870 }
871 if (it->ends_with_isolate_initiator()) {
872 ++isolation_level;
873 }
874 }
875
876 if (isolation_level != 0) {
877 // No PDI that matches the isolate initiator of this isolated run sequence.
878 break;
879 }
880 }
881
882 r.push_back(std::move(isolated_run_sequence));
883 }
884
885 return r;
886}
887
888[[nodiscard]] constexpr std::pair<unicode_bidi_class, unicode_bidi_class> unicode_bidi_X10_sos_eos(
889 unicode_bidi_isolated_run_sequence& isolated_run_sequence,
890 unicode_bidi_char_info_iterator first,
891 unicode_bidi_char_info_iterator last,
892 int8_t paragraph_embedding_level) noexcept
893{
894 if (begin(isolated_run_sequence) != end(isolated_run_sequence)) {
895 // The calculations on the iterator for last_char_it is required because
896 // calling child() on an end iterator is undefined behavior.
897 hilet first_char_it = begin(isolated_run_sequence).child();
898 hilet last_char_it = (end(isolated_run_sequence) - 1).child() + 1;
899
900 hilet has_char_before = first_char_it != first;
901 hilet has_char_after = last_char_it != last;
902
903 hilet start_embedding_level = std::max(
904 isolated_run_sequence.embedding_level(),
905 has_char_before ? (first_char_it - 1)->embedding_level : paragraph_embedding_level);
906 hilet end_embedding_level = std::max(
907 isolated_run_sequence.embedding_level(),
908 has_char_after && !isolated_run_sequence.ends_with_isolate_initiator() ? last_char_it->embedding_level :
909 paragraph_embedding_level);
910
911 return {
912 (start_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L,
913 (end_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L};
914 } else {
915 return {
916 (paragraph_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L,
917 (paragraph_embedding_level % 2) == 1 ? unicode_bidi_class::R : unicode_bidi_class::L};
918 }
919}
920
921constexpr void unicode_bidi_X10(
922 unicode_bidi_char_info_iterator first,
923 unicode_bidi_char_info_iterator last,
924 int8_t paragraph_embedding_level,
925 unicode_bidi_context const& context) noexcept
926{
927 auto isolated_run_sequence_set = unicode_bidi_BD13(unicode_bidi_BD7(first, last));
928
929 // All sos and eos calculations must be done before W*, N*, I* parts are executed,
930 // since those will change the embedding levels of the characters outside of the
931 // current isolated_run_sequence that the unicode_bidi_X10_sos_eos() depends on.
932 for (auto& isolated_run_sequence : isolated_run_sequence_set) {
933 std::tie(isolated_run_sequence.sos, isolated_run_sequence.eos) =
934 unicode_bidi_X10_sos_eos(isolated_run_sequence, first, last, paragraph_embedding_level);
935 }
936
937 for (auto& isolated_run_sequence : isolated_run_sequence_set) {
938 unicode_bidi_W1(isolated_run_sequence);
939 unicode_bidi_W2(isolated_run_sequence);
940 unicode_bidi_W3(isolated_run_sequence);
941 unicode_bidi_W4(isolated_run_sequence);
942 unicode_bidi_W5(isolated_run_sequence);
943 unicode_bidi_W6(isolated_run_sequence);
944 unicode_bidi_W7(isolated_run_sequence);
945 unicode_bidi_N0(isolated_run_sequence, context);
946 unicode_bidi_N1(isolated_run_sequence);
947 unicode_bidi_N2(isolated_run_sequence);
948 unicode_bidi_I1_I2(isolated_run_sequence);
949 }
950}
951
952[[nodiscard]] constexpr std::pair<int8_t, int8_t> unicode_bidi_L1(
953 unicode_bidi_char_info_iterator first,
954 unicode_bidi_char_info_iterator last,
955 int8_t paragraph_embedding_level) noexcept
956{
957 using enum unicode_bidi_class;
958
959 auto lowest_odd = std::numeric_limits<int8_t>::max();
960 auto highest = paragraph_embedding_level;
961 auto preceding_is_segment = true;
962
963 auto it = last;
964 while (it != first) {
965 --it;
966
967 auto bidi_class = it->bidi_class;
968
969 if (bidi_class == B || bidi_class == S) {
970 it->embedding_level = paragraph_embedding_level;
971 preceding_is_segment = true;
972
973 } else if (preceding_is_segment && (bidi_class == WS || is_isolate_formatter(bidi_class))) {
974 it->embedding_level = paragraph_embedding_level;
975 preceding_is_segment = true;
976
977 } else {
978 highest = std::max(highest, it->embedding_level);
979 if ((it->embedding_level % 2) == 1) {
980 lowest_odd = std::min(lowest_odd, it->embedding_level);
981 }
982
983 preceding_is_segment = false;
984 }
985 }
986
987 if ((paragraph_embedding_level % 2) == 1) {
988 lowest_odd = std::min(lowest_odd, paragraph_embedding_level);
989 }
990
991 if (lowest_odd > highest) {
992 // If there where no odd levels below the highest level
993 if (highest % 2 == 1) {
994 // We need to reverse at least once if the highest was odd.
995 lowest_odd = highest;
996 } else {
997 // We need to reverse at least twice if the highest was even.
998 // This may yield a negative lowest_odd.
999 lowest_odd = highest - 1;
1000 }
1001 }
1002
1003 return {lowest_odd, highest};
1004}
1005
1006constexpr void unicode_bidi_L2(
1007 unicode_bidi_char_info_iterator first,
1008 unicode_bidi_char_info_iterator last,
1009 int8_t lowest_odd,
1010 int8_t highest) noexcept
1011{
1012 for (int8_t level = highest; level >= lowest_odd; --level) {
1013 auto sequence_start = last;
1014 for (auto it = first; it != last; ++it) {
1015 if (sequence_start == last) {
1016 if (it->embedding_level >= level) {
1017 sequence_start = it;
1018 }
1019 } else if (it->embedding_level < level) {
1020 std::reverse(sequence_start, it);
1021 sequence_start = last;
1022 }
1023 }
1024 if (sequence_start != last) {
1025 std::reverse(sequence_start, last);
1026 }
1027 }
1028}
1029
1030constexpr void unicode_bidi_L3(unicode_bidi_char_info_iterator first, unicode_bidi_char_info_iterator last) noexcept {}
1031
1032[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2_default(unicode_bidi_context const& context) noexcept
1033{
1034 if (context.direction_mode == unicode_bidi_context::mode_type::auto_LTR) {
1035 return unicode_bidi_class::L;
1036 } else if (context.direction_mode == unicode_bidi_context::mode_type::auto_RTL) {
1037 return unicode_bidi_class::R;
1038 } else {
1039 hi_no_default();
1040 }
1041}
1042
1043[[nodiscard]] constexpr unicode_bidi_class unicode_bidi_P2(
1044 unicode_bidi_char_info_iterator first,
1045 unicode_bidi_char_info_iterator last,
1046 unicode_bidi_context const& context,
1047 bool rule_X5c) noexcept
1048{
1049 using enum unicode_bidi_class;
1050
1051 if (context.direction_mode == unicode_bidi_context::mode_type::LTR) {
1052 return unicode_bidi_class::L;
1053 } else if (context.direction_mode == unicode_bidi_context::mode_type::RTL) {
1054 return unicode_bidi_class::R;
1055 }
1056
1057 long long isolate_level = 0;
1058 for (auto it = first; it != last; ++it) {
1059 switch (it->direction) {
1060 case L:
1061 case AL:
1062 case R:
1063 if (isolate_level == 0) {
1064 return it->direction;
1065 }
1066 break;
1067 case LRI:
1068 case RLI:
1069 case FSI:
1070 ++isolate_level;
1071 break;
1072 case PDI:
1073 if (isolate_level > 0) {
1074 --isolate_level;
1075 } else if (rule_X5c) {
1076 // End at the matching PDI, when recursing for rule X5c.
1077 return unicode_bidi_P2_default(context);
1078 }
1079 break;
1080 default:;
1081 }
1082 }
1083 return unicode_bidi_P2_default(context);
1084}
1085
1086[[nodiscard]] constexpr int8_t unicode_bidi_P3(unicode_bidi_class paragraph_bidi_class) noexcept
1087{
1088 return wide_cast<int8_t>(paragraph_bidi_class == unicode_bidi_class::AL or paragraph_bidi_class == unicode_bidi_class::R);
1089}
1090
1091constexpr void unicode_bidi_P1_line(
1092 unicode_bidi_char_info_iterator first,
1093 unicode_bidi_char_info_iterator last,
1094 int8_t paragraph_embedding_level,
1095 unicode_bidi_context const& context) noexcept
1096{
1097 hilet[lowest_odd, highest] = unicode_bidi_L1(first, last, paragraph_embedding_level);
1098 unicode_bidi_L2(first, last, lowest_odd, highest);
1099 unicode_bidi_L3(first, last);
1100 // L4 is delayed after the original array has been shuffled.
1101}
1102
1103[[nodiscard]] constexpr std::pair<int8_t, unicode_bidi_class> unicode_bidi_P2_P3(
1104 unicode_bidi_char_info_iterator first,
1105 unicode_bidi_char_info_iterator last,
1106 unicode_bidi_context const& context) noexcept
1107{
1108 hilet default_paragraph_direction = unicode_bidi_P2(first, last, context, false);
1109 hilet paragraph_embedding_level = unicode_bidi_P3(default_paragraph_direction);
1110 hilet paragraph_direction = paragraph_embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1111 return {paragraph_embedding_level, paragraph_direction};
1112}
1113
1114[[nodiscard]] constexpr std::pair<unicode_bidi_char_info_iterator, unicode_bidi_class> unicode_bidi_P1_paragraph(
1115 unicode_bidi_char_info_iterator first,
1116 unicode_bidi_char_info_iterator last,
1117 unicode_bidi_context const& context) noexcept
1118{
1119 hilet[paragraph_embedding_level, paragraph_direction] = unicode_bidi_P2_P3(first, last, context);
1120
1121 unicode_bidi_X1(first, last, paragraph_embedding_level, context);
1122 last = unicode_bidi_X9(first, last);
1123 unicode_bidi_X10(first, last, paragraph_embedding_level, context);
1124
1125 auto line_begin = first;
1126 for (auto it = first; it != last; ++it) {
1127 hilet general_category = ucd_get_general_category(it->code_point);
1128 if (context.enable_line_separator and general_category == unicode_general_category::Zl) {
1129 hilet line_end = it + 1;
1130 unicode_bidi_P1_line(line_begin, line_end, paragraph_embedding_level, context);
1131 line_begin = line_end;
1132 }
1133 }
1134
1135 if (line_begin != last) {
1136 unicode_bidi_P1_line(line_begin, last, paragraph_embedding_level, context);
1137 }
1138
1139 return {last, paragraph_direction};
1140}
1141
1143 unicode_bidi_char_info_iterator first,
1144 unicode_bidi_char_info_iterator last,
1145 unicode_bidi_context const& context) noexcept
1146{
1147 auto it = first;
1148 auto paragraph_begin = it;
1149 auto paragraph_directions = std::vector<unicode_bidi_class>{};
1150 while (it != last) {
1151 if (it->direction == unicode_bidi_class::B) {
1152 hilet paragraph_end = it + 1;
1153 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, paragraph_end, context);
1154 paragraph_directions.push_back(paragraph_bidi_class);
1155
1156 // Move the removed items of the paragraph to the end of the text.
1157 std::rotate(new_paragraph_end, paragraph_end, last);
1158 last -= std::distance(new_paragraph_end, paragraph_end);
1159
1160 paragraph_begin = it = new_paragraph_end;
1161 } else {
1162 ++it;
1163 }
1164 }
1165
1166 if (paragraph_begin != last) {
1167 hilet[new_paragraph_end, paragraph_bidi_class] = unicode_bidi_P1_paragraph(paragraph_begin, last, context);
1168 paragraph_directions.push_back(paragraph_bidi_class);
1169 last = new_paragraph_end;
1170 }
1171
1172 return {last, std::move(paragraph_directions)};
1173}
1174
1175template<typename OutputIt, typename SetCodePoint, typename SetTextDirection>
1176constexpr void unicode_bidi_L4(
1177 unicode_bidi_char_info_iterator first,
1178 unicode_bidi_char_info_iterator last,
1179 OutputIt output_it,
1180 SetCodePoint set_code_point,
1181 SetTextDirection set_text_direction) noexcept
1182{
1183 for (auto it = first; it != last; ++it, ++output_it) {
1184 hilet text_direction = it->embedding_level % 2 == 0 ? unicode_bidi_class::L : unicode_bidi_class::R;
1185 set_text_direction(*output_it, text_direction);
1186 if (it->direction == unicode_bidi_class::R and it->bracket_type != unicode_bidi_paired_bracket_type::n) {
1187 set_code_point(*output_it, ucd_get_bidi_mirroring_glyph(it->code_point));
1188 }
1189 }
1190}
1191
1192} // namespace detail
1193
1217template<typename It, typename GetCodePoint, typename SetCodePoint, typename SetTextDirection>
1219 It first,
1220 It last,
1221 GetCodePoint get_code_point,
1222 SetCodePoint set_code_point,
1223 SetTextDirection set_text_direction,
1224 unicode_bidi_context const& context = {})
1225{
1226 auto proxy = detail::unicode_bidi_char_info_vector{};
1227 proxy.reserve(std::distance(first, last));
1228
1229 std::size_t index = 0;
1230 for (auto it = first; it != last; ++it) {
1231 proxy.emplace_back(index++, get_code_point(*it));
1232 }
1233
1234 auto [proxy_last, paragraph_directions] = detail::unicode_bidi_P1(begin(proxy), end(proxy), context);
1235 last = shuffle_by_index(first, last, begin(proxy), proxy_last, [](hilet& item) {
1236 return item.index;
1237 });
1238
1239 detail::unicode_bidi_L4(
1240 begin(proxy),
1241 proxy_last,
1242 first,
1243 std::forward<SetCodePoint>(set_code_point),
1244 std::forward<SetTextDirection>(set_text_direction));
1245 return {last, std::move(paragraph_directions)};
1246}
1247
1256template<typename It, typename GetCodePoint>
1257[[nodiscard]] constexpr unicode_bidi_class
1258unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const& context = {})
1259{
1260 auto proxy = detail::unicode_bidi_char_info_vector{};
1261 proxy.reserve(std::distance(first, last));
1262
1263 std::size_t index = 0;
1264 for (auto it = first; it != last; ++it) {
1265 proxy.emplace_back(index++, get_code_point(*it));
1266 if (proxy.back().direction == unicode_bidi_class::B) {
1267 // Break early when end-of-paragraph symbol is found.
1268 break;
1269 }
1270 }
1271
1272 return detail::unicode_bidi_P2_P3(begin(proxy), end(proxy), context).second;
1273}
1274
1285template<typename It, typename EndIt, typename CodePointFunc>
1286constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const& code_point_func)
1287{
1288 return std::remove_if(first, last, [&](hilet& item) {
1289 hilet code_point = code_point_func(item);
1290 hilet bidi_class = ucd_get_bidi_class(code_point);
1291 return is_control(bidi_class);
1292 });
1293}
1294
1295} // namespace hi::inline v1
DOXYGEN BUG.
Definition algorithm.hpp:16
auto shuffle_by_index(auto first, auto last, auto indices_first, auto indices_last, auto index_op) noexcept
Shuffle a container based on a list of indices.
Definition algorithm.hpp:261
constexpr It unicode_bidi_control_filter(It first, EndIt last, CodePointFunc const &code_point_func)
Removes control characters which will not survive the bidi-algorithm.
Definition unicode_bidi.hpp:1286
constexpr std::pair< It, std::vector< unicode_bidi_class > > unicode_bidi(It first, It last, GetCodePoint get_code_point, SetCodePoint set_code_point, SetTextDirection set_text_direction, unicode_bidi_context const &context={})
Reorder a given range of characters based on the unicode_bidi algorithm.
Definition unicode_bidi.hpp:1218
constexpr unicode_bidi_class unicode_bidi_direction(It first, It last, GetCodePoint get_code_point, unicode_bidi_context const &context={})
Get the unicode bidi direction for the first paragraph and context.
Definition unicode_bidi.hpp:1258
constexpr char32_t ucd_get_bidi_mirroring_glyph(char32_t code_point) noexcept
Get the bidi-mirroring-glyph for a code-point.
Definition ucd_bidi_mirroring_glyphs.hpp:173
constexpr ucd_decomposition_info ucd_get_decomposition(char32_t code_point) noexcept
Get the decomposition info of a code-point.
Definition ucd_decompositions.hpp:4800
unicode_bidi_class
Bidirectional class Unicode Standard Annex #9: https://unicode.org/reports/tr9/.
Definition ucd_bidi_classes.hpp:858
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
constexpr std::optional< char32_t > canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition ucd_decompositions.hpp:4787
Definition unicode_bidi.hpp:21
Definition unicode_bidi.hpp:48
unicode_bidi_class direction
Current computed direction of the code-point.
Definition unicode_bidi.hpp:66
constexpr unicode_bidi_char_info(std::size_t index, unicode_bidi_class bidi_class) noexcept
Constructor for testing to bypass normal initialization.
Definition unicode_bidi.hpp:89
int8_t embedding_level
The embedding level.
Definition unicode_bidi.hpp:61
unicode_bidi_class bidi_class
The original bidi class of the code-point.
Definition unicode_bidi.hpp:71
unicode_bidi_paired_bracket_type bracket_type
The type of bidi-paired-bracket.
Definition unicode_bidi.hpp:75
std::size_t index
Index from the first character in the original list.
Definition unicode_bidi.hpp:51
char32_t code_point
The current code point.
Definition unicode_bidi.hpp:56
Definition unicode_bidi.hpp:104
Definition unicode_bidi.hpp:124
Definition unicode_bidi.hpp:136
Definition unicode_bidi.hpp:180
Definition unicode_bidi.hpp:257
T back(T... args)
T begin(T... args)
T distance(T... args)
T emplace_back(T... args)
T empty(T... args)
T end(T... args)
T erase(T... args)
T for_each(T... args)
T front(T... args)
T max(T... args)
T min(T... args)
T move(T... args)
T next(T... args)
T pop_back(T... args)
T push_back(T... args)
T remove_if(T... args)
T reverse(T... args)
T rotate(T... args)
T sort(T... args)
T tie(T... args)