HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
po_parser.hpp
1// Copyright Take Vos 2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "translation.hpp"
8#include "../i18n/i18n.hpp"
9#include "../file/file.hpp"
10#include "../parser/parser.hpp"
11#include "../macros.hpp"
12#include <string>
13#include <vector>
14#include <filesystem>
15#include <ranges>
16
17hi_export_module(hikogui.l10n.po_parser);
18
19namespace hi::inline v1 {
20
22 std::optional<std::string> msgctxt;
23 std::string msgid;
24 std::string msgid_plural;
26};
27
29 language_tag language;
30 size_t nr_plural_forms;
31 std::string plural_expression;
32 std::vector<po_translation> translations;
33};
34
35namespace detail {
36
/** Parse a single logical line of a .po file.
 *
 * A line has the form `keyword [ '[' integer ']' ] string { string }`;
 * consecutive string literals are concatenated into one value.
 *
 * @param it Token iterator, advanced past every token that was consumed.
 *           Must not be equal to @a last on entry.
 * @param last Sentinel marking the end of the token stream.
 * @param path Path of the file being parsed, used only in error messages.
 * @return The keyword, the index (0 when no index was given) and the
 *         concatenated string value.
 * @throws parse_error When the tokens do not follow the form above,
 *         including when the input ends in the middle of a line.
 */
template<std::input_iterator It, std::sentinel_for<It> ItEnd>
[[nodiscard]] constexpr std::tuple<std::string, size_t, std::string> parse_po_line(It& it, ItEnd last, std::string_view path)
{
    hi_assert(it != last);

    // Describe the current token for an error message without ever
    // dereferencing the iterator when it has reached the sentinel.
    auto const got = [&]() -> std::string {
        return it != last ? std::format("{}", *it) : std::string{"end of input"};
    };

    auto name = std::string{};
    if ((*it == token::id)) {
        name = static_cast<std::string>(*it++);
    } else {
        throw parse_error(
            std::format("{}: Expecting a keyword at start of each line, got {}", token_location(it, last, path), got()));
    }

    auto index = 0_uz;
    // The keyword may have been the last token, so check for the end before
    // looking for an optional index.
    if (it != last and *it == '[') {
        ++it;

        if (it != last and *it == token::integer) {
            index = static_cast<size_t>(*it++);
        } else {
            throw parse_error(std::format(
                "{}: Expecting an integer literal as an index for {}, got {}", token_location(it, last, path), name, got()));
        }

        if (it != last and *it == ']') {
            ++it;
        } else {
            throw parse_error(std::format(
                "{}: The index on {} must terminate with a bracket ']', got {}", token_location(it, last, path), name, got()));
        }
    }

    auto value = std::string{};
    if (it != last and (*it == token::sstr or *it == token::dstr)) {
        value = static_cast<std::string>(*it++);
    } else {
        throw parse_error(
            std::format("{}: Expecting a string value after {}, got {}", token_location(it, last, path), name, got()));
    }

    while (it != last and (*it == token::sstr or *it == token::dstr)) {
        // Concatenating string literals.
        value += static_cast<std::string>(*it++);
    }

    return {name, index, value};
}
84
/** Parse one translation entry from a .po token stream.
 *
 * Lines are read until at least one "msgstr" line has been seen. After that,
 * further "msgstr" lines are added to the same entry; any other keyword, or
 * the end of the input, finishes the entry.
 *
 * @param it Token iterator, advanced past every token that was consumed.
 * @param last Sentinel marking the end of the token stream.
 * @param path Path of the file being parsed, used only in error messages.
 * @return The parsed translation, or std::nullopt when the input ended
 *         before any "msgstr" line was read for this entry.
 * @throws parse_error On an unexpected keyword or a malformed line.
 */
template<std::input_iterator It, std::sentinel_for<It> ItEnd>
[[nodiscard]] constexpr std::optional<po_translation> parse_po_translation(It& it, ItEnd last, std::string_view path)
{
    po_translation r;

    // Store a msgstr value at the given index, growing the list when needed.
    auto const store_msgstr = [&r](size_t index, std::string const& value) {
        if (index >= r.msgstr.size()) {
            r.msgstr.resize(index + 1);
        }
        r.msgstr[index] = value;
    };

    while (it != last) {
        if (r.msgstr.empty()) {
            // If there have been no "msgstr" keywords, then capture information in the translation.
            auto [name, index, value] = parse_po_line(it, last, path);

            if (name == "msgctxt") {
                r.msgctxt = value;

            } else if (name == "msgid") {
                r.msgid = value;

            } else if (name == "msgid_plural") {
                r.msgid_plural = value;

            } else if (name == "msgstr") {
                store_msgstr(index, value);

            } else {
                throw parse_error(
                    std::format("{}: Line starts with unexpected keyword {}", token_location(it, last, path), name));
            }

        } else if ((*it == token::id) and (*it == "msgstr")) {
            // After the first "msgstr" keyword there may be others, but another keyword will start a new translation.
            auto [name, index, value] = parse_po_line(it, last, path);
            store_msgstr(index, value);

        } else {
            // The current keyword is not a msgstr, so return the translation captured.
            return r;
        }
    }

    // The input ended. An entry that already has a msgstr is complete and
    // must be returned, otherwise the final translation of a file is lost.
    if (not r.msgstr.empty()) {
        return r;
    }

    // Nothing, or only an entry without any msgstr, was read.
    return std::nullopt;
}
132
/** Extract the supported fields from the header text of a .po file.
 *
 * The header is split on the two-character sequence backslash 'n'; each
 * non-empty piece must be of the form `name: value`. Only the "language"
 * field is used; every other field is accepted and ignored.
 *
 * @param r The translations object whose `language` is updated.
 * @param header The header text, taken from the msgstr of the entry with an
 *               empty msgid.
 * @throws parse_error When a non-empty header line contains no ':'.
 */
constexpr void parse_po_header(po_translations& r, std::string_view header)
{
    using namespace std::literals;

    for (hilet line : std::views::split(header, "\\n"sv)) {
        if (not line.empty()) {
            auto fields = make_vector<std::string_view>(std::views::split(line, ":"sv));
            if (fields.size() < 2) {
                throw parse_error(std::format("Unknown header '{}'", std::string_view{line}));
            }

            // The first field is the header name; everything after the first
            // ':' is the value, so re-join the rest with the separator.
            hilet key = to_lower(strip(fields.front()));
            fields.erase(fields.begin());
            hilet text = strip(join(fields, ":"));

            if (key == "language") {
                r.language = language_tag{text};
            }
        }
        // Empty header lines are skipped.
    }
}
157
158} // namespace detail
159
/** Parse the text of a .po file into a set of translations.
 *
 * The text is tokenized with the `sh_style` lexer configuration and then read
 * entry by entry. An entry with a non-empty msgid is stored as a translation;
 * an entry with an empty msgid and exactly one msgstr is treated as the header.
 *
 * @param it Iterator to the first character of the text.
 * @param last Sentinel marking the end of the text.
 * @param path Path of the file being parsed, used only in error messages.
 * @return The header information and translations that were read.
 * @throws parse_error On malformed input.
 */
template<std::input_iterator It, std::sentinel_for<It> ItEnd>
[[nodiscard]] constexpr po_translations parse_po(It it, ItEnd last, std::string_view path)
{
    po_translations r;

    auto token_it = lexer<lexer_config::sh_style()>.parse(it, last);

    while (token_it != std::default_sentinel) {
        auto entry = detail::parse_po_translation(token_it, std::default_sentinel, path);
        if (not entry) {
            // No entry was produced; the loop condition decides whether to go on.
            continue;
        }

        if (entry->msgid.empty()) {
            // If a translation has an empty msgid, then the msgstr contain headers.
            if (entry->msgstr.size() != 1) {
                throw parse_error(std::format("{}: Unknown .po syntax.", token_location(token_it, path)));
            }
            detail::parse_po_header(r, entry->msgstr.front());

        } else {
            r.translations.push_back(*entry);
        }
    }

    return r;
}
184
185[[nodiscard]] constexpr po_translations parse_po(std::string_view text, std::string_view path)
186{
187 return parse_po(text.begin(), text.end(), path);
188}
189
190[[nodiscard]] inline po_translations parse_po(std::filesystem::path const& path)
191{
192 return parse_po(as_string_view(file_view{path}), path.string());
193}
194
195} // namespace hi::inline v1
Defines the file class.
STL namespace.
DOXYGEN BUG.
Definition algorithm.hpp:16
hi_export constexpr std::string token_location(It &it, ItEnd last, std::string_view path) noexcept
Create a location string for error messages.
Definition token.hpp:160
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
Definition po_parser.hpp:21
Definition po_parser.hpp:28