17struct UnicodeConverter;
20struct LIGHTWEIGHT_API UnicodeConverter<char8_t>
23 template <
typename OutputIterator>
24 static constexpr OutputIterator Convert(
char32_t input, OutputIterator output)
noexcept
28 *output++ =
static_cast<char8_t>(input & 0b0111'1111);
30 else if (input <= 0x07FF)
32 *output++ =
static_cast<char8_t>(((input >> 6) & 0b0001'1111) | 0b1100'0000);
33 *output++ =
static_cast<char8_t>(((input >> 0) & 0b0011'1111) | 0b1000'0000);
35 else if (input <= 0xFFFF)
37 *output++ =
static_cast<char8_t>(((input >> 12) & 0b0000'1111) | 0b1110'0000);
38 *output++ =
static_cast<char8_t>(((input >> 6) & 0b0011'1111) | 0b1000'0000);
39 *output++ =
static_cast<char8_t>(((input >> 0) & 0b0011'1111) | 0b1000'0000);
43 *output++ =
static_cast<char8_t>(((input >> 18) & 0b0000'0111) | 0b1111'0000);
44 *output++ =
static_cast<char8_t>(((input >> 12) & 0b0011'1111) | 0b1000'0000);
45 *output++ =
static_cast<char8_t>(((input >> 6) & 0b0011'1111) | 0b1000'0000);
46 *output++ =
static_cast<char8_t>(((input >> 0) & 0b0011'1111) | 0b1000'0000);
53struct LIGHTWEIGHT_API UnicodeConverter<char16_t>
56 template <
typename OutputIterator>
57 static constexpr OutputIterator Convert(
char32_t input, OutputIterator output)
noexcept
61 *output++ = char16_t(input);
64 else if (input < 0x10000)
70 *output++ = char16_t(input);
73 else if (input < 0x110000)
75 *output++ = char16_t(0xD7C0 + (input >> 10));
76 *output++ = char16_t(0xDC00 + (input & 0x3FF));
86 char32_t codePoint = 0;
89 static constexpr auto InvalidCodePoint =
char32_t { 0xFFFD };
91 constexpr std::optional<char32_t> Process(
char8_t c8)
noexcept
93 if ((c8 & 0b1100'0000) == 0b1000'0000)
96 return InvalidCodePoint;
98 codePoint |= c8 & 0b0011'1111;
101 auto result = codePoint;
109 if ((c8 & 0b1000'0000) == 0)
111 if ((c8 & 0b1110'0000) == 0b1100'0000)
113 codePoint = c8 & 0b0001'1111;
117 if ((c8 & 0b1111'0000) == 0b1110'0000)
119 codePoint = c8 & 0b0000'1111;
123 if ((c8 & 0b1111'1000) == 0b1111'0000)
125 codePoint = c8 & 0b0000'0111;
129 return InvalidCodePoint;
131 return InvalidCodePoint;
135struct [[nodiscard]] Utf32Iterator
137 std::u8string_view u8InputString;
139 struct [[nodiscard]] iterator
141 std::u8string_view::iterator current {};
142 std::u8string_view::iterator end {};
143 char32_t codePoint = Utf32Converter::InvalidCodePoint;
145 constexpr explicit iterator(std::u8string_view::iterator current, std::u8string_view::iterator end)
noexcept:
153 constexpr char32_t operator*() const noexcept
158 constexpr iterator& operator++() noexcept
160 auto converter = Utf32Converter {};
161 codePoint = Utf32Converter::InvalidCodePoint;
162 while (current != end)
164 if (
auto const result = converter.Process(*current++); result.has_value())
173 constexpr iterator& operator++(
int)
noexcept
178 constexpr bool operator==(iterator
const& other)
const noexcept
180 return current == other.current && codePoint == other.codePoint;
183 constexpr bool operator!=(iterator
const& other)
const noexcept
185 return !(*
this == other);
189 iterator begin() const noexcept
191 return iterator { u8InputString.begin(), u8InputString.end() };
194 iterator end() const noexcept
196 return iterator { u8InputString.end(), u8InputString.end() };
209LIGHTWEIGHT_API std::u8string
ToUtf8(std::u32string_view u32InputString);
214LIGHTWEIGHT_API std::u8string
ToUtf8(std::u16string_view u16InputString);
220 requires(std::same_as<T, wchar_t> &&
sizeof(wchar_t) == 2)
221inline LIGHTWEIGHT_FORCE_INLINE std::u8string
ToUtf8(std::basic_string_view<T> u16InputString)
223 return ToUtf8(std::u16string_view(
reinterpret_cast<const char16_t*
>(u16InputString.data()), u16InputString.size()));
230 requires(std::same_as<T, wchar_t> &&
sizeof(wchar_t) == 4)
231inline LIGHTWEIGHT_FORCE_INLINE std::u8string
ToUtf8(std::basic_string_view<T> u32InputString)
233 return ToUtf8(std::u32string_view(
reinterpret_cast<const char32_t*
>(u32InputString.data()), u32InputString.size()));
240 requires std::same_as<T, char32_t> || (std::same_as<T, wchar_t> &&
sizeof(wchar_t) == 4)
241std::u16string
ToUtf16(
const std::basic_string_view<T> u32InputString)
243 std::u16string u16OutputString;
244 u16OutputString.reserve(u32InputString.size());
245 detail::UnicodeConverter<char16_t> converter;
246 for (
auto const c: u32InputString)
247 converter.Convert(c, std::back_inserter(u16OutputString));
248 return u16OutputString;
254LIGHTWEIGHT_API std::u16string
ToUtf16(std::u8string_view u8InputString);
259LIGHTWEIGHT_API std::u16string
ToUtf16(std::string
const& localeInputString);
264template <
typename T = std::u32
string>
268 for (
char32_t const c32: detail::Utf32Iterator { u8InputString })
269 result.push_back(c32);
/// Converts a UTF-16 string to UTF-32.
///
/// A high surrogate (U+D800..U+DBFF) immediately followed by a low surrogate
/// (U+DC00..U+DFFF) is combined into one supplementary-plane code point. A
/// surrogate that is not part of such a pair is replaced with U+FFFD. All other
/// code units are copied through unchanged.
///
/// @tparam T Result container; must provide `push_back`. Defaults to `std::u32string`.
/// @param u16InputString The UTF-16 input.
/// @return The decoded code points.
template <typename T = std::u32string>
T ToUtf32(std::u16string_view u16InputString)
{
    constexpr char32_t ReplacementCharacter = 0xFFFD;
    auto const isHighSurrogate = [](char16_t unit) noexcept { return unit >= 0xD800 && unit <= 0xDBFF; };
    auto const isLowSurrogate = [](char16_t unit) noexcept { return unit >= 0xDC00 && unit <= 0xDFFF; };

    T result;
    auto const last = u16InputString.end();
    for (auto it = u16InputString.begin(); it != last; ++it)
    {
        char16_t const c16 = *it;
        char32_t codePoint = c16;

        if (isHighSurrogate(c16))
        {
            auto const next = it + 1;
            if (next != last && isLowSurrogate(*next))
            {
                // The high unit carries the upper 10 bits, the low unit the lower 10.
                codePoint = static_cast<char32_t>(0x10000 + (((c16 & 0x3FF) << 10) | (*next & 0x3FF)));
                it = next; // the low surrogate has been consumed as well
            }
            else
            {
                codePoint = ReplacementCharacter;
            }
        }
        else if (isLowSurrogate(c16))
        {
            // Low surrogate without a preceding high surrogate.
            codePoint = ReplacementCharacter;
        }

        result.push_back(codePoint);
    }
    return result;
}
296LIGHTWEIGHT_API std::wstring ToStdWideString(std::u8string_view u8InputString);
301LIGHTWEIGHT_API std::wstring ToStdWideString(std::string
const& localeInputString);
T ToUtf32(std::u8string_view u8InputString)
std::u16string ToUtf16(const std::basic_string_view< T > u32InputString)
LIGHTWEIGHT_API std::u8string ToUtf8(std::u32string_view u32InputString)