// Declaration of the UnicodeConverter struct; the UnicodeConverter<char8_t> and
// UnicodeConverter<char16_t> definitions further down are its per-code-unit-type variants.
// NOTE(review): the template header that belongs to this declaration is not visible in this listing.
20 struct UnicodeConverter;
23 struct LIGHTWEIGHT_API UnicodeConverter<char8_t>
26 template <
typename OutputIterator>
27 static constexpr OutputIterator Convert(
char32_t input, OutputIterator output)
noexcept
31 *output++ =
static_cast<char8_t>(input & 0b0111'1111);
33 else if (input <= 0x07FF)
35 *output++ =
static_cast<char8_t>(((input >> 6) & 0b0001'1111) | 0b1100'0000);
36 *output++ =
static_cast<char8_t>(((input >> 0) & 0b0011'1111) | 0b1000'0000);
38 else if (input <= 0xFFFF)
40 *output++ =
static_cast<char8_t>(((input >> 12) & 0b0000'1111) | 0b1110'0000);
41 *output++ =
static_cast<char8_t>(((input >> 6) & 0b0011'1111) | 0b1000'0000);
42 *output++ =
static_cast<char8_t>(((input >> 0) & 0b0011'1111) | 0b1000'0000);
46 *output++ =
static_cast<char8_t>(((input >> 18) & 0b0000'0111) | 0b1111'0000);
47 *output++ =
static_cast<char8_t>(((input >> 12) & 0b0011'1111) | 0b1000'0000);
48 *output++ =
static_cast<char8_t>(((input >> 6) & 0b0011'1111) | 0b1000'0000);
49 *output++ =
static_cast<char8_t>(((input >> 0) & 0b0011'1111) | 0b1000'0000);
// UnicodeConverter variant producing char16_t (UTF-16) code units.
// NOTE(review): parts of this definition are not visible in this listing: the braces,
// the condition guarding the first write, whatever sits between the `< 0x10000` test and
// its write, the return statements, and the handling of inputs >= 0x110000.
56 struct LIGHTWEIGHT_API UnicodeConverter<char16_t>
// Writes the UTF-16 form of `input` through `output`, one or two char16_t units
// depending on the branch taken.
59 template <
typename OutputIterator>
60 static constexpr OutputIterator Convert(
char32_t input, OutputIterator output)
noexcept
// Single-unit case: the value is narrowed to char16_t and written unchanged.
64 *output++ = char16_t(input);
67 else if (input < 0x10000)
// Still fits one 16-bit unit, so it is written unchanged as well.
73 *output++ = char16_t(input);
76 else if (input < 0x110000)
// Two-unit (surrogate pair) case. 0xD7C0 equals 0xD800 - (0x10000 >> 10), so adding
// (input >> 10) subtracts the 0x10000 offset and adds the 0xD800 base in one step.
78 *output++ = char16_t(0xD7C0 + (input >> 10));
// Second unit: base 0xDC00 plus the low 10 bits of the input.
79 *output++ = char16_t(0xDC00 + (input & 0x3FF));
// State and stepping function of the incremental UTF-8 decoder (referred to elsewhere in
// this file as Utf32Converter).
// NOTE(review): the enclosing struct header, the declaration of `codeUnits`, and several
// statements of Process() are not visible in this listing.

// Code point under construction; Process() stores and merges payload bits into it.
89 char32_t codePoint = 0;
// Value returned by Process() on its error paths (0xFFFD).
92 static constexpr auto InvalidCodePoint =
char32_t { 0xFFFD };
// Consumes one UTF-8 code unit and returns an optional code point.
// NOTE(review): presumably an empty optional means "more input needed"; the statements
// producing that result are not visible here, so confirm against the full source.
94 constexpr std::optional<char32_t> Process(
char8_t c8)
noexcept
// Bit pattern 10xxxxxx: a continuation byte.
96 if ((c8 & 0b1100'0000) == 0b1000'0000)
// Error path for a continuation byte (its guarding condition is not visible here).
99 return InvalidCodePoint;
// Merge the six payload bits of the continuation byte into the accumulator.
101 codePoint |= c8 & 0b0011'1111;
// `codeUnits` presumably counts the continuation bytes still expected; reaching zero
// means the sequence is complete.
102 if (--codeUnits == 0)
104 auto result = codePoint;
// High bit clear: a single-byte (7-bit) code unit.
112 if ((c8 & 0b1000'0000) == 0)
// Bit pattern 110xxxxx: lead byte carrying five payload bits.
114 if ((c8 & 0b1110'0000) == 0b1100'0000)
116 codePoint = c8 & 0b0001'1111;
// Bit pattern 1110xxxx: lead byte carrying four payload bits.
120 if ((c8 & 0b1111'0000) == 0b1110'0000)
122 codePoint = c8 & 0b0000'1111;
// Bit pattern 11110xxx: lead byte carrying three payload bits.
126 if ((c8 & 0b1111'1000) == 0b1111'0000)
128 codePoint = c8 & 0b0000'0111;
// Remaining error returns; the conditions selecting them are not visible here.
132 return InvalidCodePoint;
134 return InvalidCodePoint;
// Range wrapper over a UTF-8 string view whose iterator yields char32_t values, so the
// string can be walked with a range-based for loop.
// NOTE(review): braces and several function bodies are not visible in this listing.
138 struct [[nodiscard]] Utf32Iterator
// The UTF-8 input being iterated; begin()/end() are built from its iterators.
140 std::u8string_view u8InputString;
// Iterator type returned by begin() and end().
142 struct [[nodiscard]] iterator
// Read position in the input; advanced by operator++ as code units are consumed.
144 std::u8string_view::iterator current {};
// One-past-the-last position of the input.
145 std::u8string_view::iterator end {};
// Value returned by operator*; starts out as Utf32Converter::InvalidCodePoint.
146 char32_t codePoint = Utf32Converter::InvalidCodePoint;
// Builds an iterator from a position and the end of the input.
// NOTE(review): the member initializers and constructor body are not visible here.
148 constexpr explicit iterator(std::u8string_view::iterator current, std::u8string_view::iterator end)
noexcept:
// Dereference (body not visible here; presumably returns `codePoint`).
156 constexpr char32_t operator*() const noexcept
// Pre-increment: starts a fresh Utf32Converter, resets `codePoint` to InvalidCodePoint,
// then feeds code units from `current` into Process() until it yields a value or
// `current` reaches `end`. What is done with the yielded value is not visible here.
161 constexpr iterator& operator++() noexcept
163 auto converter = Utf32Converter {};
164 codePoint = Utf32Converter::InvalidCodePoint;
165 while (current != end)
167 if (
auto const result = converter.Process(*current++); result.has_value())
// Post-increment.
// NOTE(review): declared to return `iterator&` rather than a by-value copy of the
// previous state, which is the conventional postfix contract; body not visible, confirm intent.
176 constexpr iterator& operator++(
int)
noexcept
// Two iterators are equal only when both the read position and the current code point match.
181 constexpr bool operator==(iterator
const& other)
const noexcept
183 return current == other.current && codePoint == other.codePoint;
// Negation of operator==.
186 constexpr bool operator!=(iterator
const& other)
const noexcept
188 return !(*
this == other);
// Iterator constructed from the start and end of the input.
192 iterator begin() const noexcept
194 return iterator { u8InputString.begin(), u8InputString.end() };
// Sentinel iterator: both positions are the end of the input.
197 iterator end() const noexcept
199 return iterator { u8InputString.end(), u8InputString.end() };
// Declaration only; the implementation is not part of this listing.
// Going by the name and parameter: takes a UTF-32 string view and returns a std::u8string.
212LIGHTWEIGHT_API std::u8string
ToUtf8(std::u32string_view u32InputString);
// Declaration only; the implementation is not part of this listing.
// Going by the name and parameter: takes a UTF-16 string view and returns a std::u8string.
217LIGHTWEIGHT_API std::u8string
ToUtf8(std::u16string_view u16InputString);
223 requires(std::same_as<T, wchar_t> &&
sizeof(wchar_t) == 2)
224inline LIGHTWEIGHT_FORCE_INLINE std::u8string
ToUtf8(std::basic_string_view<T> u16InputString)
226 return ToUtf8(std::u16string_view(
reinterpret_cast<char16_t const*
>(u16InputString.data()), u16InputString.size()));
233 requires(std::same_as<T, wchar_t> &&
sizeof(wchar_t) == 4)
234inline LIGHTWEIGHT_FORCE_INLINE std::u8string
ToUtf8(std::basic_string_view<T> u32InputString)
236 return ToUtf8(std::u32string_view(
reinterpret_cast<char32_t const*
>(u32InputString.data()), u32InputString.size()));
243 requires std::same_as<T, char32_t> || (std::same_as<T, wchar_t> &&
sizeof(wchar_t) == 4)
244std::u16string
ToUtf16(std::basic_string_view<T>
const u32InputString)
246 std::u16string u16OutputString;
247 u16OutputString.reserve(u32InputString.size());
248 detail::UnicodeConverter<char16_t> converter;
249 for (
auto const c: u32InputString)
250 converter.Convert(c, std::back_inserter(u16OutputString));
251 return u16OutputString;
// Declaration only; the implementation is not part of this listing.
// Going by the name and parameter: takes a UTF-8 string view and returns a std::u16string.
257LIGHTWEIGHT_API std::u16string
ToUtf16(std::u8string_view u8InputString);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): the parameter name suggests the input is in the current locale's
// encoding; confirm against the implementation.
262LIGHTWEIGHT_API std::u16string
ToUtf16(std::string
const& localeInputString);
267template <
typename T = std::u32
string>
271 for (
char32_t const c32: detail::Utf32Iterator { u8InputString })
272 result.push_back(c32);
/// Decodes a UTF-16 string into a container of UTF-32 code points.
///
/// Code units outside the surrogate range are copied through unchanged. A high
/// surrogate (0xD800..0xDBFF) immediately followed by a low surrogate (0xDC00..0xDFFF)
/// is combined into one supplementary code point. A surrogate that is not part of such
/// a pair cannot be represented in UTF-32 and is replaced by U+FFFD.
///
/// @tparam T               Result container type; defaults to std::u32string.
/// @param u16InputString   The UTF-16 input.
///
/// @return The container holding the decoded code points, in input order.
template <typename T = std::u32string>
T ToUtf32(std::u16string_view u16InputString)
{
    constexpr char32_t ReplacementCharacter = 0xFFFD;

    auto result = T {};
    auto const last = u16InputString.end();
    for (auto it = u16InputString.begin(); it != last; ++it)
    {
        char32_t const unit = *it;

        if (unit < 0xD800 || unit > 0xDFFF)
        {
            // Plain BMP code unit: its value is the code point.
            result.push_back(static_cast<char32_t>(unit));
            continue;
        }

        // Only a high surrogate may open a pair, and it needs a low surrogate right after it.
        if (unit < 0xDC00 && (it + 1) != last)
        {
            char32_t const next = *(it + 1);
            if (next >= 0xDC00 && next <= 0xDFFF)
            {
                // Ten payload bits from each half, offset by 0x10000.
                result.push_back(static_cast<char32_t>(0x10000 + ((unit - 0xD800) << 10) + (next - 0xDC00)));
                ++it; // the low surrogate has been consumed as part of the pair
                continue;
            }
        }

        // Unpaired high surrogate or stray low surrogate.
        result.push_back(ReplacementCharacter);
    }
    return result;
}
// Declaration only; the implementation is not part of this listing.
// Going by the name and parameter: takes a UTF-8 string view and returns a std::wstring.
298LIGHTWEIGHT_API std::wstring ToStdWideString(std::u8string_view u8InputString);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): the parameter name suggests the input is in the current locale's
// encoding; confirm against the implementation.
303LIGHTWEIGHT_API std::wstring ToStdWideString(std::string
const& localeInputString);
T ToUtf32(std::u8string_view u8InputString)
LIGHTWEIGHT_API std::u8string ToUtf8(std::u32string_view u32InputString)
std::u16string ToUtf16(std::basic_string_view< T > const u32InputString)