8
8
// ===----------------------------------------------------------------------===//
9
9
10
10
#include " llvm/Support/ConvertUTF.h"
11
+ #include " llvm/Support/ErrorHandling.h"
11
12
#include " llvm/Support/SwapByteOrder.h"
12
13
#include < string>
13
14
#include < vector>
@@ -36,7 +37,7 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
36
37
ConversionFlags flags = strictConversion;
37
38
result = ConvertUTF8toUTF16 (
38
39
&sourceStart, sourceStart + Source.size (),
39
- &targetStart, targetStart + 2 * Source.size (), flags);
40
+ &targetStart, targetStart + Source.size (), flags);
40
41
if (result == conversionOK)
41
42
ResultPtr = reinterpret_cast <char *>(targetStart);
42
43
else
@@ -49,7 +50,7 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
49
50
ConversionFlags flags = strictConversion;
50
51
result = ConvertUTF8toUTF32 (
51
52
&sourceStart, sourceStart + Source.size (),
52
- &targetStart, targetStart + 4 * Source.size (), flags);
53
+ &targetStart, targetStart + Source.size (), flags);
53
54
if (result == conversionOK)
54
55
ResultPtr = reinterpret_cast <char *>(targetStart);
55
56
else
@@ -130,6 +131,13 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
130
131
return true ;
131
132
}
132
133
134
+ bool convertUTF16ToUTF8String (ArrayRef<UTF16> Src, std::string &Out)
135
+ {
136
+ return convertUTF16ToUTF8String (
137
+ llvm::ArrayRef<char >(reinterpret_cast <const char *>(Src.data ()),
138
+ Src.size () * sizeof (UTF16)), Out);
139
+ }
140
+
133
141
bool convertUTF8ToUTF16String (StringRef SrcUTF8,
134
142
SmallVectorImpl<UTF16> &DstUTF16) {
135
143
assert (DstUTF16.empty ());
@@ -168,5 +176,74 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8,
168
176
return true ;
169
177
}
170
178
179
// The wide-string helpers in this file dispatch on sizeof(wchar_t) and only
// handle 1-, 2- and 4-byte wide characters; reject any other width at
// compile time.
static_assert(sizeof(wchar_t) == 1 || sizeof(wchar_t) == 2 ||
                  sizeof(wchar_t) == 4,
              " Expected wchar_t to be 1, 2, or 4 bytes");
182
+
183
+ template <typename TResult>
184
+ static inline bool ConvertUTF8toWideInternal (llvm::StringRef Source,
185
+ TResult &Result) {
186
+ // Even in the case of UTF-16, the number of bytes in a UTF-8 string is
187
+ // at least as large as the number of elements in the resulting wide
188
+ // string, because surrogate pairs take at least 4 bytes in UTF-8.
189
+ Result.resize (Source.size () + 1 );
190
+ char *ResultPtr = reinterpret_cast <char *>(&Result[0 ]);
191
+ const UTF8 *ErrorPtr;
192
+ if (!ConvertUTF8toWide (sizeof (wchar_t ), Source, ResultPtr, ErrorPtr)) {
193
+ Result.clear ();
194
+ return false ;
195
+ }
196
+ Result.resize (reinterpret_cast <wchar_t *>(ResultPtr) - &Result[0 ]);
197
+ return true ;
198
+ }
199
+
200
+ bool ConvertUTF8toWide (llvm::StringRef Source, std::wstring &Result) {
201
+ return ConvertUTF8toWideInternal (Source, Result);
202
+ }
203
+
204
+ bool ConvertUTF8toWide (const char *Source, std::wstring &Result) {
205
+ if (!Source) {
206
+ Result.clear ();
207
+ return true ;
208
+ }
209
+ return ConvertUTF8toWide (llvm::StringRef (Source), Result);
210
+ }
211
+
212
+ bool convertWideToUTF8 (const std::wstring &Source, std::string &Result) {
213
+ if (sizeof (wchar_t ) == 1 ) {
214
+ const UTF8 *Start = reinterpret_cast <const UTF8 *>(Source.data ());
215
+ const UTF8 *End =
216
+ reinterpret_cast <const UTF8 *>(Source.data () + Source.size ());
217
+ if (!isLegalUTF8String (&Start, End))
218
+ return false ;
219
+ Result.resize (Source.size ());
220
+ memcpy (&Result[0 ], Source.data (), Source.size ());
221
+ return true ;
222
+ } else if (sizeof (wchar_t ) == 2 ) {
223
+ return convertUTF16ToUTF8String (
224
+ llvm::ArrayRef<UTF16>(reinterpret_cast <const UTF16 *>(Source.data ()),
225
+ Source.size ()),
226
+ Result);
227
+ } else if (sizeof (wchar_t ) == 4 ) {
228
+ const UTF32 *Start = reinterpret_cast <const UTF32 *>(Source.data ());
229
+ const UTF32 *End =
230
+ reinterpret_cast <const UTF32 *>(Source.data () + Source.size ());
231
+ Result.resize (UNI_MAX_UTF8_BYTES_PER_CODE_POINT * Source.size ());
232
+ UTF8 *ResultPtr = reinterpret_cast <UTF8 *>(&Result[0 ]);
233
+ UTF8 *ResultEnd = reinterpret_cast <UTF8 *>(&Result[0 ] + Result.size ());
234
+ if (ConvertUTF32toUTF8 (&Start, End, &ResultPtr, ResultEnd,
235
+ strictConversion) == conversionOK) {
236
+ Result.resize (reinterpret_cast <char *>(ResultPtr) - &Result[0 ]);
237
+ return true ;
238
+ } else {
239
+ Result.clear ();
240
+ return false ;
241
+ }
242
+ } else {
243
+ llvm_unreachable (
244
+ " Control should never reach this point; see static_assert further up" );
245
+ }
246
+ }
247
+
171
248
} // end namespace llvm
172
249
0 commit comments