diff options
-rw-r--r-- | cpp/src/IceUtil/StringConverter.cpp | 123 | ||||
-rw-r--r-- | cpp/src/IceUtil/Unicode.cpp | 187 | ||||
-rw-r--r-- | cpp/test/IceUtil/unicode/Client.cpp | 465 |
3 files changed, 382 insertions, 393 deletions
diff --git a/cpp/src/IceUtil/StringConverter.cpp b/cpp/src/IceUtil/StringConverter.cpp index 8b60c48d53f..cb15037ecfa 100644 --- a/cpp/src/IceUtil/StringConverter.cpp +++ b/cpp/src/IceUtil/StringConverter.cpp @@ -43,11 +43,7 @@ struct SelectCodeCvt; template<> struct SelectCodeCvt<2> { -#ifdef ICE_LITTLE_ENDIAN - typedef std::codecvt_utf8_utf16<wchar_t, 0x10ffff, little_endian> Type; -#else typedef std::codecvt_utf8_utf16<wchar_t> Type; -#endif }; template<> @@ -155,28 +151,35 @@ public: virtual void fromUTF8(const Byte* sourceStart, const Byte* sourceEnd, wstring& target) const { - if(sourceStart == sourceEnd) + const size_t sourceSize = sourceEnd - sourceStart; + + if(sourceSize == 0) { target = L""; } else { - // - // TODO: consider reimplementing without the wstring_convert helper - // to improve performance - // Note that wstring_convert is "stateful" and cannot be a shared data member - // - wstring_convert<CodeCvt> convert; - - try - { - target = convert.from_bytes(reinterpret_cast<const char*>(sourceStart), - reinterpret_cast<const char*>(sourceEnd)); - } - catch(const std::range_error& ex) + target.resize(sourceSize); + wchar_t* targetStart = const_cast<wchar_t*>(target.data()); + wchar_t* targetEnd = targetStart + sourceSize; + wchar_t* targetNext = targetStart; + + const char* sourceNext = reinterpret_cast<const char*>(sourceStart); + + mbstate_t state = mbstate_t(); + + codecvt_base::result result = _codecvt.in(state, + reinterpret_cast<const char*>(sourceStart), + reinterpret_cast<const char*>(sourceEnd), + sourceNext, + targetStart, targetEnd, targetNext); + + if(result != codecvt_base::ok) { - throw IllegalConversionException(__FILE__, __LINE__, ex.what()); + throw IllegalConversionException(__FILE__, __LINE__, "codecvt.in failure"); } + + target.resize(targetNext - targetStart); } } @@ -215,14 +218,12 @@ public: targetStart = buffer.getMoreBytes(chunkSize, targetStart); targetEnd = targetStart + chunkSize; - } while(convertUTFWstringToUTF8(sourceStart, sourceEnd, targetStart, targetEnd) == false); return targetStart; } - virtual void fromUTF8(const Byte* sourceStart, const Byte* sourceEnd, wstring& target) const { if(sourceStart == sourceEnd) @@ -290,67 +291,38 @@ getUnicodeWstringConverter() return unicodeWstringConverter; } - class UTF8BufferI : public UTF8Buffer { public: - UTF8BufferI() : - _buffer(0), - _offset(0) - { - } - - ~UTF8BufferI() - { - free(_buffer); - } - + // + // Returns the first unused byte in the resized buffer + // Byte* getMoreBytes(size_t howMany, Byte* firstUnused) { - if(_buffer == 0) - { - _buffer = static_cast<Byte*>(malloc(howMany)); - if(!_buffer) - { - throw std::bad_alloc(); - } - } - else + size_t bytesUsed = 0; + if(firstUnused != 0) { - assert(firstUnused != 0); - _offset = firstUnused - _buffer; - Byte* newBuffer = static_cast<Byte*>(realloc(_buffer, _offset + howMany)); - if(!newBuffer) - { - reset(); - throw std::bad_alloc(); - } - else - { - _buffer = newBuffer; - } + bytesUsed = firstUnused - reinterpret_cast<const Byte*>(_buffer.data()); } - return _buffer + _offset; - } - - Byte* getBuffer() - { - return _buffer; + if(_buffer.size() < howMany + bytesUsed) + { + _buffer.resize(bytesUsed + howMany); + } + + return const_cast<Byte*>(reinterpret_cast<const Byte*>(_buffer.data())) + bytesUsed; } - void reset() + void swap(string& other, const Byte* tail) { - free(_buffer); - _buffer = 0; - _offset = 0; + assert(tail >= reinterpret_cast<const Byte*>(_buffer.data())); + _buffer.resize(tail - reinterpret_cast<const Byte*>(_buffer.data())); + other.swap(_buffer); } private: - - Byte* _buffer; - size_t _offset; + string _buffer; }; #ifdef _WIN32 @@ -516,8 +488,8 @@ IceUtil::wstringToString(const wstring& v, const StringConverterPtr& converter, // UTF8BufferI buffer; Byte* last = wConverterWithDefault->toUTF8(v.data(), v.data() + v.size(), buffer); - target = string(reinterpret_cast<const char*>(buffer.getBuffer()), last - buffer.getBuffer()); - + buffer.swap(target, last); + // // If narrow string converter is present convert to the native narrow string encoding, otherwise // native narrow string encoding is UTF8 and we are done. @@ -534,8 +506,7 @@ IceUtil::wstringToString(const wstring& v, const StringConverterPtr& converter, } wstring -IceUtil::stringToWstring(const string& v, const StringConverterPtr& converter, - const WstringConverterPtr& wConverter) +IceUtil::stringToWstring(const string& v, const StringConverterPtr& converter, const WstringConverterPtr& wConverter) { wstring target; if(!v.empty()) @@ -549,7 +520,7 @@ IceUtil::stringToWstring(const string& v, const StringConverterPtr& converter, { UTF8BufferI buffer; Byte* last = converter->toUTF8(v.data(), v.data() + v.size(), buffer); - tmp = string(reinterpret_cast<const char*>(buffer.getBuffer()), last - buffer.getBuffer()); + buffer.swap(tmp, last); } else { @@ -577,7 +548,9 @@ IceUtil::nativeToUTF8(const string& str, const IceUtil::StringConverterPtr& conv } UTF8BufferI buffer; Byte* last = converter->toUTF8(str.data(), str.data() + str.size(), buffer); - return string(reinterpret_cast<const char*>(buffer.getBuffer()), last - buffer.getBuffer()); + string result; + buffer.swap(result, last); + return result; } string @@ -620,11 +593,7 @@ IceUtilInternal::toUTF16(const vector<Byte>& source) #ifdef ICE_HAS_CODECVT_UTF8 assert(sizeof(Char16T) == sizeof(unsigned short)); -#ifdef ICE_LITTLE_ENDIAN - typedef wstring_convert<codecvt_utf8_utf16<Char16T, 0x10ffff, little_endian>, Char16T> Convert; -#else typedef wstring_convert<codecvt_utf8_utf16<Char16T>, Char16T> Convert; -#endif Convert convert; diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp index 22ced7e61b2..4db36d29e9d 100644 --- a/cpp/src/IceUtil/Unicode.cpp +++ b/cpp/src/IceUtil/Unicode.cpp @@ -26,90 +26,80 @@ using namespace IceUtilInternal; namespace { - // - // Helper class, base never defined - // Usage: WstringHelper<sizeof(wchar_t)>::toUTF8 and fromUTF8. - // - template<size_t wcharSize> - struct WstringHelper - { - static ConversionResult toUTF8( - const wchar_t*& sourceStart, const wchar_t* sourceEnd, - Byte*& targetStart, Byte* targetEnd); +// +// Helper class, base never defined +// Usage: WstringHelper<sizeof(wchar_t)>::toUTF8 and fromUTF8. +// +template<size_t wcharSize> struct WstringHelper; - static ConversionResult fromUTF8( - const Byte*& sourceStart, const Byte* sourceEnd, - wchar_t*& targetStart, wchar_t* targetEnd); - }; - template<> - struct WstringHelper<2> - { - static ConversionResult toUTF8( - const wchar_t*& sourceStart, const wchar_t* sourceEnd, - Byte*& targetStart, Byte* targetEnd) - { - return ConvertUTF16toUTF8( - reinterpret_cast<const UTF16**>(&sourceStart), - reinterpret_cast<const UTF16*>(sourceEnd), - &targetStart, targetEnd, lenientConversion); - } - - static ConversionResult fromUTF8( - const Byte*& sourceStart, const Byte* sourceEnd, - wchar_t*& targetStart, wchar_t* targetEnd) - { - return ConvertUTF8toUTF16( - &sourceStart, sourceEnd, - reinterpret_cast<UTF16**>(&targetStart), - reinterpret_cast<UTF16*>(targetEnd), lenientConversion); - } - }; - - template<> - struct WstringHelper<4> - { - static ConversionResult toUTF8( - const wchar_t*& sourceStart, const wchar_t* sourceEnd, - Byte*& targetStart, Byte* targetEnd) - { - return ConvertUTF32toUTF8( - reinterpret_cast<const UTF32**>(&sourceStart), - reinterpret_cast<const UTF32*>(sourceEnd), - &targetStart, targetEnd, lenientConversion); - } - - static ConversionResult fromUTF8( - const Byte*& sourceStart, const Byte* sourceEnd, - wchar_t*& targetStart, wchar_t* targetEnd) - { - return ConvertUTF8toUTF32( - &sourceStart, sourceEnd, - reinterpret_cast<UTF32**>(&targetStart), - reinterpret_cast<UTF32*>(targetEnd), lenientConversion); - } - }; - - void - checkResult(ConversionResult result) +template<> +struct WstringHelper<2> +{ + static ConversionResult toUTF8( + const wchar_t*& sourceStart, const wchar_t* sourceEnd, + Byte*& targetStart, Byte* targetEnd) + { + return ConvertUTF16toUTF8( + reinterpret_cast<const UTF16**>(&sourceStart), + reinterpret_cast<const UTF16*>(sourceEnd), + &targetStart, targetEnd, lenientConversion); + } + + static ConversionResult fromUTF8( + const Byte*& sourceStart, const Byte* sourceEnd, + wchar_t*& targetStart, wchar_t* targetEnd) + { + return ConvertUTF8toUTF16( + &sourceStart, sourceEnd, + reinterpret_cast<UTF16**>(&targetStart), + reinterpret_cast<UTF16*>(targetEnd), lenientConversion); + } +}; + +template<> +struct WstringHelper<4> +{ + static ConversionResult toUTF8( + const wchar_t*& sourceStart, const wchar_t* sourceEnd, + Byte*& targetStart, Byte* targetEnd) + { + return ConvertUTF32toUTF8( + reinterpret_cast<const UTF32**>(&sourceStart), + reinterpret_cast<const UTF32*>(sourceEnd), + &targetStart, targetEnd, lenientConversion); + } + + static ConversionResult fromUTF8( + const Byte*& sourceStart, const Byte* sourceEnd, + wchar_t*& targetStart, wchar_t* targetEnd) + { + return ConvertUTF8toUTF32( + &sourceStart, sourceEnd, + reinterpret_cast<UTF32**>(&targetStart), + reinterpret_cast<UTF32*>(targetEnd), lenientConversion); + } +}; + +void checkResult(ConversionResult result) +{ + switch (result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "source illegal"); + case targetExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "source illegal"); + default: { - switch (result) - { - case conversionOK: - break; - case sourceExhausted: - throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "source exhausted"); - case sourceIllegal: - throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "source illegal"); - case targetExhausted: - throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "source illegal"); - default: - { - assert(0); - throw IceUtil::IllegalConversionException(__FILE__, __LINE__); - } - } + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); } + } +} } // @@ -117,9 +107,8 @@ namespace // bool -IceUtilInternal::convertUTFWstringToUTF8( - const wchar_t*& sourceStart, const wchar_t* sourceEnd, - Byte*& targetStart, Byte* targetEnd) +IceUtilInternal::convertUTFWstringToUTF8(const wchar_t*& sourceStart, const wchar_t* sourceEnd, + Byte*& targetStart, Byte* targetEnd) { ConversionResult result = WstringHelper<sizeof(wchar_t)>::toUTF8( sourceStart, sourceEnd, targetStart, targetEnd); @@ -135,30 +124,20 @@ IceUtilInternal::convertUTFWstringToUTF8( } } - void -IceUtilInternal::convertUTF8ToUTFWstring(const Byte*& sourceStart, const Byte* sourceEnd, - std::wstring& target) +IceUtilInternal::convertUTF8ToUTFWstring(const Byte*& sourceStart, const Byte* sourceEnd, std::wstring& target) { - // - // Could be reimplemented without this temporary wchar_t buffer - // - size_t size = static_cast<size_t>(sourceEnd - sourceStart); - wchar_t* outBuf = new wchar_t[size]; - wchar_t* targetStart = outBuf; - wchar_t* targetEnd = targetStart + size; - - ConversionResult result = - WstringHelper<sizeof(wchar_t)>::fromUTF8( - sourceStart, sourceEnd, targetStart, targetEnd); - - if(result == conversionOK) - { - std::wstring s(outBuf, static_cast<size_t>(targetStart - outBuf)); - s.swap(target); - } - delete[] outBuf; + size_t sourceSize = static_cast<size_t>(sourceEnd - sourceStart); + + target.resize(sourceSize); + wchar_t* targetStart = const_cast<wchar_t*>(target.data()); + wchar_t* targetEnd = targetStart + sourceSize; + + ConversionResult result = WstringHelper<sizeof(wchar_t)>::fromUTF8(sourceStart, sourceEnd, + targetStart, targetEnd); + checkResult(result); + target.resize(targetStart - target.data()); } void diff --git a/cpp/test/IceUtil/unicode/Client.cpp b/cpp/test/IceUtil/unicode/Client.cpp index b3d3912057b..64c8fe5f0dd 100644 --- a/cpp/test/IceUtil/unicode/Client.cpp +++ b/cpp/test/IceUtil/unicode/Client.cpp @@ -17,6 +17,9 @@ #endif #include <fstream> +// Uncomment to include performance testing +//#define TEST_PERF + using namespace IceUtil; using namespace std; @@ -45,14 +48,14 @@ main(int argc, char* argv[]) #ifdef _WIN32 # ifdef __MINGW32__ - dir = argv[1]; + dir = argv[1]; # else - dir = IceUtil::wstringToString(argv[1]); + dir = IceUtil::wstringToString(argv[1]); # endif - dir += "\\"; + dir += "\\"; #else - dir = argv[1]; - dir += "/"; + dir = argv[1]; + dir += "/"; #endif } @@ -67,226 +70,264 @@ main(int argc, char* argv[]) string wcoeurFile = string("coeur.") + wstringEncoding; { - cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... "; - ifstream is((dir + "coeur.utf8").c_str()); - test(is.good()); - ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); - test(bis.good()); - - int lineNumber = 0; - - do - { - string line; - getline(is, line, '\n'); - lineNumber++; - wstring wline = stringToWstring(line); - - for(size_t i = 0; i < wline.length(); ++i) - { - wchar_t wc = wline[i]; - const char* buffer = reinterpret_cast<char*>(&wc); - for(size_t j = 0; j < sizeof(wchar_t); ++j) - { - test(bis.good()); - char c; - bis.get(c); - if(buffer[j] != c) - { - cerr << "Error at line " << lineNumber << " column " << i << endl; - cerr << "buffer[j] == " << hex << (int)static_cast<unsigned char>(buffer[j]) << endl; - cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; - } - test(buffer[j] == c); - } - } - // - // Skip newline character (Unix-style newline) - // - if(is.good()) - { - for(size_t j = 0; j < sizeof(wchar_t); ++j) - { - test(bis.good()); - char c; - bis.get(c); - } - } - else - { - char c; - bis.get(c); - test(bis.eof()); - } - } while(is.good()); - - cout << "ok" << endl; + cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... "; + ifstream is((dir + "coeur.utf8").c_str()); + test(is.good()); + ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); + test(bis.good()); + + int lineNumber = 0; + + do + { + string line; + getline(is, line, '\n'); + lineNumber++; + wstring wline = stringToWstring(line); + + for(size_t i = 0; i < wline.length(); ++i) + { + wchar_t wc = wline[i]; + const char* buffer = reinterpret_cast<char*>(&wc); + for(size_t j = 0; j < sizeof(wchar_t); ++j) + { + test(bis.good()); + char c; + bis.get(c); + if(buffer[j] != c) + { + cerr << "Error at line " << lineNumber << " column " << i << endl; + cerr << "buffer[j] == " << hex << (int)static_cast<unsigned char>(buffer[j]) << endl; + cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; + } + test(buffer[j] == c); + } + } + // + // Skip newline character (Unix-style newline) + // + if(is.good()) + { + for(size_t j = 0; j < sizeof(wchar_t); ++j) + { + test(bis.good()); + char c; + bis.get(c); + } + } + else + { + char c; + bis.get(c); + test(bis.eof()); + } + } while(is.good()); + + cout << "ok" << endl; + } + + { + cout << "testing wstring (" << wstringEncoding << ") to UTF-8 conversion... "; + + ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); + test(bis.good()); + + wstring ws; + char c; + + do + { + wchar_t wc; + char* buffer = reinterpret_cast<char*>(&wc); + + for(size_t j = 0; j < sizeof(wchar_t); ++j) + { + if(!bis.good()) + { + break; + } + bis.get(c); + buffer[j] = c; + } + + if(bis.good()) + { + ws.push_back(wc); + } + } while(bis.good()); + + string s = wstringToString(ws); + + ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary); + test(nbis.good()); + + for(size_t i = 0; i < s.size(); ++i) + { + test(nbis.good()); + nbis.get(c); + char ci = s[i]; + + if(c != ci) + { + cerr << "i == " << i << endl; + cerr << "ci == " << hex << (int)static_cast<unsigned char>(ci) << endl; + cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; + } + test(c == s[i]); + } + test(!nbis.eof()); + nbis.get(c); + test(nbis.eof()); + + cout << "ok" << endl; } { - cout << "testing wstring (" << wstringEncoding << ") to UTF-8 conversion... "; - - ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); - test(bis.good()); - - wstring ws; - char c; - - do - { - wchar_t wc; - char* buffer = reinterpret_cast<char*>(&wc); - - for(size_t j = 0; j < sizeof(wchar_t); ++j) - { - if(!bis.good()) - { - break; - } - bis.get(c); - buffer[j] = c; - } - - if(bis.good()) - { - ws.push_back(wc); - } - } while(bis.good()); - - string s = wstringToString(ws); - - ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary); - test(nbis.good()); - - for(size_t i = 0; i < s.size(); ++i) - { - test(nbis.good()); - nbis.get(c); - char ci = s[i]; - - if(c != ci) - { - cerr << "i == " << i << endl; - cerr << "ci == " << hex << (int)static_cast<unsigned char>(ci) << endl; - cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; - } - test(c == s[i]); - } - test(!nbis.eof()); - nbis.get(c); - test(nbis.eof()); - - cout << "ok" << endl; + cout << "testing wstring with surrogates... "; + + // + // Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units + // U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units + // + wstring ws = L"\u20ac\u20ac\U00010437"; + + if(sizeof(wchar_t) == 2) + { + test(ws.length() == 4); + } + else + { + test(sizeof(wchar_t) == 4); + test(ws.length() == 3); + } + + // + // The Unicode string converter implementation allocates an initial buffer + // of size max(2 * (sourceEnd - sourceStart), 4). + // With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6 + // bytes of the initial buffer. + + string ns = IceUtil::wstringToString(ws); + + const string good = "\xE2\x82\xAC\xE2\x82\xAC\xF0\x90\x90\xB7"; + test(ns == good); + test(ws == IceUtil::stringToWstring(ns)); + + cout << "ok" << endl; + + cout << "testing IceUtilInternal::toUTF16, toUTF32 and fromUTF32... "; + + vector<Byte> u8 = vector<Byte>(reinterpret_cast<const Byte*>(ns.data()), + reinterpret_cast<const Byte*>(ns.data() + ns.length())); + + vector<unsigned short> u16 = IceUtilInternal::toUTF16(u8); + test(u16.size() == 4); + test(u16[0] == 0x20ac); + test(u16[1] == 0x20ac); + test(u16[2] == 0xd801); + test(u16[3] == 0xdc37); + + vector<unsigned int> u32 = IceUtilInternal::toUTF32(u8); + test(u32.size() == 3); + test(u32[0] == 0x20ac); + test(u32[1] == 0x20ac); + test(u32[2] == 0x10437); + + vector<Byte> nu8 = IceUtilInternal::fromUTF32(u32); + test(nu8 == u8); + + cout << "ok" << endl; } +#ifdef TEST_PERF { - cout << "testing wstring with surrogates... "; - - // - // Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units - // U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units - // - wstring ws = L"\u20ac\u20ac\U00010437"; - - if(sizeof(wchar_t) == 2) - { - test(ws.length() == 4); - } - else - { - test(sizeof(wchar_t) == 4); - test(ws.length() == 3); - } - - // - // The Unicode string converter implementation allocates an initial buffer - // of size max(2 * (sourceEnd - sourceStart), 4). - // With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6 - // bytes of the initial buffer. - - string ns = IceUtil::wstringToString(ws); - - const string good = "\xE2\x82\xAC\xE2\x82\xAC\xF0\x90\x90\xB7"; - test(ns == good); - test(ws == IceUtil::stringToWstring(ns)); - - vector<Byte> u8 = vector<Byte>(reinterpret_cast<const Byte*>(ns.data()), - reinterpret_cast<const Byte*>(ns.data() + ns.length())); - - vector<unsigned short> u16 = IceUtilInternal::toUTF16(u8); - test(u16.size() == 4); - test(u16[0] == 0x20ac); - test(u16[1] == 0x20ac); - test(u16[2] == 0xd801); - test(u16[3] == 0xdc37); - - vector<unsigned int> u32 = IceUtilInternal::toUTF32(u8); - test(u32.size() == 3); - test(u32[0] == 0x20ac); - test(u32[1] == 0x20ac); - test(u32[2] == 0x10437); - - vector<Byte> nu8 = IceUtilInternal::fromUTF32(u32); - test(nu8 == u8); - - cout << "ok" << endl; + // The only performance-critical code is the UnicodeWstringConverter + // that is used whenever we marshal/unmarshal wstrings. + + const long iterations = 5000000; + const wstring ws = L"abcdefghijklmnopqrstuvwxyz+\u20ac\u20ac\U00010437"; + const string ns = IceUtil::wstringToString(ws); + test(IceUtil::stringToWstring(ns) == ws); + + cout << "testing performance with " << iterations << " iterations... "; + + IceUtil::Time toU8 = IceUtil::Time::now(IceUtil::Time::Monotonic); + for(long i = 0; i < iterations; ++i) + { + test(IceUtil::wstringToString(ws) == ns); + } + IceUtil::Time now = IceUtil::Time::now(IceUtil::Time::Monotonic); + toU8 = now - toU8; + + IceUtil::Time fromU8 = now; + for(long i = 0; i < iterations; ++i) + { + test(IceUtil::stringToWstring(ns) == ws); + } + fromU8 = IceUtil::Time::now(IceUtil::Time::Monotonic) - fromU8; + + cout << "toUTF8 = " << toU8 * 1000 << " ms; fromUTF8 = " + << fromU8 * 1000 << " ms ok" << endl; } +#endif + + { - cout << "testing error handling... "; - - // From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string - - string badUTF8[] = { - "\xc3\x28", - "\xa0\xa1", - "\xe2\x28\xa1", - "\xe2\x82\x28", - "\xf0\x28\x8c\xbc", - "\xf0\x90\x28\xbc", - "\xf0\x28\x8c\x28", - "\xf8\xa1\xa1\xa1\xa1", - "\xfc\xa1\xa1\xa1\xa1\xa1", - "" - }; - - for(size_t i = 0; badUTF8[i] != ""; ++i) - { - try - { - wstring ws = IceUtil::stringToWstring(badUTF8[i]); - wcerr << L"Unexpected: " << ws << endl; - test(false); - } - catch(const IceUtil::IllegalConversionException&) - {} - } - - // TODO: need test for bad UTF-32 strings + cout << "testing error handling... "; + + // From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string + + string badUTF8[] = { + "\xc3\x28", + "\xa0\xa1", + "\xe2\x28\xa1", + "\xe2\x82\x28", + "\xf0\x28\x8c\xbc", + "\xf0\x90\x28\xbc", + "\xf0\x28\x8c\x28", + "\xf8\xa1\xa1\xa1\xa1", + "\xfc\xa1\xa1\xa1\xa1\xa1", + "" + }; + + for(size_t i = 0; badUTF8[i] != ""; ++i) + { + try + { + wstring ws = IceUtil::stringToWstring(badUTF8[i]); + wcerr << L"Unexpected: " << ws << endl; + test(false); + } + catch(const IceUtil::IllegalConversionException&) + {} + } + + // TODO: need test for bad UTF-32 strings #ifdef _WIN32 - // Note: for an unknown reason, the conversion works without - // the extra letter (x below) when using codecvt_utf8_utf16. - - wstring badWstring[] = { - wstring(1, wchar_t(0xD800)) + L"x", - wstring(2, wchar_t(0xDB7F)), - L"" - }; - - for(size_t i = 0; badWstring[i] != L""; ++i) - { - try - { - string s = IceUtil::wstringToString(badWstring[i]); - test(false); - } - catch(const IceUtil::IllegalConversionException&) - {} - } + // Note: for an unknown reason, the conversion works without + // the extra letter (x below) when using codecvt_utf8_utf16. + + wstring badWstring[] = { + wstring(1, wchar_t(0xD800)) + L"x", + wstring(2, wchar_t(0xDB7F)), + L"" + }; + + for(size_t i = 0; badWstring[i] != L""; ++i) + { + try + { + string s = IceUtil::wstringToString(badWstring[i]); + test(false); + } + catch(const IceUtil::IllegalConversionException&) + {} + } #endif - cout << "ok" << endl; + cout << "ok" << endl; } return EXIT_SUCCESS; |