diff options
Diffstat (limited to 'cpp/test/IceUtil/unicode/Client.cpp')
-rw-r--r-- | cpp/test/IceUtil/unicode/Client.cpp | 154 |
1 files changed, 123 insertions, 31 deletions
diff --git a/cpp/test/IceUtil/unicode/Client.cpp b/cpp/test/IceUtil/unicode/Client.cpp index e5d05618cec..76d74fbe4b1 100644 --- a/cpp/test/IceUtil/unicode/Client.cpp +++ b/cpp/test/IceUtil/unicode/Client.cpp @@ -17,6 +17,9 @@ #endif #include <fstream> +// Uncomment to include performance testing +//#define TEST_PERF + using namespace IceUtil; using namespace std; @@ -42,21 +45,20 @@ main(int argc, char* argv[]) if(argc > 1) { -#ifdef _WIN32 +#ifdef _WIN32 # ifdef __MINGW32__ dir = argv[1]; # else - dir = IceUtil::wstringToString(argv[1]); + dir = wstringToString(argv[1]); # endif dir += "\\"; #else dir = argv[1]; dir += "/"; -#endif +#endif } - ostringstream os; os << "utf" << sizeof(wchar_t) * 8; #ifdef ICE_LITTLE_ENDIAN @@ -66,25 +68,23 @@ main(int argc, char* argv[]) #endif string wstringEncoding = os.str(); string wcoeurFile = string("coeur.") + wstringEncoding; - + { cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... "; ifstream is((dir + "coeur.utf8").c_str()); test(is.good()); ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); test(bis.good()); - + int lineNumber = 0; - + do { string line; getline(is, line, '\n'); - test(isLegalUTF8Sequence(reinterpret_cast<const Byte*>(line.data()), - reinterpret_cast<const Byte*>(line.data() + line.size()))); lineNumber++; wstring wline = stringToWstring(line); - + for(size_t i = 0; i < wline.length(); ++i) { wchar_t wc = wline[i]; @@ -122,7 +122,7 @@ main(int argc, char* argv[]) test(bis.eof()); } } while(is.good()); - + cout << "ok" << endl; } @@ -139,7 +139,7 @@ main(int argc, char* argv[]) { wchar_t wc; char* buffer = reinterpret_cast<char*>(&wc); - + for(size_t j = 0; j < sizeof(wchar_t); ++j) { if(!bis.good()) @@ -155,16 +155,16 @@ main(int argc, char* argv[]) ws.push_back(wc); } } while(bis.good()); - + string s = wstringToString(ws); - + ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary); test(nbis.good()); - + for(size_t i = 0; i < s.size(); ++i) { test(nbis.good()); - nbis.get(c); + nbis.get(c); char ci = s[i]; if(c != ci) @@ -183,6 +183,97 @@ main(int argc, char* argv[]) } { + cout << "testing wstring with surrogates... "; + + // + // Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units + // U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units + // + wstring ws = L"\u20ac\u20ac\U00010437"; + + if(sizeof(wchar_t) == 2) + { + test(ws.length() == 4); + } + else + { + test(sizeof(wchar_t) == 4); + test(ws.length() == 3); + } + + // + // The Unicode string converter implementation allocates an initial buffer + // of size max(2 * (sourceEnd - sourceStart), 4). + // With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6 + // bytes of the initial buffer. + + string ns = wstringToString(ws); + + const string good = "\xE2\x82\xAC\xE2\x82\xAC\xF0\x90\x90\xB7"; + test(ns == good); + test(ws == stringToWstring(ns)); + + cout << "ok" << endl; + + cout << "testing IceUtilInternal::toUTF16, toUTF32 and fromUTF32... "; + + vector<Byte> u8 = vector<Byte>(reinterpret_cast<const Byte*>(ns.data()), + reinterpret_cast<const Byte*>(ns.data() + ns.length())); + + vector<unsigned short> u16 = IceUtilInternal::toUTF16(u8); + test(u16.size() == 4); + test(u16[0] == 0x20ac); + test(u16[1] == 0x20ac); + test(u16[2] == 0xd801); + test(u16[3] == 0xdc37); + + vector<unsigned int> u32 = IceUtilInternal::toUTF32(u8); + test(u32.size() == 3); + test(u32[0] == 0x20ac); + test(u32[1] == 0x20ac); + test(u32[2] == 0x10437); + + vector<Byte> nu8 = IceUtilInternal::fromUTF32(u32); + test(nu8 == u8); + + cout << "ok" << endl; + } + +#ifdef TEST_PERF + { + // The only performance-critical code is the UnicodeWstringConverter + // that is used whenever we marshal/unmarshal wstrings. + + const long iterations = 5000000; + const wstring ws = L"abcdefghijklmnopqrstuvwxyz+\u20ac\u20ac\U00010437"; + const string ns = wstringToString(ws); + test(stringToWstring(ns) == ws); + + cout << "testing performance with " << iterations << " iterations... "; + + IceUtil::Time toU8 = IceUtil::Time::now(IceUtil::Time::Monotonic); + for(long i = 0; i < iterations; ++i) + { + test(wstringToString(ws) == ns); + } + IceUtil::Time now = IceUtil::Time::now(IceUtil::Time::Monotonic); + toU8 = now - toU8; + + IceUtil::Time fromU8 = now; + for(long i = 0; i < iterations; ++i) + { + test(stringToWstring(ns) == ws); + } + fromU8 = IceUtil::Time::now(IceUtil::Time::Monotonic) - fromU8; + + cout << "toUTF8 = " << toU8 * 1000 << " ms; fromUTF8 = " + << fromU8 * 1000 << " ms ok" << endl; + } + +#endif + + + { cout << "testing error handling... "; // From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string @@ -199,44 +290,45 @@ main(int argc, char* argv[]) "\xfc\xa1\xa1\xa1\xa1\xa1", "" }; - + for(size_t i = 0; badUTF8[i] != ""; ++i) { - test(isLegalUTF8Sequence(reinterpret_cast<const Byte*>(badUTF8[i].data()), - reinterpret_cast<const Byte*>(badUTF8[i].data() + badUTF8[i].size())) == false); - try { - wstring ws = IceUtil::stringToWstring(badUTF8[i]); + wstring ws = stringToWstring(badUTF8[i]); + wcerr << L"Unexpected: " << ws << endl; test(false); } - catch(const IceUtil::IllegalConversionException&) + catch(const IllegalConversionException&) {} - } + } - // TODO: need test for bad UTF-32 strings + // TODO: need test for bad UTF-32 strings #ifdef _WIN32 - - wstring badWstring[] = { - wstring(1, wchar_t(0xD800)), + + // Note: for an unknown reason, the conversion works without + // the extra letter (x below) when using codecvt_utf8_utf16. + + wstring badWstring[] = { + wstring(1, wchar_t(0xD800)) + L"x", wstring(2, wchar_t(0xDB7F)), L"" }; - + for(size_t i = 0; badWstring[i] != L""; ++i) { try { - string s = IceUtil::wstringToString(badWstring[i]); + string s = wstringToString(badWstring[i]); test(false); } - catch(const IceUtil::IllegalConversionException&) + catch(const IllegalConversionException&) {} } #endif cout << "ok" << endl; - + } return EXIT_SUCCESS; } |