diff options
author | Bernard Normier <bernard@zeroc.com> | 2016-06-03 14:11:48 -0400 |
---|---|---|
committer | Bernard Normier <bernard@zeroc.com> | 2016-06-03 14:11:48 -0400 |
commit | 48b141f8f6a8603f6b13a85bde002f6b480832f7 (patch) | |
tree | 41cf4ae281ad96e13216ea208a6579aa442844d3 /cpp/test | |
parent | Fixed 3.7a2 version in msbuild props files (diff) | |
download | ice-48b141f8f6a8603f6b13a85bde002f6b480832f7.tar.bz2 ice-48b141f8f6a8603f6b13a85bde002f6b480832f7.tar.xz ice-48b141f8f6a8603f6b13a85bde002f6b480832f7.zip |
Refactored string converters; they now use codecvt_utf8[_utf16] when
available instead of ConvertUTF
Diffstat (limited to 'cpp/test')
-rw-r--r-- | cpp/test/Ice/logger/Client2.cpp | 4 | ||||
-rw-r--r-- | cpp/test/Ice/logger/Client4.cpp | 4 | ||||
-rw-r--r-- | cpp/test/Ice/stringConverter/Client.cpp | 28 | ||||
-rw-r--r-- | cpp/test/Ice/stringConverter/Server.cpp | 7 | ||||
-rw-r--r-- | cpp/test/IceUtil/unicode/Client.cpp | 365 |
5 files changed, 216 insertions, 192 deletions
diff --git a/cpp/test/Ice/logger/Client2.cpp b/cpp/test/Ice/logger/Client2.cpp index b80603b47cd..d893e899ee6 100644 --- a/cpp/test/Ice/logger/Client2.cpp +++ b/cpp/test/Ice/logger/Client2.cpp @@ -42,9 +42,9 @@ main(int argc, char* argv[]) // 28605 == ISO 8859-15 codepage // SetConsoleOutputCP(28605); - IceUtil::setProcessStringConverter(new IceUtil::WindowsStringConverter(1250)); + IceUtil::setProcessStringConverter(IceUtil::createWindowsStringConverter(1250)); #else - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>("ISO8859-15")); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>("ISO8859-15")); #endif Ice::InitializationData id; id.properties = Ice::createProperties(); diff --git a/cpp/test/Ice/logger/Client4.cpp b/cpp/test/Ice/logger/Client4.cpp index 81675dc6d6f..a8307816269 100644 --- a/cpp/test/Ice/logger/Client4.cpp +++ b/cpp/test/Ice/logger/Client4.cpp @@ -41,9 +41,9 @@ main(int argc, char* argv[]) // int cp = GetConsoleOutputCP(); SetConsoleOutputCP(CP_UTF8); - IceUtil::setProcessStringConverter(new IceUtil::WindowsStringConverter(28605)); + IceUtil::setProcessStringConverter(IceUtil::createWindowsStringConverter(28605)); #else - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>("ISO8859-15")); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>("ISO8859-15")); #endif Ice::InitializationData id; id.properties = Ice::createProperties(); diff --git a/cpp/test/Ice/stringConverter/Client.cpp b/cpp/test/Ice/stringConverter/Client.cpp index a4626b615e4..c9fb040ef01 100644 --- a/cpp/test/Ice/stringConverter/Client.cpp +++ b/cpp/test/Ice/stringConverter/Client.cpp @@ -48,19 +48,19 @@ main(int argc, char* argv[]) // // 28605 == ISO 8859-15 codepage // - IceUtil::setProcessStringConverter(new IceUtil::WindowsStringConverter(28605)); + IceUtil::setProcessStringConverter(IceUtil::createWindowsStringConverter(28605)); useIconv = false; #elif defined(__hpux) 
if(useLocale) { - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>); } else { - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>("iso815")); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>("iso815")); } - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("ucs4")); + IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("ucs4")); #elif defined(_AIX) @@ -68,46 +68,46 @@ main(int argc, char* argv[]) if(useLocale) { - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>()); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>()); } else { - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>("ISO8859-15")); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>("ISO8859-15")); } if(sizeof(wchar_t) == 4) { - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("UTF-32")); + IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("UTF-32")); } else { - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("UTF-16")); + IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("UTF-16")); } #else if(useLocale) { - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>()); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>()); } else { - IceUtil::setProcessStringConverter(new IceUtil::IconvStringConverter<char>("ISO8859-15")); + IceUtil::setProcessStringConverter(IceUtil::createIconvStringConverter<char>("ISO8859-15")); } if(sizeof(wchar_t) == 4) { # ifdef ICE_BIG_ENDIAN - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("UTF-32BE")); + 
IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("UTF-32BE")); # else - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("UTF-32LE")); + IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("UTF-32LE")); # endif } else { # ifdef ICE_BIG_ENDIAN - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("UTF-16BE")); + IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("UTF-16BE")); # else - IceUtil::setProcessWstringConverter(new IceUtil::IconvStringConverter<wchar_t>("UTF-16LE")); + IceUtil::setProcessWstringConverter(IceUtil::createIconvStringConverter<wchar_t>("UTF-16LE")); # endif } #endif diff --git a/cpp/test/Ice/stringConverter/Server.cpp b/cpp/test/Ice/stringConverter/Server.cpp index da9dae3f8a5..ce592e19ba9 100644 --- a/cpp/test/Ice/stringConverter/Server.cpp +++ b/cpp/test/Ice/stringConverter/Server.cpp @@ -24,13 +24,6 @@ public: virtual wstring widen(ICE_IN(string) msg, const Ice::Current&) { - const Ice::Byte* cmsg = reinterpret_cast<const Ice::Byte*>(msg.c_str()); - - if(!IceUtil::isLegalUTF8Sequence(cmsg, cmsg + msg.size())) - { - throw Test::BadEncodingException(); - } - return IceUtil::stringToWstring(msg, IceUtil::getProcessStringConverter(), IceUtil::getProcessWstringConverter()); } diff --git a/cpp/test/IceUtil/unicode/Client.cpp b/cpp/test/IceUtil/unicode/Client.cpp index 3a48db3da86..b340252d010 100644 --- a/cpp/test/IceUtil/unicode/Client.cpp +++ b/cpp/test/IceUtil/unicode/Client.cpp @@ -42,21 +42,20 @@ main(int argc, char* argv[]) if(argc > 1) { -#ifdef _WIN32 +#ifdef _WIN32 # ifdef __MINGW32__ - dir = argv[1]; + dir = argv[1]; # else - dir = IceUtil::wstringToString(argv[1]); + dir = IceUtil::wstringToString(argv[1]); # endif - dir += "\\"; + dir += "\\"; #else - dir = argv[1]; - dir += "/"; -#endif + dir = argv[1]; + dir += "/"; +#endif } - ostringstream os; os << "utf" << sizeof(wchar_t) * 8; #ifdef 
ICE_LITTLE_ENDIAN @@ -66,177 +65,209 @@ main(int argc, char* argv[]) #endif string wstringEncoding = os.str(); string wcoeurFile = string("coeur.") + wstringEncoding; - + { - cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... "; - ifstream is((dir + "coeur.utf8").c_str()); - test(is.good()); - ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); - test(bis.good()); - - int lineNumber = 0; - - do - { - string line; - getline(is, line, '\n'); - test(isLegalUTF8Sequence(reinterpret_cast<const Byte*>(line.data()), - reinterpret_cast<const Byte*>(line.data() + line.size()))); - lineNumber++; - wstring wline = stringToWstring(line); - - for(size_t i = 0; i < wline.length(); ++i) - { - wchar_t wc = wline[i]; - const char* buffer = reinterpret_cast<char*>(&wc); - for(size_t j = 0; j < sizeof(wchar_t); ++j) - { - test(bis.good()); - char c; - bis.get(c); - if(buffer[j] != c) - { - cerr << "Error at line " << lineNumber << " column " << i << endl; - cerr << "buffer[j] == " << hex << (int)static_cast<unsigned char>(buffer[j]) << endl; - cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; - } - test(buffer[j] == c); - } - } - // - // Skip newline character (Unix-style newline) - // - if(is.good()) - { - for(size_t j = 0; j < sizeof(wchar_t); ++j) - { - test(bis.good()); - char c; - bis.get(c); - } - } - else - { - char c; - bis.get(c); - test(bis.eof()); - } - } while(is.good()); - - cout << "ok" << endl; + cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... 
"; + ifstream is((dir + "coeur.utf8").c_str()); + test(is.good()); + ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); + test(bis.good()); + + int lineNumber = 0; + + do + { + string line; + getline(is, line, '\n'); + lineNumber++; + wstring wline = stringToWstring(line); + + for(size_t i = 0; i < wline.length(); ++i) + { + wchar_t wc = wline[i]; + const char* buffer = reinterpret_cast<char*>(&wc); + for(size_t j = 0; j < sizeof(wchar_t); ++j) + { + test(bis.good()); + char c; + bis.get(c); + if(buffer[j] != c) + { + cerr << "Error at line " << lineNumber << " column " << i << endl; + cerr << "buffer[j] == " << hex << (int)static_cast<unsigned char>(buffer[j]) << endl; + cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; + } + test(buffer[j] == c); + } + } + // + // Skip newline character (Unix-style newline) + // + if(is.good()) + { + for(size_t j = 0; j < sizeof(wchar_t); ++j) + { + test(bis.good()); + char c; + bis.get(c); + } + } + else + { + char c; + bis.get(c); + test(bis.eof()); + } + } while(is.good()); + + cout << "ok" << endl; + } + + { + cout << "testing wstring (" << wstringEncoding << ") to UTF-8 conversion... 
"; + + ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); + test(bis.good()); + + wstring ws; + char c; + + do + { + wchar_t wc; + char* buffer = reinterpret_cast<char*>(&wc); + + for(size_t j = 0; j < sizeof(wchar_t); ++j) + { + if(!bis.good()) + { + break; + } + bis.get(c); + buffer[j] = c; + } + + if(bis.good()) + { + ws.push_back(wc); + } + } while(bis.good()); + + string s = wstringToString(ws); + + ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary); + test(nbis.good()); + + for(size_t i = 0; i < s.size(); ++i) + { + test(nbis.good()); + nbis.get(c); + char ci = s[i]; + + if(c != ci) + { + cerr << "i == " << i << endl; + cerr << "ci == " << hex << (int)static_cast<unsigned char>(ci) << endl; + cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; + } + test(c == s[i]); + } + test(!nbis.eof()); + nbis.get(c); + test(nbis.eof()); + + cout << "ok" << endl; } { - cout << "testing wstring (" << wstringEncoding << ") to UTF-8 conversion... "; - - ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary); - test(bis.good()); - - wstring ws; - char c; - - do - { - wchar_t wc; - char* buffer = reinterpret_cast<char*>(&wc); - - for(size_t j = 0; j < sizeof(wchar_t); ++j) - { - if(!bis.good()) - { - break; - } - bis.get(c); - buffer[j] = c; - } - - if(bis.good()) - { - ws.push_back(wc); - } - } while(bis.good()); - - string s = wstringToString(ws); - - ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary); - test(nbis.good()); - - for(size_t i = 0; i < s.size(); ++i) - { - test(nbis.good()); - nbis.get(c); - char ci = s[i]; - - if(c != ci) - { - cerr << "i == " << i << endl; - cerr << "ci == " << hex << (int)static_cast<unsigned char>(ci) << endl; - cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl; - } - test(c == s[i]); - } - test(!nbis.eof()); - nbis.get(c); - test(nbis.eof()); - - cout << "ok" << endl; + cout << "testing wstring with surrogates... 
"; + + // + // Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units + // U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units + // + wstring ws = L"\u20ac\u20ac\U00010437"; + + if(sizeof(wchar_t) == 2) + { + test(ws.length() == 4); + } + else + { + test(sizeof(wchar_t) == 4); + test(ws.length() == 3); + } + + // + // The Unicode string converter implementation allocates an initial buffer + // of size max(2 * (sourceEnd - sourceStart), 4). + // With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6 + // bytes of the initial buffer. + + string ns = IceUtil::wstringToString(ws); + + test(ns.length() == 10); + test(ws == IceUtil::stringToWstring(ns)); + + cout << "ok" << endl; } { - cout << "testing error handling... "; - - // From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string - - string badUTF8[] = { - "\xc3\x28", - "\xa0\xa1", - "\xe2\x28\xa1", - "\xe2\x82\x28", - "\xf0\x28\x8c\xbc", - "\xf0\x90\x28\xbc", - "\xf0\x28\x8c\x28", - "\xf8\xa1\xa1\xa1\xa1", - "\xfc\xa1\xa1\xa1\xa1\xa1", - "" - }; - - for(size_t i = 0; badUTF8[i] != ""; ++i) - { - test(isLegalUTF8Sequence(reinterpret_cast<const Byte*>(badUTF8[i].data()), - reinterpret_cast<const Byte*>(badUTF8[i].data() + badUTF8[i].size())) == false); - - try - { - wstring ws = IceUtil::stringToWstring(badUTF8[i]); - test(false); - } - catch(const IceUtil::IllegalConversionException&) - {} - } + cout << "testing error handling... 
"; + + // From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string + + string badUTF8[] = { + "\xc3\x28", + "\xa0\xa1", + "\xe2\x28\xa1", + "\xe2\x82\x28", + "\xf0\x28\x8c\xbc", + "\xf0\x90\x28\xbc", + "\xf0\x28\x8c\x28", + "\xf8\xa1\xa1\xa1\xa1", + "\xfc\xa1\xa1\xa1\xa1\xa1", + "" + }; + + for(size_t i = 0; badUTF8[i] != ""; ++i) + { + try + { + wstring ws = IceUtil::stringToWstring(badUTF8[i]); + wcerr << L"Unexpected: " << ws << endl; + test(false); + } + catch(const IceUtil::IllegalConversionException&) + {} + } // TODO: need test for bad UTF-32 strings #ifdef _WIN32 - - wstring badWstring[] = { - wstring(1, wchar_t(0xD800)), - wstring(2, wchar_t(0xDB7F)), - L"" - }; - - for(size_t i = 0; badWstring[i] != L""; ++i) - { - try - { - string s = IceUtil::wstringToString(badWstring[i]); - test(false); - } - catch(const IceUtil::IllegalConversionException&) - {} - } + + // Note: for an unknown reason, the conversion works without + // the extra letter (x below) when using codecvt_utf8_utf16. + + wstring badWstring[] = { + wstring(1, wchar_t(0xD800)) + L"x", + wstring(2, wchar_t(0xDB7F)), + L"" + }; + + for(size_t i = 0; badWstring[i] != L""; ++i) + { + try + { + string s = IceUtil::wstringToString(badWstring[i]); + test(false); + } + catch(const IceUtil::IllegalConversionException&) + {} + } #endif - cout << "ok" << endl; - + cout << "ok" << endl; + } return EXIT_SUCCESS; } |