summaryrefslogtreecommitdiff
path: root/cpp/test/IceUtil/unicode/Client.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cpp/test/IceUtil/unicode/Client.cpp')
-rw-r--r--cpp/test/IceUtil/unicode/Client.cpp154
1 files changed, 123 insertions, 31 deletions
diff --git a/cpp/test/IceUtil/unicode/Client.cpp b/cpp/test/IceUtil/unicode/Client.cpp
index e5d05618cec..76d74fbe4b1 100644
--- a/cpp/test/IceUtil/unicode/Client.cpp
+++ b/cpp/test/IceUtil/unicode/Client.cpp
@@ -17,6 +17,9 @@
#endif
#include <fstream>
+// Uncomment to include performance testing
+//#define TEST_PERF
+
using namespace IceUtil;
using namespace std;
@@ -42,21 +45,20 @@ main(int argc, char* argv[])
if(argc > 1)
{
-#ifdef _WIN32
+#ifdef _WIN32
# ifdef __MINGW32__
dir = argv[1];
# else
- dir = IceUtil::wstringToString(argv[1]);
+ dir = wstringToString(argv[1]);
# endif
dir += "\\";
#else
dir = argv[1];
dir += "/";
-#endif
+#endif
}
-
ostringstream os;
os << "utf" << sizeof(wchar_t) * 8;
#ifdef ICE_LITTLE_ENDIAN
@@ -66,25 +68,23 @@ main(int argc, char* argv[])
#endif
string wstringEncoding = os.str();
string wcoeurFile = string("coeur.") + wstringEncoding;
-
+
{
cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... ";
ifstream is((dir + "coeur.utf8").c_str());
test(is.good());
ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary);
test(bis.good());
-
+
int lineNumber = 0;
-
+
do
{
string line;
getline(is, line, '\n');
- test(isLegalUTF8Sequence(reinterpret_cast<const Byte*>(line.data()),
- reinterpret_cast<const Byte*>(line.data() + line.size())));
lineNumber++;
wstring wline = stringToWstring(line);
-
+
for(size_t i = 0; i < wline.length(); ++i)
{
wchar_t wc = wline[i];
@@ -122,7 +122,7 @@ main(int argc, char* argv[])
test(bis.eof());
}
} while(is.good());
-
+
cout << "ok" << endl;
}
@@ -139,7 +139,7 @@ main(int argc, char* argv[])
{
wchar_t wc;
char* buffer = reinterpret_cast<char*>(&wc);
-
+
for(size_t j = 0; j < sizeof(wchar_t); ++j)
{
if(!bis.good())
@@ -155,16 +155,16 @@ main(int argc, char* argv[])
ws.push_back(wc);
}
} while(bis.good());
-
+
string s = wstringToString(ws);
-
+
ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary);
test(nbis.good());
-
+
for(size_t i = 0; i < s.size(); ++i)
{
test(nbis.good());
- nbis.get(c);
+ nbis.get(c);
char ci = s[i];
if(c != ci)
@@ -183,6 +183,97 @@ main(int argc, char* argv[])
}
{
+ cout << "testing wstring with surrogates... ";
+
+ //
+ // Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units
+ // U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units
+ //
+ wstring ws = L"\u20ac\u20ac\U00010437";
+
+ if(sizeof(wchar_t) == 2)
+ {
+ test(ws.length() == 4);
+ }
+ else
+ {
+ test(sizeof(wchar_t) == 4);
+ test(ws.length() == 3);
+ }
+
+ //
+ // The Unicode string converter implementation allocates an initial buffer
+ // of size max(2 * (sourceEnd - sourceStart), 4).
+ // With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6
+ // bytes of the initial buffer.
+
+ string ns = wstringToString(ws);
+
+ const string good = "\xE2\x82\xAC\xE2\x82\xAC\xF0\x90\x90\xB7";
+ test(ns == good);
+ test(ws == stringToWstring(ns));
+
+ cout << "ok" << endl;
+
+ cout << "testing IceUtilInternal::toUTF16, toUTF32 and fromUTF32... ";
+
+ vector<Byte> u8 = vector<Byte>(reinterpret_cast<const Byte*>(ns.data()),
+ reinterpret_cast<const Byte*>(ns.data() + ns.length()));
+
+ vector<unsigned short> u16 = IceUtilInternal::toUTF16(u8);
+ test(u16.size() == 4);
+ test(u16[0] == 0x20ac);
+ test(u16[1] == 0x20ac);
+ test(u16[2] == 0xd801);
+ test(u16[3] == 0xdc37);
+
+ vector<unsigned int> u32 = IceUtilInternal::toUTF32(u8);
+ test(u32.size() == 3);
+ test(u32[0] == 0x20ac);
+ test(u32[1] == 0x20ac);
+ test(u32[2] == 0x10437);
+
+ vector<Byte> nu8 = IceUtilInternal::fromUTF32(u32);
+ test(nu8 == u8);
+
+ cout << "ok" << endl;
+ }
+
+#ifdef TEST_PERF
+ {
+ // The only performance-critical code is the UnicodeWstringConverter
+ // that is used whenever we marshal/unmarshal wstrings.
+
+ const long iterations = 5000000;
+ const wstring ws = L"abcdefghijklmnopqrstuvwxyz+\u20ac\u20ac\U00010437";
+ const string ns = wstringToString(ws);
+ test(stringToWstring(ns) == ws);
+
+ cout << "testing performance with " << iterations << " iterations... ";
+
+ IceUtil::Time toU8 = IceUtil::Time::now(IceUtil::Time::Monotonic);
+ for(long i = 0; i < iterations; ++i)
+ {
+ test(wstringToString(ws) == ns);
+ }
+ IceUtil::Time now = IceUtil::Time::now(IceUtil::Time::Monotonic);
+ toU8 = now - toU8;
+
+ IceUtil::Time fromU8 = now;
+ for(long i = 0; i < iterations; ++i)
+ {
+ test(stringToWstring(ns) == ws);
+ }
+ fromU8 = IceUtil::Time::now(IceUtil::Time::Monotonic) - fromU8;
+
+ cout << "toUTF8 = " << toU8 * 1000 << " ms; fromUTF8 = "
+ << fromU8 * 1000 << " ms ok" << endl;
+ }
+
+#endif
+
+
+ {
cout << "testing error handling... ";
// From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string
@@ -199,44 +290,45 @@ main(int argc, char* argv[])
"\xfc\xa1\xa1\xa1\xa1\xa1",
""
};
-
+
for(size_t i = 0; badUTF8[i] != ""; ++i)
{
- test(isLegalUTF8Sequence(reinterpret_cast<const Byte*>(badUTF8[i].data()),
- reinterpret_cast<const Byte*>(badUTF8[i].data() + badUTF8[i].size())) == false);
-
try
{
- wstring ws = IceUtil::stringToWstring(badUTF8[i]);
+ wstring ws = stringToWstring(badUTF8[i]);
+ wcerr << L"Unexpected: " << ws << endl;
test(false);
}
- catch(const IceUtil::IllegalConversionException&)
+ catch(const IllegalConversionException&)
{}
- }
+ }
- // TODO: need test for bad UTF-32 strings
+ // TODO: need test for bad UTF-32 strings
#ifdef _WIN32
-
- wstring badWstring[] = {
- wstring(1, wchar_t(0xD800)),
+
+ // Note: for an unknown reason, the conversion works without
+ // the extra letter (x below) when using codecvt_utf8_utf16.
+
+ wstring badWstring[] = {
+ wstring(1, wchar_t(0xD800)) + L"x",
wstring(2, wchar_t(0xDB7F)),
L""
};
-
+
for(size_t i = 0; badWstring[i] != L""; ++i)
{
try
{
- string s = IceUtil::wstringToString(badWstring[i]);
+ string s = wstringToString(badWstring[i]);
test(false);
}
- catch(const IceUtil::IllegalConversionException&)
+ catch(const IllegalConversionException&)
{}
}
#endif
cout << "ok" << endl;
-
+
}
return EXIT_SUCCESS;
}