1 files changed, 253 insertions, 212 deletions
diff --git a/cpp/test/IceUtil/unicode/Client.cpp b/cpp/test/IceUtil/unicode/Client.cpp
index b3d3912057b..64c8fe5f0dd 100644
--- a/cpp/test/IceUtil/unicode/Client.cpp
+++ b/cpp/test/IceUtil/unicode/Client.cpp
@@ -17,6 +17,9 @@
 #endif
 #include <fstream>
 
+// Uncomment to include performance testing
+//#define TEST_PERF
+
 using namespace IceUtil;
 using namespace std;
 
@@ -45,14 +48,14 @@ main(int argc, char* argv[])
 #ifdef _WIN32
 
 #   ifdef __MINGW32__
-	dir = argv[1];
+        dir = argv[1];
 #   else
-	dir = IceUtil::wstringToString(argv[1]);
+        dir = IceUtil::wstringToString(argv[1]);
 #   endif
-	dir += "\\";
+        dir += "\\";
 #else
-	dir = argv[1];
-	dir += "/";
+        dir = argv[1];
+        dir += "/";
 #endif
     }
 
@@ -67,226 +70,264 @@ main(int argc, char* argv[])
     string wcoeurFile = string("coeur.") + wstringEncoding;
 
     {
-	cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... ";
-	ifstream is((dir + "coeur.utf8").c_str());
-	test(is.good());
-	ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary);
-	test(bis.good());
-
-	int lineNumber = 0;
-
-	do
-	{
-	    string line;
-	    getline(is, line, '\n');
-	    lineNumber++;
-	    wstring wline = stringToWstring(line);
-
-	    for(size_t i = 0; i < wline.length(); ++i)
-	    {
-		wchar_t wc = wline[i];
-		const char* buffer = reinterpret_cast<char*>(&wc);
-		for(size_t j = 0; j < sizeof(wchar_t); ++j)
-		{
-		    test(bis.good());
-		    char c;
-		    bis.get(c);
-		    if(buffer[j] != c)
-		    {
-			cerr << "Error at line " << lineNumber << " column " << i << endl;
-			cerr << "buffer[j] == " << hex << (int)static_cast<unsigned char>(buffer[j]) << endl;
-			cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl;
-		    }
-		    test(buffer[j] == c);
-		}
-	    }
-	    //
-	    // Skip newline character (Unix-style newline)
-	    //
-	    if(is.good())
-	    {
-		for(size_t j = 0; j < sizeof(wchar_t); ++j)
-		{
-		    test(bis.good());
-		    char c;
-		    bis.get(c);
-		}
-	    }
-	    else
-	    {
-		char c;
-		bis.get(c);
-		test(bis.eof());
-	    }
-	} while(is.good());
-
-	cout << "ok" << endl;
+        cout << "testing UTF-8 to wstring (" << wstringEncoding << ") conversion... ";
+        ifstream is((dir + "coeur.utf8").c_str());
+        test(is.good());
+        ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary);
+        test(bis.good());
+
+        int lineNumber = 0;
+
+        do
+        {
+            string line;
+            getline(is, line, '\n');
+            lineNumber++;
+            wstring wline = stringToWstring(line);
+
+            for(size_t i = 0; i < wline.length(); ++i)
+            {
+                wchar_t wc = wline[i];
+                const char* buffer = reinterpret_cast<char*>(&wc);
+                for(size_t j = 0; j < sizeof(wchar_t); ++j)
+                {
+                    test(bis.good());
+                    char c;
+                    bis.get(c);
+                    if(buffer[j] != c)
+                    {
+                        cerr << "Error at line " << lineNumber << " column " << i << endl;
+                        cerr << "buffer[j] == " << hex << (int)static_cast<unsigned char>(buffer[j]) << endl;
+                        cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl;
+                    }
+                    test(buffer[j] == c);
+                }
+            }
+            //
+            // Skip newline character (Unix-style newline)
+            //
+            if(is.good())
+            {
+                for(size_t j = 0; j < sizeof(wchar_t); ++j)
+                {
+                    test(bis.good());
+                    char c;
+                    bis.get(c);
+                }
+            }
+            else
+            {
+                char c;
+                bis.get(c);
+                test(bis.eof());
+            }
+        } while(is.good());
+
+        cout << "ok" << endl;
+    }
+
+    {
+        cout << "testing wstring (" << wstringEncoding << ") to UTF-8 conversion... ";
+
+        ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary);
+        test(bis.good());
+
+        wstring ws;
+        char c;
+
+        do
+        {
+            wchar_t wc;
+            char* buffer = reinterpret_cast<char*>(&wc);
+
+            for(size_t j = 0; j < sizeof(wchar_t); ++j)
+            {
+                if(!bis.good())
+                {
+                    break;
+                }
+                bis.get(c);
+                buffer[j] = c;
+            }
+
+            if(bis.good())
+            {
+                ws.push_back(wc);
+            }
+        } while(bis.good());
+
+        string s = wstringToString(ws);
+
+        ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary);
+        test(nbis.good());
+
+        for(size_t i = 0; i < s.size(); ++i)
+        {
+            test(nbis.good());
+            nbis.get(c);
+            char ci = s[i];
+
+            if(c != ci)
+            {
+                cerr << "i == " << i << endl;
+                cerr << "ci == " << hex << (int)static_cast<unsigned char>(ci) << endl;
+                cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl;
+            }
+            test(c == s[i]);
+        }
+        test(!nbis.eof());
+        nbis.get(c);
+        test(nbis.eof());
+
+        cout << "ok" << endl;
     }
 
     {
-	cout << "testing wstring (" << wstringEncoding << ") to UTF-8 conversion... ";
-
-	ifstream bis((dir + wcoeurFile).c_str(), ios_base::binary);
-	test(bis.good());
-
-	wstring ws;
-	char c;
-
-	do
-	{
-	    wchar_t wc;
-	    char* buffer = reinterpret_cast<char*>(&wc);
-
-	    for(size_t j = 0; j < sizeof(wchar_t); ++j)
-	    {
-		if(!bis.good())
-		{
-		    break;
-		}
-		bis.get(c);
-		buffer[j] = c;
-	    }
-
-	    if(bis.good())
-	    {
-		ws.push_back(wc);
-	    }
-	} while(bis.good());
-
-	string s = wstringToString(ws);
-
-	ifstream nbis((dir + "coeur.utf8").c_str(), ios_base::binary);
-	test(nbis.good());
-
-	for(size_t i = 0; i < s.size(); ++i)
-	{
-	    test(nbis.good());
-	    nbis.get(c);
-	    char ci = s[i];
-
-	    if(c != ci)
-	    {
-		cerr << "i == " << i << endl;
-		cerr << "ci == " << hex << (int)static_cast<unsigned char>(ci) << endl;
-		cerr << "c == " << hex << (int)static_cast<unsigned char>(c) << endl;
-	    }
-	    test(c == s[i]);
-	}
-	test(!nbis.eof());
-	nbis.get(c);
-	test(nbis.eof());
-
-	cout << "ok" << endl;
+        cout << "testing wstring with surrogates... ";
+
+        //
+        // Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units
+        // U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units
+        //
+        wstring ws = L"\u20ac\u20ac\U00010437";
+
+        if(sizeof(wchar_t) == 2)
+        {
+            test(ws.length() == 4);
+        }
+        else
+        {
+            test(sizeof(wchar_t) == 4);
+            test(ws.length() == 3);
+        }
+
+        //
+        // The Unicode string converter implementation allocates an initial buffer
+        // of size max(2 * (sourceEnd - sourceStart), 4).
+        // With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6
+        // bytes of the initial buffer.
+
+        string ns = IceUtil::wstringToString(ws);
+
+        const string good = "\xE2\x82\xAC\xE2\x82\xAC\xF0\x90\x90\xB7";
+        test(ns == good);
+        test(ws == IceUtil::stringToWstring(ns));
+
+        cout << "ok" << endl;
+
+        cout << "testing IceUtilInternal::toUTF16, toUTF32 and fromUTF32... ";
+
+        vector<Byte> u8 = vector<Byte>(reinterpret_cast<const Byte*>(ns.data()),
+                                       reinterpret_cast<const Byte*>(ns.data() + ns.length()));
+
+        vector<unsigned short> u16 = IceUtilInternal::toUTF16(u8);
+        test(u16.size() == 4);
+        test(u16[0] == 0x20ac);
+        test(u16[1] == 0x20ac);
+        test(u16[2] == 0xd801);
+        test(u16[3] == 0xdc37);
+
+        vector<unsigned int> u32 = IceUtilInternal::toUTF32(u8);
+        test(u32.size() == 3);
+        test(u32[0] == 0x20ac);
+        test(u32[1] == 0x20ac);
+        test(u32[2] == 0x10437);
+
+        vector<Byte> nu8 = IceUtilInternal::fromUTF32(u32);
+        test(nu8 == u8);
+
+        cout << "ok" << endl;
     }
 
+#ifdef TEST_PERF
     {
-	cout << "testing wstring with surrogates... ";
-
-	//
-	// Euro sign (U+20AC) is encoded with 1 UTF-16 code unit, and 3 UTF-8 code units
-	// U+10437 is a Deseret character, encoded with 2 UTF-16 code units, and 4 UTF-8 code units
-	//
-	wstring ws = L"\u20ac\u20ac\U00010437";
-
-	if(sizeof(wchar_t) == 2)
-	{
-	    test(ws.length() == 4);
-	}
-	else
-	{
-	    test(sizeof(wchar_t) == 4);
-	    test(ws.length() == 3);
-	}
-
-	//
-	// The Unicode string converter implementation allocates an initial buffer
-	// of size max(2 * (sourceEnd - sourceStart), 4).
-	// With UTF-16 encoding, that's 8 and the first 2 euros will use the first 6
-	// bytes of the initial buffer.
-
-	string ns = IceUtil::wstringToString(ws);
-
-	const string good = "\xE2\x82\xAC\xE2\x82\xAC\xF0\x90\x90\xB7";
-	test(ns == good);
-	test(ws == IceUtil::stringToWstring(ns));
-
-	vector<Byte> u8 = vector<Byte>(reinterpret_cast<const Byte*>(ns.data()),
-				       reinterpret_cast<const Byte*>(ns.data() + ns.length()));
-
-	vector<unsigned short> u16 = IceUtilInternal::toUTF16(u8);
-	test(u16.size() == 4);
-	test(u16[0] == 0x20ac);
-	test(u16[1] == 0x20ac);
-	test(u16[2] == 0xd801);
-	test(u16[3] == 0xdc37);
-
-	vector<unsigned int> u32 = IceUtilInternal::toUTF32(u8);
-	test(u32.size() == 3);
-	test(u32[0] == 0x20ac);
-	test(u32[1] == 0x20ac);
-	test(u32[2] == 0x10437);
-
-	vector<Byte> nu8 = IceUtilInternal::fromUTF32(u32);
-	test(nu8 == u8);
-
-	cout << "ok" << endl;
+        // The only performance-critical code is the UnicodeWstringConverter
+        // that is used whenever we marshal/unmarshal wstrings.
+
+        const long iterations = 5000000;
+        const wstring ws = L"abcdefghijklmnopqrstuvwxyz+\u20ac\u20ac\U00010437";
+        const string ns = IceUtil::wstringToString(ws);
+        test(IceUtil::stringToWstring(ns) == ws);
+
+        cout << "testing performance with " << iterations << " iterations... ";
+
+        IceUtil::Time toU8 = IceUtil::Time::now(IceUtil::Time::Monotonic);
+        for(long i = 0; i < iterations; ++i)
+        {
+            test(IceUtil::wstringToString(ws) == ns);
+        }
+        IceUtil::Time now = IceUtil::Time::now(IceUtil::Time::Monotonic);
+        toU8 = now - toU8;
+
+        IceUtil::Time fromU8 = now;
+        for(long i = 0; i < iterations; ++i)
+        {
+            test(IceUtil::stringToWstring(ns) == ws);
+        }
+        fromU8 = IceUtil::Time::now(IceUtil::Time::Monotonic) - fromU8;
+
+        cout << "toUTF8 = " << toU8 * 1000 << " ms; fromUTF8 = "
+             << fromU8 * 1000 << " ms ok" << endl;
     }
 
+#endif
+
+
     {
-	cout << "testing error handling... ";
-
-	// From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string
-
-	string badUTF8[] = {
-	    "\xc3\x28",
-	    "\xa0\xa1",
-	    "\xe2\x28\xa1",
-	    "\xe2\x82\x28",
-	    "\xf0\x28\x8c\xbc",
-	    "\xf0\x90\x28\xbc",
-	    "\xf0\x28\x8c\x28",
-	    "\xf8\xa1\xa1\xa1\xa1",
-	    "\xfc\xa1\xa1\xa1\xa1\xa1",
-	    ""
-	};
-
-	for(size_t i = 0; badUTF8[i] != ""; ++i)
-	{
-	    try
-	    {
-		wstring ws = IceUtil::stringToWstring(badUTF8[i]);
-		wcerr << L"Unexpected: " << ws << endl;
-		test(false);
-	    }
-	    catch(const IceUtil::IllegalConversionException&)
-	    {}
-	}
-
-	// TODO: need test for bad UTF-32 strings
+        cout << "testing error handling... ";
+
+        // From http://stackoverflow.com/questions/1301402/example-invalid-utf8-string
+
+        string badUTF8[] = {
+            "\xc3\x28",
+            "\xa0\xa1",
+            "\xe2\x28\xa1",
+            "\xe2\x82\x28",
+            "\xf0\x28\x8c\xbc",
+            "\xf0\x90\x28\xbc",
+            "\xf0\x28\x8c\x28",
+            "\xf8\xa1\xa1\xa1\xa1",
+            "\xfc\xa1\xa1\xa1\xa1\xa1",
+            ""
+        };
+
+        for(size_t i = 0; badUTF8[i] != ""; ++i)
+        {
+            try
+            {
+                wstring ws = IceUtil::stringToWstring(badUTF8[i]);
+                wcerr << L"Unexpected: " << ws << endl;
+                test(false);
+            }
+            catch(const IceUtil::IllegalConversionException&)
+            {}
+        }
+
+        // TODO: need test for bad UTF-32 strings
 #ifdef _WIN32
 
-	// Note: for an unknown reason, the conversion works without
-	// the extra letter (x below) when using codecvt_utf8_utf16.
-
-	wstring badWstring[] = {
-	    wstring(1, wchar_t(0xD800)) + L"x",
-	    wstring(2, wchar_t(0xDB7F)),
-	    L""
-	};
-
-	for(size_t i = 0; badWstring[i] != L""; ++i)
-	{
-	    try
-	    {
-		string s = IceUtil::wstringToString(badWstring[i]);
-		test(false);
-	    }
-	    catch(const IceUtil::IllegalConversionException&)
-	    {}
-	}
+        // Note: for an unknown reason, the conversion works without
+        // the extra letter (x below) when using codecvt_utf8_utf16.
+
+        wstring badWstring[] = {
+            wstring(1, wchar_t(0xD800)) + L"x",
+            wstring(2, wchar_t(0xDB7F)),
+            L""
+        };
+
+        for(size_t i = 0; badWstring[i] != L""; ++i)
+        {
+            try
+            {
+                string s = IceUtil::wstringToString(badWstring[i]);
+                test(false);
+            }
+            catch(const IceUtil::IllegalConversionException&)
+            {}
+        }
 #endif
 
-	cout << "ok" << endl;
+        cout << "ok" << endl;
 
     }
     return EXIT_SUCCESS;