summaryrefslogtreecommitdiff
path: root/cpp/src/IceUtil/Unicode.cpp
diff options
context:
space:
mode:
authorMarc Laukien <marc@zeroc.com>2002-01-17 13:02:41 +0000
committerMarc Laukien <marc@zeroc.com>2002-01-17 13:02:41 +0000
commit41200f6bb0a1d22941279853dd00920e9aee76fa (patch)
tree760d4faf0552844cc9247694a484e4609c498504 /cpp/src/IceUtil/Unicode.cpp
parentadding --impl option (diff)
downloadice-41200f6bb0a1d22941279853dd00920e9aee76fa.tar.bz2
ice-41200f6bb0a1d22941279853dd00920e9aee76fa.tar.xz
ice-41200f6bb0a1d22941279853dd00920e9aee76fa.zip
fixed Unicode.cpp
Diffstat (limited to 'cpp/src/IceUtil/Unicode.cpp')
-rw-r--r--cpp/src/IceUtil/Unicode.cpp114
1 files changed, 106 insertions, 8 deletions
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp
index ba298e3b770..cf996046645 100644
--- a/cpp/src/IceUtil/Unicode.cpp
+++ b/cpp/src/IceUtil/Unicode.cpp
@@ -15,19 +15,117 @@ using namespace std;
//
// Converts a wide string to a UTF-8 encoded narrow string.
//
// Each wchar_t element of str is treated as one code point and is
// encoded as a 1- to 4-byte UTF-8 sequence. If an element cannot be
// encoded (its value is above U+10FFFF, or it is negative on a
// platform where wchar_t is signed), conversion stops and the bytes
// produced so far are returned.
//
// NOTE(review): elements in the surrogate range (0xD800-0xDFFF) are
// encoded individually as 3-byte sequences rather than being combined
// or rejected -- confirm whether callers on 16-bit wchar_t platforms
// rely on this.
//
string
IceUtil::wstringToString(const wstring& str)
{
    string result;
    result.reserve(str.length() * SIZEOF_WCHAR_T);

    for (wstring::size_type i = 0; i < str.length(); ++i)
    {
        //
        // Work on an unsigned value so that the range checks below
        // are well defined even if wchar_t is a signed type; a
        // negative wchar_t turns into a very large value and is
        // rejected by the final else branch.
        //
        const unsigned long cp = static_cast<unsigned long>(str[i]);

        if (cp < 0x80)
        {
            // 0xxxxxxx
            result += static_cast<char>(cp);
        }
        else if (cp < 0x800)
        {
            // 110xxxxx 10xxxxxx
            result += static_cast<char>(0xc0 | (cp >> 6));
            result += static_cast<char>(0x80 | (cp & 0x3f));
        }
        else if (cp < 0x10000)
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            result += static_cast<char>(0xe0 | (cp >> 12));
            result += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
            result += static_cast<char>(0x80 | (cp & 0x3f));
        }
        else if (cp <= 0x10FFFF) // U+10FFFF itself is a valid code point.
        {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            result += static_cast<char>(0xf0 | (cp >> 18));
            result += static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
            result += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
            result += static_cast<char>(0x80 | (cp & 0x3f));
        }
        else
        {
            return result; // Error, not encodable.
        }
    }

    return result;
}
//
// Converts a UTF-8 encoded narrow string to a wide string.
//
// Each UTF-8 sequence in str is decoded into one wchar_t element of
// the result. If an invalid lead byte, an invalid continuation byte,
// or a truncated sequence is found, conversion stops and the
// characters decoded so far are returned.
//
// Sequences of 4 to 6 bytes are only accepted when SIZEOF_WCHAR_T is
// at least 4; otherwise their lead bytes are treated as errors.
// Overlong encodings are not rejected.
//
wstring
IceUtil::stringToWstring(const string& str)
{
    wstring result;
    result.reserve(str.length());

    string::size_type i = 0;
    while (i < str.length())
    {
        const unsigned char lead = static_cast<unsigned char>(str[i]);
        unsigned long cp;          // Code point being assembled.
        string::size_type len;     // Total length of this sequence in bytes.

        if (lead < 0x80) // 0xxxxxxx
        {
            cp = lead;
            len = 1;
        }
        else if (lead < 0xc0) // Lead byte must not be 10xxxxxx
        {
            return result; // Error, not encodable.
        }
        else if (lead < 0xe0) // 110xxxxx
        {
            cp = lead & 0x1f;
            len = 2;
        }
        else if (lead < 0xf0) // 1110xxxx
        {
            cp = lead & 0xf;
            len = 3;
        }
#if SIZEOF_WCHAR_T >= 4
        else if (lead < 0xf8) // 11110xxx
        {
            cp = lead & 7;
            len = 4;
        }
        else if (lead < 0xfc) // 111110xx
        {
            // Length 5 and 6 is declared invalid in Unicode 3.1 and ISO 10646:2001.
            cp = lead & 3;
            len = 5;
        }
        else if (lead < 0xfe) // 1111110x
        {
            // Length 5 and 6 is declared invalid in Unicode 3.1 and ISO 10646:2001.
            cp = lead & 1;
            len = 6;
        }
#endif
        else
        {
            return result; // Error, not encodable.
        }

        //
        // The whole sequence must be present in the input. Comparing
        // against the remaining length avoids any arithmetic on i
        // that could wrap around.
        //
        if (len > str.length() - i)
        {
            return result; // Error, truncated sequence.
        }

        //
        // Fold in every continuation byte: there are len - 1 of
        // them, at offsets 1 through len - 1 inclusive.
        //
        for (string::size_type j = 1; j < len; ++j)
        {
            const unsigned char c = static_cast<unsigned char>(str[i + j]);
            if ((c & 0xc0) != 0x80) // All other bytes must be 10xxxxxx
            {
                return result; // Error, not encodable.
            }

            cp = (cp << 6) | (c & 0x3f);
        }

        // Store the decoded character before moving to the next sequence.
        result += static_cast<wchar_t>(cp);
        i += len;
    }

    return result;
}