1 files changed, 176 insertions, 79 deletions
diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp
index d93c457bf9e..5df28c48df2 100644
--- a/cpp/src/slice2cpp/Gen.cpp
+++ b/cpp/src/slice2cpp/Gen.cpp
@@ -32,6 +32,45 @@ namespace
 {
 
 string
+u32CodePoint(unsigned int value) // Formats value as a backslash-U escape with eight hexadecimal digits
+{
+    ostringstream s;
+    s << "\\U"; // Writes a single backslash followed by 'U' into the stream
+    s << hex; // Hexadecimal output; no uppercase flag is set, so the digits a-f come out in lower case
+    s.width(8); // Field width applies to the next insertion only, which is value below
+    s.fill('0'); // Pad on the left with zeros up to the eight characters requested above
+    s << value;
+    return s.str();
+}
+
+// Converts the bytes in u8buffer to UTF-32 and writes each resulting code point to out using u32CodePoint.
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+    vector<unsigned int> u32buffer; // Filled by convertUTF8ToUTF32 with the decoded code points
+    IceUtilInternal::ConversionResult result = convertUTF8ToUTF32(u8buffer, u32buffer, IceUtil::lenientConversion);
+    switch(result) // Every result other than conversionOK ends in an IllegalConversionException
+    {
+        case conversionOK:
+            break;
+        case sourceExhausted: // NOTE(review): presumably the input ends inside a multi-byte sequence - confirm
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+        case sourceIllegal: // NOTE(review): presumably the input is not well-formed UTF-8 - confirm
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+        default:
+        {
+            assert(0); // Unexpected result value: stop here when assertions are enabled
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__); // Still fail when assert is compiled out
+        }
+    }
+
+    for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c)
+    {
+        out << u32CodePoint(*c); // One escape sequence per decoded code point, in input order
+    }
+}
+
+string
 getDeprecateSymbol(const ContainedPtr& p1, const ContainedPtr& p2)
 {
     string deprecateMetadata, deprecateSymbol;
@@ -74,115 +113,173 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
 
             if((useWstring & TypeContextUseWstring) || findMetaData(metaData) == "wstring")
             {
-                out << 'L';
-            }
-            out << "\"";                                    // Opening "
+                //
+                // Wide strings
+                //
+                vector<unsigned char> u8buffer;                  // Buffer to convert multibyte characters
 
-            for(size_t i = 0; i < value.size();)
-            {
-                if(charSet.find(value[i]) == charSet.end())
+                out << "L\"";
+                for(size_t i = 0; i < value.size();)
+                {
+                    if(charSet.find(value[i]) == charSet.end())
+                    {
+                        if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+                        {
+                            //
+                            // Print as unicode if not in basic source character set
+                            //
+                            out << u32CodePoint(static_cast<unsigned int>(value[i]));
+                        }
+                        else
+                        {
+                            u8buffer.push_back(value[i]);
+                        }
+                    }
+                    else
+                    {
+                        //
+                        // Write any pending characters in the utf8 buffer
+                        //
+                        if(!u8buffer.empty())
+                        {
+                            writeU8Buffer(u8buffer, out);
+                            u8buffer.clear();
+                        }
+                        
+                        switch(value[i])
+                        {
+                            case '"':
+                            {
+                                out << "\\";
+                                break;
+                            }
+                        }
+                        
+                        out << value[i];                              // Print normally if in basic source character set
+                    }
+                    i++;
+                    
+                }
+                
+                //
+                // Write any pending characters in the utf8 buffer
+                //
+                if(!u8buffer.empty())
                 {
-                    unsigned char uc = value[i];                  // char may be signed, so make it positive
-                    ostringstream s;
-                    s << "\\";                              // Print as octal if not in basic source character set
-                    s.width(3);
-                    s.fill('0');
-                    s << oct;
-                    s << static_cast<unsigned>(uc);
-                    out << s.str();
+                    writeU8Buffer(u8buffer, out);
+                    u8buffer.clear();
                 }
-                else
+                out << "\"";
+            }
+            else // narrow strings
+            {
+                out << "\"";                                    // Opening "
+
+                for(size_t i = 0; i < value.size();)
                 {
-                    switch(value[i])
+                    if(charSet.find(value[i]) == charSet.end())
+                    {
+                        unsigned char uc = value[i];                  // char may be signed, so make it positive
+                        ostringstream s;
+                        s << "\\";                                    // Print as octal if not in basic source character set
+                        s.width(3);
+                        s.fill('0');
+                        s << oct;
+                        s << static_cast<unsigned>(uc);
+                        out << s.str();
+                    }
+                    else
                     {
-                        case '\\':
+                        switch(value[i])
                         {
-                            string s = "\\";
-                            size_t j = i + 1;
-                            for(; j < value.size(); ++j)
+                            case '\\':
                             {
-                                if(value[j] != '\\')
+                                string s = "\\";
+                                size_t j = i + 1;
+                                for(; j < value.size(); ++j)
                                 {
-                                    break;
+                                    if(value[j] != '\\')
+                                    {
+                                        break;
+                                    }
+                                    s += "\\";
                                 }
-                                s += "\\";
-                            }
 
-                            //
-                            // An even number of slash \ will escape the backslash and
-                            // the codepoint will be interpreted as its charaters
-                            //
-                            // \\U00000041  - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
-                            // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
-                            //
-                            if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
-                            {
                                 //
-                                // Convert codepoint to UTF8 bytes and write the escaped bytes
+                                // An even number of slash \ will escape the backslash and
+                                // the codepoint will be interpreted as its characters
+                                //
+                                // \\U00000041  - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+                                // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
                                 //
-                                out << s.substr(0, s.size() - 1);
+                                if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+                                {
+                                    //
+                                    // Convert codepoint to UTF8 bytes and write the escaped bytes
+                                    //
+                                    out << s.substr(0, s.size() - 1);
 
-                                size_t sz = value[j] == 'U' ? 8 : 4;
-                                string codepoint = value.substr(j + 1, sz);
-                                assert(codepoint.size() ==  sz);
+                                    size_t sz = value[j] == 'U' ? 8 : 4;
+                                    string codepoint = value.substr(j + 1, sz);
+                                    assert(codepoint.size() ==  sz);
 
-                                IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                                    IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
 
 
-                                vector<unsigned int> u32buffer;
-                                u32buffer.push_back(static_cast<unsigned int>(v));
+                                    vector<unsigned int> u32buffer;
+                                    u32buffer.push_back(static_cast<unsigned int>(v));
 
-                                vector<unsigned char> u8buffer;
+                                    vector<unsigned char> u8buffer;
 
-                                IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
-                                switch(result)
-                                {
-                                    case conversionOK:
-                                        break;
-                                    case sourceExhausted:
-                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
-                                    case sourceIllegal:
-                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
-                                    default:
+                                    IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+                                    switch(result)
                                     {
-                                        assert(0);
-                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+                                        case conversionOK:
+                                            break;
+                                        case sourceExhausted:
+                                            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+                                        case sourceIllegal:
+                                            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+                                        default:
+                                        {
+                                            assert(0);
+                                            throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+                                        }
                                     }
-                                }
 
-                                ostringstream s;
-                                for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+                                    ostringstream s;
+                                    for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+                                    {
+                                        s << "\\";
+                                        s.fill('0');
+                                        s.width(3);
+                                        s << oct;
+                                        s << static_cast<unsigned int>(*q);
+                                    }
+                                    out << s.str();
+
+                                    i = j + 1 + sz;
+                                }
+                                else
                                 {
-                                    s << "\\";
-                                    s.fill('0');
-                                    s.width(3);
-                                    s << oct;
-                                    s << static_cast<unsigned int>(*q);
+                                    out << s;
+                                    i = j;
                                 }
-                                out << s.str();
-
-                                i = j + 1 + sz;
+                                continue;
                             }
-                            else
+                            case '"':
                             {
-                                out << s;
-                                i = j;
+                                out << "\\";
+                                break;
                             }
-                            continue;
-                        }
-                        case '"':
-                        {
-                            out << "\\";
-                            break;
                         }
+                        
+                        out << value[i];                              // Print normally if in basic source character set
                     }
-                    
-                    out << value[i];                              // Print normally if in basic source character set
+                    ++i;
                 }
-                ++i;
+                out << "\"";                                    // Closing "
             }
-
-            out << "\"";                                    // Closing "
         }
         else if(bp && bp->kind() == Builtin::KindLong)
         {