From 2bd402833bfdb54c1940dd0038be8af05d6f5e6f Mon Sep 17 00:00:00 2001
From: Jose <jose@zeroc.com>
Date: Tue, 8 Mar 2016 13:46:55 +0100
Subject: ICE-6991 - Add support for unicode escape sequences

---
 cpp/src/IceUtil/Unicode.cpp     |  38 +++
 cpp/src/IceUtil/Unicode.h       |   9 +
 cpp/src/Slice/PythonUtil.cpp    | 177 +++++++++----
 cpp/src/Slice/RubyUtil.cpp      | 175 +++++++++----
 cpp/src/Slice/Scanner.cpp       | 543 +++++++++++++++++++++++++---------------
 cpp/src/Slice/Scanner.l         | 410 +++++++++++++++++++-----------
 cpp/src/slice2cpp/Gen.cpp       |   2 +-
 cpp/src/slice2cs/Gen.cpp        | 152 +++++++++--
 cpp/src/slice2cs/Makefile       |   2 +-
 cpp/src/slice2cs/Makefile.mak   |   2 +-
 cpp/src/slice2java/Gen.cpp      | 194 +++++++++++---
 cpp/src/slice2java/Makefile     |   2 +-
 cpp/src/slice2java/Makefile.mak |   2 +-
 cpp/src/slice2js/Gen.cpp        | 145 +++++++++--
 cpp/src/slice2js/Makefile       |   2 +-
 cpp/src/slice2js/Makefile.mak   |   2 +-
 cpp/src/slice2php/Main.cpp      |  93 ++++++-
 cpp/src/slice2php/Makefile      |   2 +-
 cpp/src/slice2php/Makefile.mak  |   2 +-
 19 files changed, 1421 insertions(+), 533 deletions(-)

(limited to 'cpp/src')
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp
index cae3476e277..7bad1d67c17 100644
--- a/cpp/src/IceUtil/Unicode.cpp
+++ b/cpp/src/IceUtil/Unicode.cpp
@@ -128,4 +128,78 @@ IceUtilInternal::convertUTF8ToUTFWstring(const Byte*& sourceStart, const Byte* s
     return result;
 }
 
+//
+// Converts the UTF-8 bytes in source to UTF-16 code units stored in target.
+// On conversionOK, target is trimmed to the code units actually written;
+// for any other result target is left at its worst-case size.
+//
+ConversionResult
+IceUtilInternal::convertUTF8ToUTF16(const vector<unsigned char>& source, vector<unsigned short>& target, ConversionFlags flags)
+{
+    //
+    // Taking &source[0] of an empty vector is undefined behavior, so empty
+    // input is handled up front: it converts to an empty result.
+    //
+    if(source.empty())
+    {
+        target.clear();
+        return conversionOK;
+    }
+
+    //
+    // UTF-8 input never produces more UTF-16 code units than it has bytes,
+    // so source.size() is a sufficient upper bound for the output.
+    //
+    target.resize(source.size());
+    const unsigned char* sourceStart = &source[0];
+    const unsigned char* sourceEnd = sourceStart + source.size();
+
+    unsigned short* targetStart = &target[0];
+    unsigned short* targetEnd = targetStart + target.size();
+    ConversionResult result = ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+    if(result == conversionOK)
+    {
+        target.resize(targetStart - &target[0]);
+    }
+    return result;
+}
+
+//
+// Converts the UTF-32 code points in source to UTF-8 bytes stored in target.
+// On conversionOK, target is trimmed to the bytes actually written;
+// for any other result target is left at its worst-case size.
+//
+ConversionResult
+IceUtilInternal::convertUTF32ToUTF8(const vector<unsigned int>& source, vector<unsigned char>& target, ConversionFlags flags)
+{
+    //
+    // Taking &source[0] of an empty vector is undefined behavior, so empty
+    // input is handled up front: it converts to an empty result.
+    //
+    if(source.empty())
+    {
+        target.clear();
+        return conversionOK;
+    }
+
+    //
+    // A code point takes at most 4 bytes in UTF-8.
+    //
+    target.resize(source.size() * 4);
+    const unsigned int* sourceStart = &source[0];
+    const unsigned int* sourceEnd = sourceStart + source.size();
+
+    unsigned char* targetStart = &target[0];
+    unsigned char* targetEnd = targetStart + target.size();
+    ConversionResult result = ConvertUTF32toUTF8(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+    if(result == conversionOK)
+    {
+        target.resize(targetStart - &target[0]);
+    }
+    return result;
+}
+
+
 
diff --git a/cpp/src/IceUtil/Unicode.h b/cpp/src/IceUtil/Unicode.h
index 00333ce8a44..2c96d6c6448 100644
--- a/cpp/src/IceUtil/Unicode.h
+++ b/cpp/src/IceUtil/Unicode.h
@@ -44,6 +44,15 @@ ConversionResult
 convertUTF8ToUTFWstring(const IceUtil::Byte*& sourceStart, const IceUtil::Byte* sourceEnd, 
                         std::wstring& target, IceUtil::ConversionFlags flags);
 
+
+ICE_UTIL_API ConversionResult
+convertUTF8ToUTF16(const std::vector<unsigned char>&, std::vector<unsigned short>&,
+                   IceUtil::ConversionFlags);
+
+ICE_UTIL_API ConversionResult
+convertUTF32ToUTF8(const std::vector<unsigned int>&, std::vector<unsigned char>&,
+                   IceUtil::ConversionFlags);
+
 }
 
 #endif
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp
index 220e521d85d..92f657e8221 100644
--- a/cpp/src/Slice/PythonUtil.cpp
+++ b/cpp/src/Slice/PythonUtil.cpp
@@ -13,6 +13,7 @@
 #include <IceUtil/IceUtil.h>
 #include <IceUtil/StringUtil.h>
 #include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
 #include <climits>
 #include <iterator>
 
@@ -1879,68 +1880,138 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
 
                 _out << "\"";                                       // Opening "
 
-                for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+                for(size_t i = 0; i < value.size();)
                 {
-                    switch(*c)
+                    char c = value[i];
+                    switch(c)
                     {
-                    case '"':
-                    {
-                        _out << "\\\"";
-                        break;
-                    }
-                    case '\\':
-                    {
-                        _out << "\\\\";
-                        break;
-                    }
-                    case '\r':
-                    {
-                        _out << "\\r";
-                        break;
-                    }
-                    case '\n':
-                    {
-                        _out << "\\n";
-                        break;
-                    }
-                    case '\t':
-                    {
-                        _out << "\\t";
-                        break;
-                    }
-                    case '\b':
-                    {
-                        _out << "\\b";
-                        break;
-                    }
-                    case '\f':
-                    {
-                        _out << "\\f";
-                        break;
-                    }
-                    default:
-                    {
-                        if(charSet.find(*c) == charSet.end())
+                        case '"':
                         {
-                            unsigned char uc = *c;              // Char may be signed, so make it positive.
-                            stringstream s;
-                            s << "\\";                          // Print as octal if not in basic source character set.
-                            s.flags(ios_base::oct);
-                            s.width(3);
-                            s.fill('0');
-                            s << static_cast<unsigned>(uc);
-                            _out << s.str();
+                            _out << "\\\"";
+                            break;
                         }
-                        else
+                        case '\\':
                         {
-                            _out << *c;                         // Print normally if in basic source character set.
+                            string s = "\\";
+                            size_t j = i + 1;
+                            for(; j < value.size(); ++j)
+                            {
+                                if(value[j] != '\\')
+                                {
+                                    break;
+                                }
+                                s += "\\";
+                            }
+                            
+                            //
+                            // An even number of backslashes escapes the backslash itself, so
+                            // the codepoint is interpreted as its literal characters
+                            //
+                            // \\u00000041  - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
+                            // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+                            //
+                            if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+                            {
+                                //
+                                // Convert codepoint to UTF8 bytes and write the escaped bytes
+                                //
+                                _out << s.substr(0, s.size() - 1);
+                                
+                                size_t sz = value[j] == 'U' ? 8 : 4;
+                                string codepoint = value.substr(j + 1, sz);
+                                assert(codepoint.size() ==  sz);
+
+                                IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                                
+                                vector<unsigned int> u32buffer;
+                                u32buffer.push_back(v);
+                                
+                                vector<unsigned char> u8buffer;
+                                IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+                                switch(result)
+                                {
+                                    case conversionOK:
+                                        break;
+                                    case sourceExhausted:
+                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+                                    case sourceIllegal:
+                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+                                    default:
+                                    {
+                                        assert(0);
+                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+                                    }
+                                }
+                                
+                                ostringstream s;
+                                for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+                                {
+                                    s << "\\";
+                                    s.fill('0');
+                                    s.width(3);
+                                    s << oct;
+                                    s << static_cast<unsigned int>(*q);
+                                }
+                                _out << s.str();
+                                
+                                i = j + 1 + sz;
+                            }
+                            else
+                            {
+                                _out << s;
+                                i = j;
+                            }
+                            continue;
+                        }
+                        case '\r':
+                        {
+                            _out << "\\r";
+                            break;
+                        }
+                        case '\n':
+                        {
+                            _out << "\\n";
+                            break;
+                        }
+                        case '\t':
+                        {
+                            _out << "\\t";
+                            break;
+                        }
+                        case '\b':
+                        {
+                            _out << "\\b";
+                            break;
+                        }
+                        case '\f':
+                        {
+                            _out << "\\f";
+                            break;
+                        }
+                        default:
+                        {
+                            if(charSet.find(c) == charSet.end())
+                            {
+                                unsigned char uc = c;               // Char may be signed, so make it positive.
+                                stringstream s;
+                                s << "\\";                          // Print as octal if not in basic source character set.
+                                s.flags(ios_base::oct);
+                                s.width(3);
+                                s.fill('0');
+                                s << static_cast<unsigned>(uc);
+                                _out << s.str();
+                            }
+                            else
+                            {
+                                _out << c;                          // Print normally if in basic source character set.
+                            }
+                            break;
                         }
-                        break;
-                    }
                     }
+                    ++i;
                 }
 
-                _out << "\"";                                   // Closing "
+                _out << "\"";                                       // Closing "
                 break;
             }
             case Slice::Builtin::KindObject:
diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp
index 3e6fe7557ac..fbed4764bbe 100644
--- a/cpp/src/Slice/RubyUtil.cpp
+++ b/cpp/src/Slice/RubyUtil.cpp
@@ -12,6 +12,7 @@
 #include <Slice/Util.h>
 #include <IceUtil/Functional.h>
 #include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
 #include <iterator>
 
 using namespace std;
@@ -1470,65 +1471,134 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr
 
                 _out << "\"";                                      // Opening "
 
-                for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+                for(size_t i = 0; i < value.size();)
                 {
-                    switch(*c)
+                    char c = value[i];
+                    switch(c)
                     {
-                    case '"':
-                    {
-                        _out << "\\\"";
-                        break;
-                    }
-                    case '\\':
-                    {
-                        _out << "\\\\";
-                        break;
-                    }
-                    case '\r':
-                    {
-                        _out << "\\r";
-                        break;
-                    }
-                    case '\n':
-                    {
-                        _out << "\\n";
-                        break;
-                    }
-                    case '\t':
-                    {
-                        _out << "\\t";
-                        break;
-                    }
-                    case '\b':
-                    {
-                        _out << "\\b";
-                        break;
-                    }
-                    case '\f':
-                    {
-                        _out << "\\f";
-                        break;
-                    }
-                    default:
-                    {
-                        if(charSet.find(*c) == charSet.end())
+                        case '"':
                         {
-                            unsigned char uc = *c;              // Char may be signed, so make it positive.
-                            stringstream s;
-                            s << "\\";                          // Print as octal if not in basic source character set.
-                            s.flags(ios_base::oct);
-                            s.width(3);
-                            s.fill('0');
-                            s << static_cast<unsigned>(uc);
-                            _out << s.str();
+                            _out << "\\\"";
+                            break;
                         }
-                        else
+                        case '\\':
                         {
-                            _out << *c;                         // Print normally if in basic source character set.
+                            string s = "\\";
+                            size_t j = i + 1;
+                            for(; j < value.size(); ++j)
+                            {
+                                if(value[j] != '\\')
+                                {
+                                    break;
+                                }
+                                s += "\\";
+                            }
+                            
+                            //
+                            // An even number of backslashes escapes the backslash itself, so
+                            // the codepoint is interpreted as its literal characters
+                            //
+                            // \\u00000041  - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
+                            // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+                            //
+                            if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+                            {
+                                //
+                                // Convert codepoint to UTF8 bytes and write the escaped bytes
+                                //
+                                _out << s.substr(0, s.size() - 1);
+                                
+                                size_t sz = value[j] == 'U' ? 8 : 4;
+                                string codepoint = value.substr(j + 1, sz);
+                                assert(codepoint.size() == sz);
+                                IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                                
+                                vector<unsigned int> u32buffer;
+                                u32buffer.push_back(v);
+                                
+                                vector<unsigned char> u8buffer;
+                                IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+                                switch(result)
+                                {
+                                    case conversionOK:
+                                        break;
+                                    case sourceExhausted:
+                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+                                    case sourceIllegal:
+                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+                                    default:
+                                    {
+                                        assert(0);
+                                        throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+                                    }
+                                }
+                                
+                                ostringstream s;
+                                for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+                                {
+                                    s << "\\";
+                                    s.fill('0');
+                                    s.width(3);
+                                    s << oct;
+                                    s << static_cast<unsigned int>(*q);
+                                }
+                                _out << s.str();
+                                
+                                i = j + 1 + sz;
+                            }
+                            else
+                            {
+                                _out << s;
+                                i = j;
+                            }
+                            continue;
+                        }
+                        case '\r':
+                        {
+                            _out << "\\r";
+                            break;
+                        }
+                        case '\n':
+                        {
+                            _out << "\\n";
+                            break;
+                        }
+                        case '\t':
+                        {
+                            _out << "\\t";
+                            break;
+                        }
+                        case '\b':
+                        {
+                            _out << "\\b";
+                            break;
+                        }
+                        case '\f':
+                        {
+                            _out << "\\f";
+                            break;
+                        }
+                        default:
+                        {
+                            if(charSet.find(c) == charSet.end())
+                            {
+                                unsigned char uc = c;              // Char may be signed, so make it positive.
+                                stringstream s;
+                                s << "\\";                         // Print as octal if not in basic source character set.
+                                s.flags(ios_base::oct);
+                                s.width(3);
+                                s.fill('0');
+                                s << static_cast<unsigned>(uc);
+                                _out << s.str();
+                            }
+                            else
+                            {
+                                _out << c;                         // Print normally if in basic source character set.
+                            }
+                            break;
                         }
-                        break;
-                    }
                     }
+                    ++i;
                 }
 
                 _out << "\"";                                   // Closing "
@@ -1635,6 +1705,7 @@ Slice::Ruby::CodeVisitor::collectExceptionMembers(const ExceptionPtr& p, MemberI
 void
 Slice::Ruby::generate(const UnitPtr& un, bool all, bool checksum, const vector<string>& includePaths, Output& out)
 {
+    out <<"# encoding: utf-8";
     out << nl << "require 'Ice'";
 
     if(!all)
diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp
index 0a4fdad538d..fc2fc290dac 100644
--- a/cpp/src/Slice/Scanner.cpp
+++ b/cpp/src/Slice/Scanner.cpp
@@ -29,7 +29,7 @@
 #define FLEX_SCANNER
 #define YY_FLEX_MAJOR_VERSION 2
 #define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
+#define YY_FLEX_SUBMINOR_VERSION 39
 #if YY_FLEX_SUBMINOR_VERSION > 0
 #define FLEX_BETA
 #endif
@@ -74,7 +74,6 @@ typedef int flex_int32_t;
 typedef unsigned char flex_uint8_t; 
 typedef unsigned short int flex_uint16_t;
 typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
 
 /* Limits of integral types. */
 #ifndef INT8_MIN
@@ -105,6 +104,8 @@ typedef unsigned int flex_uint32_t;
 #define UINT32_MAX             (4294967295U)
 #endif
 
+#endif /* ! C99 */
+
 #endif /* ! FLEXINT_H */
 
 #ifdef __cplusplus
@@ -161,7 +162,15 @@ typedef unsigned int flex_uint32_t;
 
 /* Size of default input buffer. */
 #ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
 #define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
 #endif
 
 /* The state buf must be large enough to hold one state per character in the main buffer.
@@ -173,7 +182,12 @@ typedef unsigned int flex_uint32_t;
 typedef struct yy_buffer_state *YY_BUFFER_STATE;
 #endif
 
-extern int slice_leng;
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t slice_leng;
 
 extern FILE *slice_in, *slice_out;
 
@@ -182,6 +196,7 @@ extern FILE *slice_in, *slice_out;
 #define EOB_ACT_LAST_MATCH 2
 
     #define YY_LESS_LINENO(n)
+    #define YY_LINENO_REWIND_TO(ptr)
     
 /* Return all but the first "n" matched characters back to the input stream. */
 #define yyless(n) \
@@ -199,11 +214,6 @@ extern FILE *slice_in, *slice_out;
 
 #define unput(c) yyunput( c, (yytext_ptr)  )
 
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
 #ifndef YY_STRUCT_YY_BUFFER_STATE
 #define YY_STRUCT_YY_BUFFER_STATE
 struct yy_buffer_state
@@ -221,7 +231,7 @@ struct yy_buffer_state
 	/* Number of characters read into yy_ch_buf, not including EOB
 	 * characters.
 	 */
-	int yy_n_chars;
+	yy_size_t yy_n_chars;
 
 	/* Whether we "own" the buffer - i.e., we know we created it,
 	 * and can realloc() it to grow it, and should free() it to
@@ -291,8 +301,8 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
 
 /* yy_hold_char holds the character lost when slice_text is formed. */
 static char yy_hold_char;
-static int yy_n_chars;		/* number of characters read into yy_ch_buf */
-int slice_leng;
+static yy_size_t yy_n_chars;		/* number of characters read into yy_ch_buf */
+yy_size_t slice_leng;
 
 /* Points to current character in buffer. */
 static char *yy_c_buf_p = (char *) 0;
@@ -320,7 +330,7 @@ static void slice__init_buffer (YY_BUFFER_STATE b,FILE *file  );
 
 YY_BUFFER_STATE slice__scan_buffer (char *base,yy_size_t size  );
 YY_BUFFER_STATE slice__scan_string (yyconst char *yy_str  );
-YY_BUFFER_STATE slice__scan_bytes (yyconst char *bytes,int len  );
+YY_BUFFER_STATE slice__scan_bytes (yyconst char *bytes,yy_size_t len  );
 
 void *slice_alloc (yy_size_t  );
 void *slice_realloc (void *,yy_size_t  );
@@ -352,7 +362,7 @@ void slice_free (void *  );
 
 /* Begin user sect3 */
 
-#define slice_wrap(n) 1
+#define slice_wrap() 1
 #define YY_SKIP_YYWRAP
 
 typedef unsigned char YY_CHAR;
@@ -558,6 +568,8 @@ char *slice_text;
 #include <Slice/Grammar.h>
 #include <IceUtil/InputUtil.h>
 
+#include <iomanip>
+
 #include <stdlib.h>
 #include <math.h>
 
@@ -620,7 +632,7 @@ int checkKeyword(string&);
 
 
 
-#line 623 "lex.yy.c"
+#line 635 "lex.yy.c"
 
 #define INITIAL 0
 #define BOMSCAN 1
@@ -661,7 +673,7 @@ FILE *slice_get_out (void );
 
 void slice_set_out  (FILE * out_str  );
 
-int slice_get_leng (void );
+yy_size_t slice_get_leng (void );
 
 char *slice_get_text (void );
 
@@ -703,7 +715,12 @@ static int input (void );
 
 /* Amount of stuff to slurp up with each read. */
 #ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
 #define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
 #endif
 
 /* Copy whatever the last rule matched to the standard output. */
@@ -722,7 +739,7 @@ static int input (void );
 	if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
 		{ \
 		int c = '*'; \
-		unsigned n; \
+		size_t n; \
 		for ( n = 0; n < max_size && \
 			     (c = getc( slice_in )) != EOF && c != '\n'; ++n ) \
 			buf[n] = (char) c; \
@@ -807,11 +824,6 @@ YY_DECL
 	register char *yy_cp, *yy_bp;
 	register int yy_act;
     
-#line 92 "Scanner.l"
-
-
-#line 813 "lex.yy.c"
-
 	if ( !(yy_init) )
 		{
 		(yy_init) = 1;
@@ -838,6 +850,12 @@ YY_DECL
 		slice__load_buffer_state( );
 		}
 
+	{
+#line 94 "Scanner.l"
+
+
+#line 857 "lex.yy.c"
+
 	while ( 1 )		/* loops until end-of-file is reached */
 		{
 		yy_cp = (yy_c_buf_p);
@@ -855,7 +873,7 @@ YY_DECL
 yy_match:
 		do
 			{
-			register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
+			register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
 			if ( yy_accept[yy_current_state] )
 				{
 				(yy_last_accepting_state) = yy_current_state;
@@ -895,7 +913,7 @@ case 1:
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up slice_text again */
 YY_RULE_SETUP
-#line 94 "Scanner.l"
+#line 96 "Scanner.l"
 {
     if(unit->scanPosition(slice_text))
     {
@@ -906,10 +924,11 @@ YY_RULE_SETUP
 case 2:
 /* rule 2 can match eol */
 *yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
+YY_LINENO_REWIND_TO(yy_cp - 1);
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up slice_text again */
 YY_RULE_SETUP
-#line 101 "Scanner.l"
+#line 103 "Scanner.l"
 {
     if(unit->scanPosition(slice_text))
     {
@@ -922,7 +941,7 @@ case 3:
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up slice_text again */
 YY_RULE_SETUP
-#line 108 "Scanner.l"
+#line 110 "Scanner.l"
 {
     if(unit->scanPosition(slice_text))
     {
@@ -933,10 +952,11 @@ YY_RULE_SETUP
 case 4:
 /* rule 4 can match eol */
 *yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
+YY_LINENO_REWIND_TO(yy_cp - 1);
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up slice_text again */
 YY_RULE_SETUP
-#line 115 "Scanner.l"
+#line 117 "Scanner.l"
 {
     if(unit->scanPosition(slice_text))
     {
@@ -946,7 +966,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 5:
 YY_RULE_SETUP
-#line 122 "Scanner.l"
+#line 124 "Scanner.l"
 {
     // C++-style comment
     BEGIN(MAINSCAN);
@@ -964,7 +984,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 6:
 YY_RULE_SETUP
-#line 137 "Scanner.l"
+#line 139 "Scanner.l"
 {
     // C-style comment
     BEGIN(MAINSCAN);
@@ -1008,7 +1028,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 7:
 YY_RULE_SETUP
-#line 178 "Scanner.l"
+#line 180 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     return ICE_SCOPE_DELIMITER;
@@ -1016,7 +1036,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 8:
 YY_RULE_SETUP
-#line 183 "Scanner.l"
+#line 185 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     return ICE_METADATA_OPEN;
@@ -1024,7 +1044,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 9:
 YY_RULE_SETUP
-#line 188 "Scanner.l"
+#line 190 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     return ICE_METADATA_CLOSE;
@@ -1032,7 +1052,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 10:
 YY_RULE_SETUP
-#line 193 "Scanner.l"
+#line 195 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     return ICE_GLOBAL_METADATA_OPEN;
@@ -1040,7 +1060,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 11:
 YY_RULE_SETUP
-#line 198 "Scanner.l"
+#line 200 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     return ICE_GLOBAL_METADATA_CLOSE;
@@ -1049,7 +1069,7 @@ YY_RULE_SETUP
 case 12:
 /* rule 12 can match eol */
 YY_RULE_SETUP
-#line 203 "Scanner.l"
+#line 205 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     StringTokPtr ident = new StringTok;
@@ -1077,7 +1097,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 13:
 YY_RULE_SETUP
-#line 228 "Scanner.l"
+#line 230 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     StringTokPtr ident = new StringTok;
@@ -1088,163 +1108,279 @@ YY_RULE_SETUP
 	YY_BREAK
 case 14:
 YY_RULE_SETUP
-#line 236 "Scanner.l"
+#line 238 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     StringTokPtr str = new StringTok;
     str->literal = "\"";
     while(true)
     {
-	char c = static_cast<char>(yyinput());
+        char c = static_cast<char>(yyinput());
         str->literal += c;
-	if(c == '"')
-	{
-	    break;
-	}
-	else if(c == EOF)
-	{
-	    unit->error("EOF in string");
-	    break;
-	}
-	else if(c == '\n')
-	{
-	    unit->error("newline in string");
-	}
-	else if(c == '\\')
-	{
-	    char next = static_cast<char>(yyinput());
+        if(c == '"')
+        {
+            break;
+        }
+        else if(c == EOF)
+        {
+            unit->error("EOF in string");
+            break;
+        }
+        else if(c == '\n')
+        {
+            unit->error("newline in string");
+        }
+        else if(c == '\\')
+        {
+            char next = static_cast<char>(yyinput());
             str->literal += next;
-	    switch(next)
-	    {
-		case '\\':
-		case '"':
-		case '\'':
-		{
-		    str->v += next;
-		    break;
-		}
-	    
-		case 'n':
-		{
-		    str->v += '\n';
-		    break;
-		}
-	    
-		case 'r':
-		{
-		    str->v += '\r';
-		    break;
-		}
-
-		case 't':
-		{
-		    str->v += '\t';
-		    break;
-		}
-	    
-		case 'v':
-		{
-		    str->v += '\v';
-		    break;
-		}
-	    
-		case 'f':
-		{
-		    str->v += '\f';
-		    break;
-		}
-
-		case 'a':
-		{
-		    str->v += '\a';
-		    break;
-		}
-
-		case 'b':
-		{
-		    str->v += '\b';
-		    break;
-		}
-
-		case '?':
-		{
-		    str->v += '\?';
-		    break;
-		}
-
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		{
-		    static string octalDigits = "01234567";
-		    unsigned short us = next - '0';
-		    if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
-		    {
+            switch(next)
+            {
+                case '\\':
+                {
+                    str->v += '\\';
+                    str->v += '\\';
+                    break;
+                }
+                case '"':
+                case '\'':
+                {
+                    str->v += next;
+                    break;
+                }
+            
+                case 'n':
+                {
+                    str->v += '\n';
+                    break;
+                }
+            
+                case 'r':
+                {
+                    str->v += '\r';
+                    break;
+                }
+
+                case 't':
+                {
+                    str->v += '\t';
+                    break;
+                }
+            
+                case 'v':
+                {
+                    str->v += '\v';
+                    break;
+                }
+            
+                case 'f':
+                {
+                    str->v += '\f';
+                    break;
+                }
+
+                case 'a':
+                {
+                    str->v += '\a';
+                    break;
+                }
+
+                case 'b':
+                {
+                    str->v += '\b';
+                    break;
+                }
+
+                case '?':
+                {
+                    str->v += '\?';
+                    break;
+                }
+
+                //
+                // Octal value \nnn: up to three octal digits, stopping at the first non-octal character.
+                //
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                {
+                    static string octalDigits = "01234567";
+                    string escape;
+                    escape += next;
+                    for(int i = 0; i < 2; ++i)
+                    {
+                        next = static_cast<char>(yyinput());
+                        if(octalDigits.find_first_of(next) == string::npos)
+                        {
+                            unput(next);
+                            break;
+                        }
+                        escape += next;
+                    }
+                    // The first digit is already in the literal; append only the digits read here.
+                    str->literal += escape.substr(1);
+                    IceUtil::Int64 value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    else if(value > 255)
+                    {
+                        ostringstream os;
+                        os << "octal escape sequence out of range: '\\" << oct << value << "'";
+                        unit->warning(os.str());
+                    }
+                    str->v += static_cast<char>(value);
+                    break;
+                }
+                case 'x':
+                {
+                    IceUtil::Int64 value = 0;
+                    string escape = "";
+                    while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
+                    {
+                        escape += next;
+                    }
+                    unput(next);
+                    
+                    str->literal += escape;
+                    value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    else if(value > 255)
+                    {
+                        ostringstream os;
+                        os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+                        unit->warning(os.str());
+                    }
+                    str->v += static_cast<char>(value);
+                    break;
+                }
+        
+                //
+                // Universal character name \unnnn code point U+nnnn
+                //
+                case 'u':
+                {
+                    IceUtil::Int64 value = 0;
+                    string escape = "";
+
+                    for(int i = 0; i < 4; ++i)
+                    {
+                        next = static_cast<char>(yyinput());
                         str->literal += next;
-		    	us = us * 8 + next - '0';
-			if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
-			{
-			    us = us * 8 + next - '0';
-			}
-			else
-			{
-			    unput(next);
-			}
-		    }
-		    else
-		    {
-		    	unput(next);
-		    }
-		    if(us == 0)
-		    {
-			unit->error("illegal NUL character in string constant");
-		    }
-		    str->v += static_cast<char>(us);
-		    break;
-		}
-		case 'x':
-		{
-		    IceUtil::Int64 ull = 0;
-		    while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
-		    {
+                        if(!isxdigit(static_cast<unsigned char>(next)))
+                        {
+                            unit->error("unknown escape sequence in string constant: " + str->literal);
+                            break;
+                        }
+                        escape += next;
+                    }
+                    
+                    value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+                    ostringstream os;
+                    os << '\\' << 'u';
+                    os.fill('0');
+                    os.width(4);
+                    os << hex << value;
+                    
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    
+                    
+                    //
+                    // Determine if a character is a surrogate:
+                    // 
+                    // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+                    // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+                    //
+                    else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+                    {
+                        unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+                    }
+
+                    str->v += os.str();
+                    
+                    break;
+                }
+                
+                case 'U':
+                {
+                    IceUtil::Int64 value = 0;
+                    string escape = "";
+                    
+                    for(int i = 0; i < 8; ++i)
+                    {
+                        next = static_cast<char>(yyinput());
                         str->literal += next;
-			ull *= 16;
-			if(isdigit(static_cast<unsigned char>(next)))
-			{
-			    ull += next - '0';
-			}
-			else if(islower(static_cast<unsigned char>(next)))
-			{
-			    ull += next - 'a' + 10;
-			}
-			else
-			{
-			    ull += next - 'A' + 10;
-			}
-		    }
-		    unput(next);
-		    if(ull == 0)
-		    {
-			unit->error("illegal NUL character in string constant");
-		    }
-		    str->v += static_cast<char>(ull);
-		    break;
-		}
-	
-		// TODO: add universal character names
-	    
-		default:
-		{
-		    str->v += c;
-		    unput(next);
-		}
-	    }
-	}
-	else
-	{
-	    str->v += c;
-	}
+                        if(!isxdigit(static_cast<unsigned char>(next)))
+                        {
+
+                            unit->error("unknown escape sequence in string constant: " + str->literal);
+                            break;
+                        }
+                        escape += next;
+                    }
+                    
+                    value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+                    ostringstream os;
+                    os << '\\' << 'U';
+                    os.fill('0');
+                    os.width(8);
+                    os << hex << value;
+                    
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    
+                    //
+                    // Determine if a character is a surrogate:
+                    // 
+                    // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+                    // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+                    //
+                    else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+                    {
+                        unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+                    }
+
+                    str->v += os.str();
+                    break;
+                }
+
+                default:
+                {
+                    ostringstream os;
+                    os << "unknown escape sequence '\\" << next << "'";
+                    unit->warning(os.str());
+                    //
+                    // We escape the backslash in an unknown escape sequence
+                    // to keep compatibility with 3.6
+                    //
+                    str->v += '\\';
+                    str->v += c;
+                    unput(next);
+                }
+            }
+        }
+        else
+        {
+            str->v += c;
+        }
     }
     *yylvalp = str;
     return ICE_STRING_LITERAL;
@@ -1252,7 +1388,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 15:
 YY_RULE_SETUP
-#line 397 "Scanner.l"
+#line 515 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     IntegerTokPtr itp = new IntegerTok;
@@ -1271,7 +1407,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 16:
 YY_RULE_SETUP
-#line 413 "Scanner.l"
+#line 531 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     errno = 0;
@@ -1305,7 +1441,7 @@ YY_RULE_SETUP
 case 17:
 /* rule 17 can match eol */
 YY_RULE_SETUP
-#line 443 "Scanner.l"
+#line 561 "Scanner.l"
 {
     // Ignore white-space
     
@@ -1321,7 +1457,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 18:
 YY_RULE_SETUP
-#line 456 "Scanner.l"
+#line 574 "Scanner.l"
 {
     // Ignore UTF-8 BOM, rule only active when parsing start of file.
     
@@ -1330,7 +1466,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 19:
 YY_RULE_SETUP
-#line 462 "Scanner.l"
+#line 580 "Scanner.l"
 {
     BEGIN(MAINSCAN);
     if(slice_text[0] < 32 || slice_text[0] > 126)
@@ -1349,10 +1485,10 @@ YY_RULE_SETUP
 	YY_BREAK
 case 20:
 YY_RULE_SETUP
-#line 478 "Scanner.l"
+#line 596 "Scanner.l"
 ECHO;
 	YY_BREAK
-#line 1355 "lex.yy.c"
+#line 1491 "lex.yy.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(BOMSCAN):
 case YY_STATE_EOF(MAINSCAN):
@@ -1486,6 +1622,7 @@ case YY_STATE_EOF(MAINSCAN):
 			"fatal flex scanner internal error--no action found" );
 	} /* end of action switch */
 		} /* end of scanning one token */
+	} /* end of user's declarations */
 } /* end of slice_lex */
 
 /* yy_get_next_buffer - try to read in a new buffer
@@ -1541,21 +1678,21 @@ static int yy_get_next_buffer (void)
 
 	else
 		{
-			int num_to_read =
+			yy_size_t num_to_read =
 			YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
 
 		while ( num_to_read <= 0 )
 			{ /* Not enough room in the buffer - grow it. */
 
 			/* just a shorter name for the current buffer */
-			YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+			YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
 
 			int yy_c_buf_p_offset =
 				(int) ((yy_c_buf_p) - b->yy_ch_buf);
 
 			if ( b->yy_is_our_buffer )
 				{
-				int new_size = b->yy_buf_size * 2;
+				yy_size_t new_size = b->yy_buf_size * 2;
 
 				if ( new_size <= 0 )
 					b->yy_buf_size += b->yy_buf_size / 8;
@@ -1586,7 +1723,7 @@ static int yy_get_next_buffer (void)
 
 		/* Read in more data. */
 		YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
-			(yy_n_chars), (size_t) num_to_read );
+			(yy_n_chars), num_to_read );
 
 		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
 		}
@@ -1682,7 +1819,7 @@ static int yy_get_next_buffer (void)
 	yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
 	yy_is_jam = (yy_current_state == 72);
 
-	return yy_is_jam ? 0 : yy_current_state;
+		return yy_is_jam ? 0 : yy_current_state;
 }
 
     static void yyunput (int c, register char * yy_bp )
@@ -1697,7 +1834,7 @@ static int yy_get_next_buffer (void)
 	if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
 		{ /* need to shift things up to make room */
 		/* +2 for EOB chars. */
-		register int number_to_move = (yy_n_chars) + 2;
+		register yy_size_t number_to_move = (yy_n_chars) + 2;
 		register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
 					YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
 		register char *source =
@@ -1746,7 +1883,7 @@ static int yy_get_next_buffer (void)
 
 		else
 			{ /* need more input */
-			int offset = (yy_c_buf_p) - (yytext_ptr);
+			yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
 			++(yy_c_buf_p);
 
 			switch ( yy_get_next_buffer(  ) )
@@ -2020,7 +2157,7 @@ void slice_pop_buffer_state (void)
  */
 static void slice_ensure_buffer_stack (void)
 {
-	int num_to_alloc;
+	yy_size_t num_to_alloc;
     
 	if (!(yy_buffer_stack)) {
 
@@ -2112,17 +2249,17 @@ YY_BUFFER_STATE slice__scan_string (yyconst char * yystr )
 
 /** Setup the input buffer state to scan the given bytes. The next call to slice_lex() will
  * scan from a @e copy of @a bytes.
- * @param bytes the byte buffer to scan
- * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
  * 
  * @return the newly allocated buffer state object.
  */
-YY_BUFFER_STATE slice__scan_bytes  (yyconst char * yybytes, int  _yybytes_len )
+YY_BUFFER_STATE slice__scan_bytes  (yyconst char * yybytes, yy_size_t  _yybytes_len )
 {
 	YY_BUFFER_STATE b;
 	char *buf;
 	yy_size_t n;
-	int i;
+	yy_size_t i;
     
 	/* Get memory for full buffer, including space for trailing EOB's. */
 	n = _yybytes_len + 2;
@@ -2204,7 +2341,7 @@ FILE *slice_get_out  (void)
 /** Get the length of the current token.
  * 
  */
-int slice_get_leng  (void)
+yy_size_t slice_get_leng  (void)
 {
         return slice_leng;
 }
@@ -2352,7 +2489,7 @@ void slice_free (void * ptr )
 
 #define YYTABLES_NAME "yytables"
 
-#line 478 "Scanner.l"
+#line 595 "Scanner.l"
 
 
 
diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l
index a5f8d439a2d..9054c1e90ed 100644
--- a/cpp/src/Slice/Scanner.l
+++ b/cpp/src/Slice/Scanner.l
@@ -13,6 +13,8 @@
 #include <Slice/Grammar.h>
 #include <IceUtil/InputUtil.h>
 
+#include <iomanip>
+
 #include <stdlib.h>
 #include <math.h>
 
@@ -239,156 +241,272 @@ floating_literal	(({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]+{e
     str->literal = "\"";
     while(true)
     {
-	char c = static_cast<char>(yyinput());
+        char c = static_cast<char>(yyinput());
         str->literal += c;
-	if(c == '"')
-	{
-	    break;
-	}
-	else if(c == EOF)
-	{
-	    unit->error("EOF in string");
-	    break;
-	}
-	else if(c == '\n')
-	{
-	    unit->error("newline in string");
-	}
-	else if(c == '\\')
-	{
-	    char next = static_cast<char>(yyinput());
+        if(c == '"')
+        {
+            break;
+        }
+        else if(c == EOF)
+        {
+            unit->error("EOF in string");
+            break;
+        }
+        else if(c == '\n')
+        {
+            unit->error("newline in string");
+        }
+        else if(c == '\\')
+        {
+            char next = static_cast<char>(yyinput());
             str->literal += next;
-	    switch(next)
-	    {
-		case '\\':
-		case '"':
-		case '\'':
-		{
-		    str->v += next;
-		    break;
-		}
-	    
-		case 'n':
-		{
-		    str->v += '\n';
-		    break;
-		}
-	    
-		case 'r':
-		{
-		    str->v += '\r';
-		    break;
-		}
-
-		case 't':
-		{
-		    str->v += '\t';
-		    break;
-		}
-	    
-		case 'v':
-		{
-		    str->v += '\v';
-		    break;
-		}
-	    
-		case 'f':
-		{
-		    str->v += '\f';
-		    break;
-		}
-
-		case 'a':
-		{
-		    str->v += '\a';
-		    break;
-		}
-
-		case 'b':
-		{
-		    str->v += '\b';
-		    break;
-		}
-
-		case '?':
-		{
-		    str->v += '\?';
-		    break;
-		}
-
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		{
-		    static string octalDigits = "01234567";
-		    unsigned short us = next - '0';
-		    if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
-		    {
+            switch(next)
+            {
+                case '\\':
+                {
+                    str->v += '\\';
+                    str->v += '\\';
+                    break;
+                }
+                case '"':
+                case '\'':
+                {
+                    str->v += next;
+                    break;
+                }
+            
+                case 'n':
+                {
+                    str->v += '\n';
+                    break;
+                }
+            
+                case 'r':
+                {
+                    str->v += '\r';
+                    break;
+                }
+
+                case 't':
+                {
+                    str->v += '\t';
+                    break;
+                }
+            
+                case 'v':
+                {
+                    str->v += '\v';
+                    break;
+                }
+            
+                case 'f':
+                {
+                    str->v += '\f';
+                    break;
+                }
+
+                case 'a':
+                {
+                    str->v += '\a';
+                    break;
+                }
+
+                case 'b':
+                {
+                    str->v += '\b';
+                    break;
+                }
+
+                case '?':
+                {
+                    str->v += '\?';
+                    break;
+                }
+
+                //
+                // Octal value \nnn: up to three octal digits, stopping at the first non-octal character.
+                //
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                {
+                    static string octalDigits = "01234567";
+                    string escape;
+                    escape += next;
+                    for(int i = 0; i < 2; ++i)
+                    {
+                        next = static_cast<char>(yyinput());
+                        if(octalDigits.find_first_of(next) == string::npos)
+                        {
+                            unput(next);
+                            break;
+                        }
+                        escape += next;
+                    }
+                    // The first digit is already in the literal; append only the digits read here.
+                    str->literal += escape.substr(1);
+                    IceUtil::Int64 value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    else if(value > 255)
+                    {
+                        ostringstream os;
+                        os << "octal escape sequence out of range: '\\" << oct << value << "'";
+                        unit->warning(os.str());
+                    }
+                    str->v += static_cast<char>(value);
+                    break;
+                }
+                case 'x':
+                {
+                    IceUtil::Int64 value = 0;
+                    string escape = "";
+                    while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
+                    {
+                        escape += next;
+                    }
+                    unput(next);
+                    
+                    str->literal += escape;
+                    value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    else if(value > 255)
+                    {
+                        ostringstream os;
+                        os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+                        unit->warning(os.str());
+                    }
+                    str->v += static_cast<char>(value);
+                    break;
+                }
+        
+                //
+                // Universal character name \unnnn code point U+nnnn
+                //
+                case 'u':
+                {
+                    IceUtil::Int64 value = 0;
+                    string escape = "";
+
+                    for(int i = 0; i < 4; ++i)
+                    {
+                        next = static_cast<char>(yyinput());
                         str->literal += next;
-		    	us = us * 8 + next - '0';
-			if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
-			{
-			    us = us * 8 + next - '0';
-			}
-			else
-			{
-			    unput(next);
-			}
-		    }
-		    else
-		    {
-		    	unput(next);
-		    }
-		    if(us == 0)
-		    {
-			unit->error("illegal NUL character in string constant");
-		    }
-		    str->v += static_cast<char>(us);
-		    break;
-		}
-		case 'x':
-		{
-		    IceUtil::Int64 ull = 0;
-		    while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
-		    {
+                        if(!isxdigit(static_cast<unsigned char>(next)))
+                        {
+                            unit->error("unknown escape sequence in string constant: " + str->literal);
+                            break;
+                        }
+                        escape += next;
+                    }
+                    
+                    value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+                    ostringstream os;
+                    os << '\\' << 'u';
+                    os.fill('0');
+                    os.width(4);
+                    os << hex << value;
+                    
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    
+                    
+                    //
+                    // Determine if a character is a surrogate:
+                    // 
+                    // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+                    // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+                    //
+                    else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+                    {
+                        unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+                    }
+
+                    str->v += os.str();
+                    
+                    break;
+                }
+                
+                case 'U':
+                {
+                    IceUtil::Int64 value = 0;
+                    string escape = "";
+                    
+                    for(int i = 0; i < 8; ++i)
+                    {
+                        next = static_cast<char>(yyinput());
                         str->literal += next;
-			ull *= 16;
-			if(isdigit(static_cast<unsigned char>(next)))
-			{
-			    ull += next - '0';
-			}
-			else if(islower(static_cast<unsigned char>(next)))
-			{
-			    ull += next - 'a' + 10;
-			}
-			else
-			{
-			    ull += next - 'A' + 10;
-			}
-		    }
-		    unput(next);
-		    if(ull == 0)
-		    {
-			unit->error("illegal NUL character in string constant");
-		    }
-		    str->v += static_cast<char>(ull);
-		    break;
-		}
-	
-		// TODO: add universal character names
-	    
-		default:
-		{
-		    str->v += c;
-		    unput(next);
-		}
-	    }
-	}
-	else
-	{
-	    str->v += c;
-	}
+                        if(!isxdigit(static_cast<unsigned char>(next)))
+                        {
+
+                            unit->error("unknown escape sequence in string constant: " + str->literal);
+                            break;
+                        }
+                        escape += next;
+                    }
+                    
+                    value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+                    ostringstream os;
+                    os << '\\' << 'U';
+                    os.fill('0');
+                    os.width(8);
+                    os << hex << value;
+                    
+                    if(value == 0)
+                    {
+                        unit->error("illegal NUL character in string constant");
+                    }
+                    
+                    //
+                    // Determine if a character is a surrogate:
+                    // 
+                    // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+                    // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+                    //
+                    else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+                    {
+                        unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+                    }
+
+                    str->v += os.str();
+                    break;
+                }
+
+                default:
+                {
+                    ostringstream os;
+                    os << "unknown escape sequence '\\" << next << "'";
+                    unit->warning(os.str());
+                    //
+                    // We escape the backslash in an unknown escape sequence
+                    // to keep compatibility with 3.6
+                    //
+                    str->v += '\\';
+                    str->v += c;
+                    unput(next);
+                }
+            }
+        }
+        else
+        {
+            str->v += c;
+        }
     }
     *yylvalp = str;
     return ICE_STRING_LITERAL;
diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp
index 9937c9b0f30..bcb5d5d2475 100644
--- a/cpp/src/slice2cpp/Gen.cpp
+++ b/cpp/src/slice2cpp/Gen.cpp
@@ -93,13 +93,13 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
                 {
                     switch(*c)
                     {
-                        case '\\':
                         case '"':
                         {
                             out << "\\";
                             break;
                         }
                     }
+                    
                     out << *c;                              // Print normally if in basic source character set
                 }
             }
diff --git a/cpp/src/slice2cs/Gen.cpp b/cpp/src/slice2cs/Gen.cpp
index cf09aa9df1b..e537f90f3ea 100644
--- a/cpp/src/slice2cs/Gen.cpp
+++ b/cpp/src/slice2cs/Gen.cpp
@@ -10,6 +10,7 @@
 #include <IceUtil/DisableWarnings.h>
 #include <IceUtil/Functional.h>
 #include <IceUtil/StringUtil.h>
+#include <IceUtil/InputUtil.h>
 #include <Gen.h>
 #include <limits>
 #include <sys/stat.h>
@@ -20,6 +21,7 @@
 #endif
 #include <IceUtil/Iterator.h>
 #include <IceUtil/UUID.h>
+#include <IceUtil/Unicode.h>
 #include <Slice/Checksum.h>
 #include <Slice/DotNetNames.h>
 #include <Slice/FileTracker.h>
@@ -34,6 +36,45 @@ using namespace IceUtilInternal;
 namespace
 {
 
+string
+u16CodePoint(unsigned short value)
+{
+    ostringstream s;
+    s << "\\u";
+    s << hex;
+    s.width(4);
+    s.fill('0');
+    s << value;
+    return s.str();
+}
+
+
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+    vector<unsigned short> u16buffer;
+    IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+    switch(result)
+    {
+        case conversionOK:
+            break;
+        case sourceExhausted:
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+        case sourceIllegal:
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+        default:
+        {
+            assert(0);
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+        }
+    }
+    
+    for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+    {
+        out << u16CodePoint(*c);
+    }
+}
+
 string
 sliceModeToIceMode(Operation::Mode opMode)
 {
@@ -1983,41 +2024,120 @@ Slice::CsVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePt
             // here because they are sensitive to the current locale.
             //
             static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
-                                                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-                                                   "0123456789"
-                                                   "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
+                                                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                                    "0123456789"
+                                                    "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
+
             static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
 
             _out << "\"";                                    // Opening "
 
-            for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+            vector<unsigned char> u8buffer;                  // Buffer to convert multibyte characters
+            
+            for(size_t i = 0; i < value.size();)
             {
-                if(charSet.find(*c) == charSet.end())
+                //
+                // Flush pending multibyte characters before any single byte character, keeping input order
+                //
+                if(static_cast<unsigned char>(value[i]) < 128 && !u8buffer.empty())
+                {
+                    writeU8Buffer(u8buffer, _out);
+                    u8buffer.clear();
+                }
+                if(charSet.find(value[i]) == charSet.end())
                 {
-                    unsigned char uc = *c;                   // char may be signed, so make it positive
-                    ostringstream s;
-                    s << "\\u";                      // Print as unicode if not in basic source character set
-                    s << hex;
-                    s.width(4);
-                    s.fill('0');
-                    s << static_cast<unsigned>(uc);
-                    _out << s.str();
+                    if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+                    {
+                        //
+                        // Print as unicode if not in basic source character set
+                        //
+                        _out << u16CodePoint(static_cast<unsigned int>(value[i]));
+                    }
+                    else
+                    {
+                        u8buffer.push_back(value[i]);
+                    }
                 }
                 else
                 {
-                   switch(*c)
+                    switch(value[i])
                     {
                         case '\\':
+                        {
+                            string s = "\\";
+                            size_t j = i + 1;
+                            for(; j < value.size(); ++j)
+                            {
+                                if(value[j] != '\\')
+                                {
+                                    break;
+                                }
+                                s += "\\";
+                            }
+                            
+                            //
+                            // An even number of backslashes escape each other, so the
+                            // codepoint is interpreted as its literal characters
+                            //
+                            // \\U00000041  - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+                            // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+                            //
+                            if(s.size() % 2 != 0 && value[j] == 'U')
+                            {
+                                _out << s.substr(0, s.size() - 1);
+                                i = j + 1;
+
+                                string codepoint = value.substr(j + 1, 8);
+                                assert(codepoint.size() ==  8);
+
+                                IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                                
+                                
+                                //
+                                // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+                                // and is represented using a Unicode surrogate pair. 
+                                //
+                                if(v > 0xFFFF)
+                                {
+                                    unsigned int high = ((v - 0x10000) / 0x400) + 0xD800;
+                                    unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00;
+                                    _out << u16CodePoint(high);
+                                    _out << u16CodePoint(low);
+                                }
+                                else
+                                {
+                                    _out << "\\U" << codepoint;
+                                }
+                                
+                                i = j + 1 + 8;
+                            }
+                            else
+                            {
+                                _out << s;
+                                i = j;
+                            }
+                            continue;
+                        }
                         case '"':
                         {
                             _out << "\\";
                             break;
                         }
                     }
-                    _out << *c;                              // Print normally if in basic source character set
+                    _out << value[i];                        // Print normally if in basic source character set
                 }
+                i++;
             }
-
+            
+            //
+            // Write any pending characters in the utf8 buffer
+            //
+            if(!u8buffer.empty())
+            {
+                writeU8Buffer(u8buffer, _out);
+                u8buffer.clear();
+            }
+                    
             _out << "\"";                                    // Closing "
         }
         else if(bp && bp->kind() == Builtin::KindLong)
diff --git a/cpp/src/slice2cs/Makefile b/cpp/src/slice2cs/Makefile
index e51e24c0445..e46c1005dd6 100644
--- a/cpp/src/slice2cs/Makefile
+++ b/cpp/src/slice2cs/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR	= $(LOADER_PATH)/../$(libsubdir)
 
 include $(top_srcdir)/config/Make.rules
 
-CPPFLAGS	:= -I. $(CPPFLAGS)
+CPPFLAGS	:= -I. -I.. $(CPPFLAGS)
 
 $(NAME): $(OBJS)
 	rm -f $@
diff --git a/cpp/src/slice2cs/Makefile.mak b/cpp/src/slice2cs/Makefile.mak
index 0a422c08b34..ac748e4fabd 100644
--- a/cpp/src/slice2cs/Makefile.mak
+++ b/cpp/src/slice2cs/Makefile.mak
@@ -18,7 +18,7 @@ OBJS		= .\Gen.obj \
 
 !include $(top_srcdir)/config/Make.rules.mak
 
-CPPFLAGS	= -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS	= -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
 
 !if "$(GENERATE_PDB)" == "yes"
 PDBFLAGS        = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/src/slice2java/Gen.cpp b/cpp/src/slice2java/Gen.cpp
index e6f2bc91571..eba540f57a6 100644
--- a/cpp/src/slice2java/Gen.cpp
+++ b/cpp/src/slice2java/Gen.cpp
@@ -14,6 +14,7 @@
 #include <IceUtil/Iterator.h>
 #include <IceUtil/StringUtil.h>
 #include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
 #include <cstring>
 
 #include <limits>
@@ -23,6 +24,44 @@ using namespace Slice;
 using namespace IceUtil;
 using namespace IceUtilInternal;
 
+static string
+u16CodePoint(unsigned short value)
+{
+    ostringstream s;
+    s << "\\u";                                      // Escape prefix, written before the width is set
+    s << hex;                                        // Digits are printed in hexadecimal
+    s.width(4);                                      // Pad the value to four digits...
+    s.fill('0');                                     // ...using leading zeros
+    s << value;
+    return s.str();                                  // Text of the form \uXXXX
+}
+
+static void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+    vector<unsigned short> u16buffer;                // Receives the UTF-16 form of u8buffer
+    IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+    switch(result)
+    {
+        case conversionOK:
+            break;
+        case sourceExhausted:                        // Conversion failures are reported as exceptions
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+        case sourceIllegal:
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+        default:                                     // Any other result is treated as unexpected
+        {
+            assert(0);
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__); // Still fails when assert is compiled out
+        }
+    }
+    
+    for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+    {
+        out << u16CodePoint(*c);                     // One \uXXXX escape per UTF-16 code unit
+    }
+}
+
 static string
 sliceModeToIceMode(Operation::Mode opMode)
 {
@@ -1863,54 +1902,153 @@ Slice::JavaVisitor::writeConstantValue(Output& out, const TypePtr& type, const S
                                                            "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
                     static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
                     out << "\"";
-
-                    for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+                    
+                    vector<unsigned char> u8buffer;                  // Buffer to convert multibyte characters
+                    
+                    for(size_t i = 0; i < value.size();)
                     {
-                        if(charSet.find(*c) == charSet.end())
+                        //
+                        // Flush pending multibyte characters before any single byte character, keeping input order
+                        //
+                        if(static_cast<unsigned char>(value[i]) < 128 && !u8buffer.empty())
+                        {
+                            writeU8Buffer(u8buffer, out);
+                            u8buffer.clear();
+                        }
+                        if(charSet.find(value[i]) == charSet.end())
                         {
-                            switch(*c)
+                            char c = value[i];
+                            if(static_cast<unsigned char>(c) < 128) // Single byte character
                             {
                                 //
-                                // Java doesn't want '\n' or '\r\n' encoded as universal
-                                // characters, that gives an error "unclosed string literal"
+                                // Print as unicode if not in basic source character set
                                 //
-                                case '\r':
-                                {
-                                    out << "\\r";
-                                    break;
-                                }
-                                case '\n':
+                                switch(c)
                                 {
-                                    out << "\\n";
-                                    break;
-                                }
-                                default:
-                                {
-                                    unsigned char uc = *c;
-                                    ostringstream s;
-                                    s << "\\u";
-                                    s.flags(ios_base::hex);
-                                    s.width(4);
-                                    s.fill('0');
-                                    s << static_cast<unsigned>(uc);
-                                    out << s.str();
-                                    break;
+                                    //
+                                    // Java doesn't want '\n' or '\r\n' encoded as universal
+                                    // characters, that gives an error "unclosed string literal"
+                                    //
+                                    case '\r':
+                                    {
+                                        out << "\\r";
+                                        break;
+                                    }
+                                    case '\n':
+                                    {
+                                        out << "\\n";
+                                        break;
+                                    }
+                                    default:
+                                    {
+                                        out << u16CodePoint(c);
+                                        break;
+                                    }
                                 }
                             }
+                            else
+                            {
+                                u8buffer.push_back(value[i]);
+                            }
                         }
                         else
                         {
-                            switch(*c)
+                            switch(value[i])
                             {
                                 case '\\':
+                                {
+                                    string s = "\\";
+                                    size_t j = i + 1;
+                                    for(; j < value.size(); ++j)
+                                    {
+                                        if(value[j] != '\\')
+                                        {
+                                            break;
+                                        }
+                                        s += "\\";
+                                    }
+                                    
+                                    //
+                                    // An even number of backslashes escape each other, so the
+                                    // codepoint is interpreted as its literal characters
+                                    //
+                                    // \\U00000041  - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+                                    // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+                                    //
+                                    if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+                                    {
+                                        size_t sz = value[j] == 'U' ? 8 : 4;
+                                        out << s.substr(0, s.size() - 1);
+                                        i = j + 1;
+
+                                        string codepoint = value.substr(j + 1, sz);
+                                        assert(codepoint.size() ==  sz);
+
+                                        IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                                        
+                                        
+                                        //
+                                        // Java doesn't like these special characters encoded as universal characters
+                                        //
+                                        if(v == 0x5c)
+                                        {
+                                            out << "\\\\";
+                                        }
+                                        else if(v == 0xa)
+                                        {
+                                            out << "\\n";
+                                        }
+                                        else if(v == 0xd)
+                                        {
+                                            out << "\\r";
+                                        }
+                                        else if(v == 0x22)
+                                        {
+                                            out << "\\\"";
+                                        }
+                                        //
+                                        // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+                                        // and is represented using a Unicode surrogate pair. 
+                                        //
+                                        else if(v > 0xFFFF)
+                                        {
+                                            unsigned int high = ((v - 0x10000) / 0x400) + 0xD800;
+                                            unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00;
+                                            out << u16CodePoint(high);
+                                            out << u16CodePoint(low);
+                                        }
+                                        else
+                                        {
+                                            out << u16CodePoint(v);
+                                        }
+                                        
+                                        i = j + 1 + sz;
+                                    }
+                                    else
+                                    {
+                                        out << s;
+                                        i = j;
+                                    }
+                                    continue;
+                                }
                                 case '"':
                                 {
                                     out << "\\";
                                     break;
                                 }
                             }
-                            out << *c;
+                            out << value[i];                        // Print normally if in basic source character set
                         }
+                        i++;
+                    }
+
+                    //
+                    // Write any pending characters in the utf8 buffer
+                    //
+                    if(!u8buffer.empty())
+                    {
+                        writeU8Buffer(u8buffer, out);
+                        u8buffer.clear();
                     }
 
                     out << "\"";
diff --git a/cpp/src/slice2java/Makefile b/cpp/src/slice2java/Makefile
index 010554d8e12..e2a882bfb6f 100644
--- a/cpp/src/slice2java/Makefile
+++ b/cpp/src/slice2java/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR	= $(LOADER_PATH)/../$(libsubdir)
 
 include $(top_srcdir)/config/Make.rules
 
-CPPFLAGS	:= -I. $(CPPFLAGS)
+CPPFLAGS	:= -I. -I.. $(CPPFLAGS)
 
 $(NAME): $(OBJS)
 	rm -f $@
diff --git a/cpp/src/slice2java/Makefile.mak b/cpp/src/slice2java/Makefile.mak
index 26a40e3aa55..1d93e22c79d 100644
--- a/cpp/src/slice2java/Makefile.mak
+++ b/cpp/src/slice2java/Makefile.mak
@@ -18,7 +18,7 @@ OBJS		= .\Gen.obj \
 
 !include $(top_srcdir)/config/Make.rules.mak
 
-CPPFLAGS	= -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS	= -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
 
 !if "$(GENERATE_PDB)" == "yes"
 PDBFLAGS        = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/src/slice2js/Gen.cpp b/cpp/src/slice2js/Gen.cpp
index 11bd0f608e5..871dd7d47b3 100644
--- a/cpp/src/slice2js/Gen.cpp
+++ b/cpp/src/slice2js/Gen.cpp
@@ -20,6 +20,7 @@
 #include <direct.h>
 #endif
 #include <IceUtil/Iterator.h>
+#include <IceUtil/Unicode.h>
 #include <IceUtil/UUID.h>
 #include <Slice/Checksum.h>
 #include <Slice/FileTracker.h>
@@ -34,6 +35,44 @@ using namespace IceUtilInternal;
 namespace
 {
 
+string
+u16CodePoint(unsigned short value)
+{
+    ostringstream s;
+    s << "\\u";                                      // Escape prefix, written before the width is set
+    s << hex;                                        // Digits are printed in hexadecimal
+    s.width(4);                                      // Pad the value to four digits...
+    s.fill('0');                                     // ...using leading zeros
+    s << value;
+    return s.str();                                  // Text of the form \uXXXX
+}
+
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+    vector<unsigned short> u16buffer;                // Receives the UTF-16 form of u8buffer
+    IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+    switch(result)
+    {
+        case conversionOK:
+            break;
+        case sourceExhausted:                        // Conversion failures are reported as exceptions
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+        case sourceIllegal:
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+        default:                                     // Any other result is treated as unexpected
+        {
+            assert(0);
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__); // Still fails when assert is compiled out
+        }
+    }
+    
+    for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+    {
+        out << u16CodePoint(*c);                     // One \uXXXX escape per UTF-16 code unit
+    }
+}
+
 string
 sliceModeToIceMode(Operation::Mode opMode)
 {
@@ -479,34 +518,112 @@ Slice::JsVisitor::writeConstantValue(const string& scope, const TypePtr& type, c
 
             _out << "\"";                                    // Opening "
 
-            for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+            vector<unsigned char> u8buffer;                  // Buffer to convert multibyte characters
+            
+            for(size_t i = 0; i < value.size();)
             {
-                if(charSet.find(*c) == charSet.end())
-                {
-                    unsigned char uc = *c;                   // char may be signed, so make it positive
-                    ostringstream s;
-                    s << "\\u";                      // Print as unicode if not in basic source character set
-                    s << hex;
-                    s.width(4);
-                    s.fill('0');
-                    s << static_cast<unsigned>(uc);
-                    _out << s.str();
+                //
+                // Flush pending multibyte characters before any single byte character, keeping input order
+                //
+                if(static_cast<unsigned char>(value[i]) < 128 && !u8buffer.empty())
+                {
+                    writeU8Buffer(u8buffer, _out);
+                    u8buffer.clear();
+                }
+                if(charSet.find(value[i]) == charSet.end())
+                {
+                    if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+                    {
+                        //
+                        // Print as unicode if not in basic source character set
+                        //
+                        _out << u16CodePoint(static_cast<unsigned int>(value[i]));
+                    }
+                    else
+                    {
+                        u8buffer.push_back(value[i]);
+                    }
                 }
                 else
                 {
-                   switch(*c)
+                    switch(value[i])
                     {
                         case '\\':
+                        {
+                            string s = "\\";
+                            size_t j = i + 1;
+                            for(; j < value.size(); ++j)
+                            {
+                                if(value[j] != '\\')
+                                {
+                                    break;
+                                }
+                                s += "\\";
+                            }
+                            
+                            //
+                            // An even number of backslashes escape each other, so the
+                            // codepoint is interpreted as its literal characters
+                            //
+                            // \\U00000041  - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+                            // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+                            //
+                            if(s.size() % 2 != 0 && value[j] == 'U')
+                            {
+                                _out << s.substr(0, s.size() - 1);
+                                i = j + 1;
+
+                                string codepoint = value.substr(j + 1, 8);
+                                assert(codepoint.size() ==  8);
+
+                                IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                                
+                                
+                                //
+                                // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+                                // and is represented using a Unicode surrogate pair. 
+                                //
+                                if(v > 0xFFFF)
+                                {
+                                    unsigned int high = ((v - 0x10000) / 0x400) + 0xD800;
+                                    unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00;
+                                    _out << u16CodePoint(high);
+                                    _out << u16CodePoint(low);
+                                }
+                                else
+                                {
+                                    _out << u16CodePoint(v);
+                                }
+                                
+                                i = j + 1 + 8;
+                            }
+                            else
+                            {
+                                _out << s;
+                                i = j;
+                            }
+                            continue;
+                        }
                         case '"':
                         {
                             _out << "\\";
                             break;
                         }
                     }
-                    _out << *c;                              // Print normally if in basic source character set
+                    _out << value[i];                        // Print normally if in basic source character set
                 }
+                i++;
             }
-
+            
+            //
+            // Write any pending characters in the utf8 buffer
+            //
+            if(!u8buffer.empty())
+            {
+                writeU8Buffer(u8buffer, _out);
+                u8buffer.clear();
+            }
+                    
             _out << "\"";                                    // Closing "
         }
         else if(bp && bp->kind() == Builtin::KindLong)
diff --git a/cpp/src/slice2js/Makefile b/cpp/src/slice2js/Makefile
index bd1bbe967f8..0aaf14a7a0d 100644
--- a/cpp/src/slice2js/Makefile
+++ b/cpp/src/slice2js/Makefile
@@ -21,7 +21,7 @@ RPATH_DIR	= $(LOADER_PATH)/../$(libsubdir)
 
 include $(top_srcdir)/config/Make.rules
 
-CPPFLAGS	:= -I. $(CPPFLAGS)
+CPPFLAGS	:= -I. -I.. $(CPPFLAGS)
 
 $(NAME): $(OBJS)
 	rm -f $@
diff --git a/cpp/src/slice2js/Makefile.mak b/cpp/src/slice2js/Makefile.mak
index 0014e231948..79bd9f23cca 100644
--- a/cpp/src/slice2js/Makefile.mak
+++ b/cpp/src/slice2js/Makefile.mak
@@ -19,7 +19,7 @@ OBJS		= .\Gen.obj \
 
 !include $(top_srcdir)/config/Make.rules.mak
 
-CPPFLAGS	= -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS	= -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
 
 !if "$(GENERATE_PDB)" == "yes"
 PDBFLAGS        = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/src/slice2php/Main.cpp b/cpp/src/slice2php/Main.cpp
index 77e2a93e921..6de877cec54 100644
--- a/cpp/src/slice2php/Main.cpp
+++ b/cpp/src/slice2php/Main.cpp
@@ -16,6 +16,7 @@
 #include <IceUtil/StringUtil.h>
 #include <IceUtil/Mutex.h>
 #include <IceUtil/MutexPtrLock.h>
+#include <IceUtil/Unicode.h>
 #include <Slice/Checksum.h>
 #include <Slice/Preprocessor.h>
 #include <Slice/FileTracker.h>
@@ -1270,9 +1271,10 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
 
                 _out << "\"";                                       // Opening "
 
-                for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+                for(size_t i = 0; i < value.size();)
                 {
-                    switch(*c)
+                    char c = value[i];
+                    switch(c)
                     {
                     case '$':
                     {
@@ -1286,8 +1288,79 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
                     }
                     case '\\':
                     {
-                        _out << "\\\\";
-                        break;
+                        
+                        string s = "\\";
+                        size_t j = i + 1;
+                        for(; j < value.size(); ++j)
+                        {
+                            if(value[j] != '\\')
+                            {
+                                break;
+                            }
+                            s += "\\";
+                        }
+                        
+                        //
+                        // An even number of backslashes escape each other, so the
+                        // codepoint is interpreted as its literal characters
+                        //
+                        // \\u0041  - ['\\', 'u', '0', '0', '4', '1']
+                        // \\\u0041 - ['\\', 'A'] (41 is the codepoint for 'A'; \U takes 8 hex digits instead of 4)
+                        //
+                        if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+                        {
+                            //
+                            // Convert codepoint to UTF8 bytes and write the escaped bytes
+                            //
+                            _out << s.substr(0, s.size() - 1);
+                            
+                            size_t sz = value[j] == 'U' ? 8 : 4;
+                            string codepoint = value.substr(j + 1, sz);
+                            assert(codepoint.size() ==  sz);
+
+                            IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+                            
+                            
+                            vector<unsigned int> u32buffer;
+                            u32buffer.push_back(v);
+                            
+                            vector<unsigned char> u8buffer;
+                            
+                            IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+                            switch(result)
+                            {
+                                case conversionOK:
+                                    break;
+                                case sourceExhausted:
+                                    throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+                                case sourceIllegal:
+                                    throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+                                default:
+                                {
+                                    assert(0);
+                                    throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+                                }
+                            }
+                            
+                            ostringstream s;
+                            for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+                            {
+                                s << "\\";
+                                s.fill('0');
+                                s.width(3);
+                                s << oct;
+                                s << static_cast<unsigned int>(*q);
+                            }
+                            _out << s.str();
+                            
+                            i = j + 1 + sz;
+                        }
+                        else
+                        {
+                            _out << s;
+                            i = j;
+                        }
+                        continue;
                     }
                     case '\r':
                     {
@@ -1304,11 +1377,6 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
                         _out << "\\t";
                         break;
                     }
-                    case '\b':
-                    {
-                        _out << "\\b";
-                        break;
-                    }
                     case '\f':
                     {
                         _out << "\\f";
@@ -1316,9 +1384,9 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
                     }
                     default:
                     {
-                        if(charSet.find(*c) == charSet.end())
+                        if(charSet.find(c) == charSet.end())
                         {
-                            unsigned char uc = *c;              // Char may be signed, so make it positive.
+                            unsigned char uc = c;              // Char may be signed, so make it positive.
                             stringstream s;
                             s << "\\";                          // Print as octal if not in basic source character set.
                             s.flags(ios_base::oct);
@@ -1329,11 +1397,12 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
                         }
                         else
                         {
-                            _out << *c;                         // Print normally if in basic source character set.
+                            _out << c;                          // Print normally if in basic source character set.
                         }
                         break;
                     }
                     }
+                    ++i;
                 }
 
                 _out << "\"";                                   // Closing "
diff --git a/cpp/src/slice2php/Makefile b/cpp/src/slice2php/Makefile
index 8bc91a8cd66..62f82531201 100644
--- a/cpp/src/slice2php/Makefile
+++ b/cpp/src/slice2php/Makefile
@@ -19,7 +19,7 @@ RPATH_DIR	= $(LOADER_PATH)/../$(libsubdir)
 
 include $(top_srcdir)/config/Make.rules
 
-CPPFLAGS	:= -I. $(CPPFLAGS)
+CPPFLAGS	:= -I. -I.. $(CPPFLAGS)
 
 $(NAME): $(OBJS)
 	rm -f $@
diff --git a/cpp/src/slice2php/Makefile.mak b/cpp/src/slice2php/Makefile.mak
index 47cab5776bd..1218590c485 100644
--- a/cpp/src/slice2php/Makefile.mak
+++ b/cpp/src/slice2php/Makefile.mak
@@ -17,7 +17,7 @@ OBJS		= .\Main.obj
 
 !include $(top_srcdir)/config/Make.rules.mak
 
-CPPFLAGS	= -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS	= -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
 
 !if "$(GENERATE_PDB)" == "yes"
 PDBFLAGS        = /pdb:$(NAME:.exe=.pdb)
-- 
cgit v1.2.3