1 files changed, 343 insertions, 172 deletions
diff --git a/js/src/Ice/StringUtil.js b/js/src/Ice/StringUtil.js
index 2ab07467549..0f125dc3e02 100644
--- a/js/src/Ice/StringUtil.js
+++ b/js/src/Ice/StringUtil.js
@@ -7,66 +7,56 @@
 //
 // **********************************************************************
 
-var Ice = require("../Ice/Debug").Ice;    
-var Debug = Ice.Debug;
+const Ice = require("../Ice/Debug").Ice;
+const Debug = Ice.Debug;
 
-Ice.StringUtil =
+Ice.StringUtil = class
 {
     //
     // Return the index of the first character in str to
     // appear in match, starting from start. Returns -1 if none is
     // found.
     //
-    findFirstOf: function(str, match, start)
+    static findFirstOf(str, match, start)
     {
         start = start === undefined ? 0 : start;
-
-        var len = str.length;
-        for(var i = start; i < len; i++)
+        for(let i = start; i < str.length; i++)
         {
-            var ch = str.charAt(i);
+            const ch = str.charAt(i);
             if(match.indexOf(ch) != -1)
             {
                 return i;
             }
         }
-
         return -1;
-    },
+    }
     //
     // Return the index of the first character in str which does
     // not appear in match, starting from start. Returns -1 if none is
     // found.
     //
-    findFirstNotOf: function(str, match, start)
+    static findFirstNotOf(str, match, start)
     {
         start = start === undefined ? 0 : start;
-
-        var len = str.length;
-        for(var i = start; i < len; i++)
+        for(let i = start; i < str.length; i++)
         {
-            var ch = str.charAt(i);
+            const ch = str.charAt(i);
             if(match.indexOf(ch) == -1)
             {
                 return i;
             }
         }
-
         return -1;
-    },
+    }
     //
-    // Add escape sequences (such as "\n", or "\007") to make a string
-    // readable in ASCII. Any characters that appear in special are
-    // prefixed with a backlash in the returned string.
+    // Add escape sequences (such as "\n", or "\123") to s
     //
-    escapeString: function(s, special)
+    static escapeString(s, special, toStringMode)
     {
         special = special === undefined ? null : special;
-
-        var i, length;
         if(special !== null)
         {
-            for(i = 0, length = special.length; i < length; ++i)
+            for(let i = 0; i < special.length; ++i)
             {
                 if(special.charCodeAt(i) < 32 || special.charCodeAt(i) > 126)
                 {
@@ -75,55 +65,107 @@ Ice.StringUtil =
             }
         }
 
-        var result = [], c;
-        for(i = 0, length = s.length; i < length; ++i)
+        const result = [];
+
+        if(toStringMode === Ice.ToStringMode.Compat)
         {
-            c = s.charCodeAt(i);
-            if(c < 128)
-            {
-                encodeChar(c, result, special);
-            }
-            else if(c > 127 && c < 2048)
+            // Encode UTF-8 bytes
+            const bytes = unescape(encodeURIComponent(s));
+            for(let i = 0; i < bytes.length; ++i)
             {
-                encodeChar((c >> 6) | 192, result, special);
-                encodeChar((c & 63) | 128, result, special);
+                const c = bytes.charCodeAt(i);
+                encodeChar(c, result, special, toStringMode);
             }
-            else
+        }
+        else
+        {
+            for(let i = 0; i < s.length; ++i)
             {
-                encodeChar((c >> 12) | 224, result, special);
-                encodeChar(((c >> 6) & 63) | 128, result, special);
-                encodeChar((c & 63) | 128, result, special);
+                const c = s.charCodeAt(i);
+                if(toStringMode === Ice.ToStringMode.Unicode || c < 0xD800 || c > 0xDFFF)
+                {
+                    encodeChar(c, result, special, toStringMode);
+                }
+                else
+                {
+                    Debug.assert(toStringMode === Ice.ToStringMode.ASCII && c >= 0xD800 && c <= 0xDFFF);
+                    if(i + 1 === s.length)
+                    {
+                        throw new Error("High surrogate without low surrogate");
+                    }
+                    else
+                    {
+                        const codePoint = s.codePointAt(i);
+                        Debug.assert(codePoint > 0xFFFF);
+                        i++;
+
+                        // append \Unnnnnnnn
+                        result.push("\\U");
+                        const hex = codePoint.toString(16);
+                        for(let j = hex.length; j < 8; j++)
+                        {
+                            result.push('0');
+                        }
+                        result.push(hex);
+                    }
+                }
             }
         }
-
         return result.join("");
-    },
+    }
     //
     // Remove escape sequences added by escapeString. Throws Error
     // for an invalid input string.
     //
-    unescapeString: function(s, start, end)
+    static unescapeString(s, start, end, special)
     {
         start = start === undefined ? 0 : start;
         end = end === undefined ? s.length : end;
+        special = special === undefined ? null : special;
 
         Debug.assert(start >= 0 && start <= end && end <= s.length);
 
-        var arr = [];
-        decodeString(s, start, end, arr);
+        if(special !== null)
+        {
+            for(let i = 0; i < special.length; ++i)
+            {
+                if(special.charCodeAt(i) < 32 || special.charCodeAt(i) > 126)
+                {
+                    throw new Error("special characters must be in ASCII range 32-126");
+                }
+            }
+        }
 
-        return arr.join("");
-    },
+        // Fast path: no backslash in [start, end), so only validate and return the slice
+        let p = s.indexOf('\\', start);
+        if(p === -1 || p >= end)
+        {
+            p = start;
+            while(p < end)
+            {
+                checkChar(s, p++);
+            }
+            return s.substring(start, end);
+        }
+        else
+        {
+            const arr = [];
+            while(start < end)
+            {
+                start = decodeChar(s, start, end, special, arr);
+            }
+            return arr.join("");
+        }
+    }
     //
     // Split string helper; returns null for unmatched quotes
     //
-    splitString: function(str, delim)
+    static splitString(str, delim)
     {
-        var v = [];
-        var s = "";
-        var pos = 0;
-
-        var quoteChar = null;
+        const v = [];
+        let s = "";
+        let pos = 0;
+        let quoteChar = null;
         while(pos < str.length)
         {
             if(quoteChar === null && (str.charAt(pos) === '"' || str.charAt(pos) === '\''))
@@ -177,24 +219,23 @@ Ice.StringUtil =
         }
 
         return v;
-    },
+    }
     //
     // If a single or double quotation mark is found at the start position,
     // then the position of the matching closing quote is returned. If no
     // quotation mark is found at the start position, then 0 is returned.
     // If no matching closing quote is found, then -1 is returned.
     //
-    checkQuote: function(s, start)
+    static checkQuote(s, start)
     {
         start = start === undefined ? 0 : start;
 
-        var quoteChar = s.charAt(start);
+        let quoteChar = s.charAt(start);
         if(quoteChar == '"' || quoteChar == '\'')
         {
             start++;
-            var len = s.length;
-            var pos;
-            while(start < len && (pos = s.indexOf(quoteChar, start)) != -1)
+            let pos;
+            while(start < s.length && (pos = s.indexOf(quoteChar, start)) != -1)
             {
                 if(s.charAt(pos - 1) != '\\')
                 {
@@ -205,22 +246,19 @@ Ice.StringUtil =
             return -1; // Unmatched quote
         }
         return 0; // Not quoted
-    },
-    hashCode: function(s)
+    }
+    static hashCode(s)
     {
-        var hash = 0;
-        var n = s.length;
-
-        for(var i = 0; i < n; i++)
+        let hash = 0;
+        for(let i = 0; i < s.length; i++)
         {
             hash = 31 * hash + s.charCodeAt(i);
         }
-
         return hash;
-    },
-    toInt: function(s)
+    }
+    static toInt(s)
     {
-        var n = parseInt(s, 10);
+        const n = parseInt(s, 10);
         if(isNaN(n))
         {
             throw new Error("conversion of `" + s + "' to int failed");
@@ -230,15 +268,10 @@ Ice.StringUtil =
 };
 module.exports.Ice = Ice;
 
-//
-// Write the byte b as an escape sequence if it isn't a printable ASCII
-// character and append the escape sequence to sb. Additional characters
-// that should be escaped can be passed in special. If b is any of these
-// characters, b is preceded by a backslash in sb.
-//
-function encodeChar(b, sb, special)
+// Append c to sb as a literal character or an escape sequence, per special and toStringMode.
+function encodeChar(c, sb, special, toStringMode)
 {
-    switch(b)
+    switch(c)
     {
         case 92: // '\\'
         {
@@ -255,6 +288,19 @@ function encodeChar(b, sb, special)
             sb.push("\\\"");
             break;
         }
+        case 7: // '\a'
+        {
+            if(toStringMode === Ice.ToStringMode.Compat)
+            {
+                // Octal escape for compatibility with 3.6 and earlier
+                sb.push("\\007");
+            }
+            else
+            {
+                sb.push("\\a");
+            }
+            break;
+        }
         case 8: // '\b'
         {
             sb.push("\\b");
@@ -280,48 +326,88 @@ function encodeChar(b, sb, special)
             sb.push("\\t");
             break;
         }
+        case 11: // '\v'
+        {
+            if(toStringMode === Ice.ToStringMode.Compat)
+            {
+                // Octal escape for compatibility with 3.6 and earlier
+                sb.push("\\013");
+            }
+            else
+            {
+                sb.push("\\v");
+            }
+            break;
+        }
         default:
         {
-            if(!(b >= 32 && b <= 126))
+            const s = String.fromCharCode(c);
+
+            if(special !== null && special.indexOf(s) !== -1)
             {
                 sb.push('\\');
-                var octal = b.toString(8);
-                //
-                // Add leading zeroes so that we avoid problems during
-                // decoding. For example, consider the encoded string
-                // \0013 (i.e., a character with value 1 followed by
-                // the character '3'). If the leading zeroes were omitted,
-                // the result would be incorrectly interpreted by the
-                // decoder as a single character with value 11.
-                //
-                for(var j = octal.length; j < 3; j++)
-                {
-                    sb.push('0');
-                }
-                sb.push(octal);
+                sb.push(s);
             }
             else
             {
-                var c = String.fromCharCode(b);
-                if(special !== null && special.indexOf(c) !== -1)
+                if(c < 32 || c > 126)
                 {
-                    sb.push('\\');
-                    sb.push(c);
+                    if(toStringMode === Ice.ToStringMode.Compat)
+                    {
+                        //
+                        // When ToStringMode=Compat, c is a UTF-8 byte
+                        //
+                        Debug.assert(c < 256);
+                        sb.push('\\');
+                        const octal = c.toString(8);
+                        //
+                        // Add leading zeroes so that we avoid problems during
+                        // decoding. For example, consider the encoded string
+                        // \0013 (i.e., a character with value 1 followed by
+                        // the character '3'). If the leading zeroes were omitted,
+                        // the result would be incorrectly interpreted by the
+                        // decoder as a single character with value 11.
+                        //
+                        for(let j = octal.length; j < 3; j++)
+                        {
+                            sb.push('0');
+                        }
+                        sb.push(octal);
+                    }
+                    else if(c < 32 || c === 127 || toStringMode === Ice.ToStringMode.ASCII)
+                    {
+                        // append \\unnnn
+                        sb.push("\\u");
+                        const hex = c.toString(16);
+                        for(let j = hex.length; j < 4; j++)
+                        {
+                            sb.push('0');
+                        }
+                        sb.push(hex);
+                    }
+                    else
+                    {
+                        // keep as is
+                        sb.push(s);
+                    }
                 }
                 else
                 {
-                    sb.push(c);
+                    // printable ASCII character
+                    sb.push(s);
                 }
             }
+            break;
         }
     }
 }
+
 function checkChar(s, pos)
 {
-    var n = s.charCodeAt(pos);
-    if(!(n >= 32 && n <= 126))
+    const c = s.charCodeAt(pos);
+    if(c < 32 || c === 127)
     {
-        var msg;
+        let msg;
         if(pos > 0)
         {
             msg = "character after `" + s.substring(0, pos) + "'";
@@ -330,76 +416,133 @@ function checkChar(s, pos)
         {
             msg = "first character";
         }
-        msg += " is not a printable ASCII character (ordinal " + n + ")";
+        msg += " has invalid ordinal value " + c;
         throw new Error(msg);
     }
-    return n;
+    return s.charAt(pos);
 }
-
 //
-// Decode the character or escape sequence starting at start and return it.
-// nextStart is set to the index of the first character following the decoded
-// character or escape sequence.
+// Decode the character or escape sequence starting at start and append it to result;
+// return the index of the first character following the decoded character
+// or escape sequence.
 //
-function decodeChar(s, start, end, nextStart)
+function decodeChar(s, start, end, special, result)
 {
     Debug.assert(start >= 0);
+    Debug.assert(start < end);
     Debug.assert(end <= s.length);
 
-    if(start >= end)
+    if(s.charAt(start) != '\\')
     {
-        throw new Error("EOF while decoding string");
+        result.push(checkChar(s, start++));
     }
-
-    var c;
-
-    if(s.charAt(start) != '\\')
+    else if(start + 1 === end)
     {
-        c = checkChar(s, start++);
+        ++start;
+        result.push("\\"); // trailing backslash
     }
     else
     {
-        if(start + 1 == end)
-        {
-            throw new Error("trailing backslash");
-        }
-        switch(s.charAt(++start))
+        let c = s.charAt(++start);
+
+        switch(c)
         {
             case '\\':
             case '\'':
             case '"':
+            case '?':
             {
-                c = s.charCodeAt(start++);
+                ++start;
+                result.push(c);
+                break;
+            }
+            case 'a':
+            {
+                ++start;
+                result.push("\u0007");
                 break;
             }
             case 'b':
             {
                 ++start;
-                c = "\b".charCodeAt(0);
+                result.push("\b");
                 break;
             }
             case 'f':
             {
                 ++start;
-                c = "\f".charCodeAt(0);
+                result.push("\f");
                 break;
             }
             case 'n':
             {
                 ++start;
-                c = "\n".charCodeAt(0);
+                result.push("\n");
                 break;
             }
             case 'r':
             {
                 ++start;
-                c = "\r".charCodeAt(0);
+                result.push("\r");
                 break;
             }
             case 't':
             {
                 ++start;
-                c = "\t".charCodeAt(0);
+                result.push("\t");
+                break;
+            }
+            case 'v':
+            {
+                ++start;
+                result.push("\v");
+                break;
+            }
+            case 'u':
+            case 'U':
+            {
+                let codePoint = 0;
+                const inBMP = (c === 'u');
+                let size = inBMP ? 4 : 8;
+                ++start;
+                while(size > 0 && start < end)
+                {
+                    let charVal = s.charCodeAt(start++);
+                    if(charVal >= 0x30 && charVal <= 0x39)
+                    {
+                        charVal -= 0x30;
+                    }
+                    else if(charVal >= 0x61 && charVal <= 0x66)
+                    {
+                        charVal += 10 - 0x61;
+                    }
+                    else if(charVal >= 0x41 && charVal <= 0x46)
+                    {
+                        charVal += 10 - 0x41;
+                    }
+                    else
+                    {
+                        break; // while
+                    }
+                    codePoint = codePoint * 16 + charVal;
+                    --size;
+                }
+                if(size > 0)
+                {
+                    throw new Error("Invalid universal character name: too few hex digits");
+                }
+                if(codePoint >= 0xD800 && codePoint <= 0xDFFF)
+                {
+                    throw new Error("A universal character name cannot designate a surrogate");
+                }
+                if(inBMP || codePoint <= 0xFFFF)
+                {
+                    result.push(String.fromCharCode(codePoint));
+                }
+                else
+                {
+                    result.push(String.fromCodePoint(codePoint));
+                }
                 break;
             }
             case '0':
@@ -410,67 +553,95 @@ function decodeChar(s, start, end, nextStart)
             case '5':
             case '6':
             case '7':
+            case 'x':
             {
-                var octalChars = "01234567";
-                var val = 0;
-                for(var j = 0; j < 3 && start < end; ++j)
+                // UTF-8 byte sequence encoded with octal or hex escapes
+
+                const arr = [];
+                let more = true;
+                while(more)
                 {
-                    var ch = s.charAt(start++);
-                    if(octalChars.indexOf(ch) == -1)
+                    let val = 0;
+                    if(c === 'x')
                     {
-                        --start;
-                        break;
+                        let size = 2;
+                        ++start;
+                        while(size > 0 && start < end)
+                        {
+                            let charVal = s.charCodeAt(start++);
+                            if(charVal >= 0x30 && charVal <= 0x39)
+                            {
+                                charVal -= 0x30;
+                            }
+                            else if(charVal >= 0x61 && charVal <= 0x66)
+                            {
+                                charVal += 10 - 0x61;
+                            }
+                            else if(charVal >= 0x41 && charVal <= 0x46)
+                            {
+                                charVal += 10 - 0x41;
+                            }
+                            else
+                            {
+                                break; // while
+                            }
+                            val = val * 16 + charVal;
+                            --size;
+                        }
+                        if(size === 2)
+                        {
+                            throw new Error("Invalid \\x escape sequence: no hex digit");
+                        }
+                    }
+                    else
+                    {
+                        for(let j = 0; j < 3 && start < end; ++j)
+                        {
+                            let charVal = s.charCodeAt(start++) - '0'.charCodeAt(0);
+                            if(charVal < 0 || charVal > 7)
+                            {
+                                --start; // move back
+                                Debug.assert(j !== 0); // must be at least one digit
+                                break; // for
+                            }
+                            val = val * 8 + charVal;
+                        }
+                        if(val > 255)
+                        {
+                            throw new Error("octal value \\" + val.toString(8) + " (" + val + ") is out of range");
+                        }
+                    }
+
+                    arr.push(String.fromCharCode(val));
+
+                    more = false;
+                    if((start + 1 < end) && s.charAt(start) === '\\')
+                    {
+                        c = s.charAt(start + 1);
+                        const charVal = s.charCodeAt(start + 1);
+                        if(c === 'x' || (charVal >= 0x30 && charVal <= 0x37))
+                        {
+                            start++;
+                            more = true;
+                        }
                     }
-                    val = val * 8 + parseInt(ch);
-                }
-                if(val > 255)
-                {
-                    var msg = "octal value \\" + val.toString(8) + " (" + val + ") is out of range";
-                    throw new Error(msg);
                 }
-                c = val;
+
+                // Decode UTF-8 arr into string
+                result.push(decodeURIComponent(escape(arr.join(""))));
                 break;
             }
             default:
             {
-                c = checkChar(s, start++);
+                if(special === null || special.length === 0 || special.indexOf(c) === -1)
+                {
+                    result.push("\\"); // not in special, so we keep the backslash
+                }
+                result.push(checkChar(s, start++));
                 break;
             }
         }
     }
-    nextStart.value = start;
-    return c;
-}
-
-//
-// Remove escape sequences from s and append the result to sb.
-// Return true if successful, false otherwise.
-//
-function decodeString(s, start, end, arr)
-{
-    var nextStart = { 'value': 0 }, c, c2, c3;
-    while(start < end)
-    {
-        c = decodeChar(s, start, end, nextStart);
-        start = nextStart.value;
 
-        if(c < 128)
-        {
-            arr.push(String.fromCharCode(c));
-        }
-        else if(c > 191 && c < 224)
-        {
-            c2 = decodeChar(s, start, end, nextStart);
-            start = nextStart.value;
-            arr.push(String.fromCharCode(((c & 31) << 6) | (c2 & 63)));
-        }
-        else
-        {
-            c2 = decodeChar(s, start, end, nextStart);
-            start = nextStart.value;
-            c3 = decodeChar(s, start, end, nextStart);
-            start = nextStart.value;
-            arr.push(String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63)));
-        }
-    }
+    return start;
 }