diff options
Diffstat (limited to 'js/src/Ice/StringUtil.js')
-rw-r--r-- | js/src/Ice/StringUtil.js | 515 |
1 files changed, 343 insertions, 172 deletions
diff --git a/js/src/Ice/StringUtil.js b/js/src/Ice/StringUtil.js index 2ab07467549..0f125dc3e02 100644 --- a/js/src/Ice/StringUtil.js +++ b/js/src/Ice/StringUtil.js @@ -7,66 +7,56 @@ // // ********************************************************************** -var Ice = require("../Ice/Debug").Ice; -var Debug = Ice.Debug; +const Ice = require("../Ice/Debug").Ice; +const Debug = Ice.Debug; -Ice.StringUtil = +Ice.StringUtil = class { // // Return the index of the first character in str to // appear in match, starting from start. Returns -1 if none is // found. // - findFirstOf: function(str, match, start) + static findFirstOf(str, match, start) { start = start === undefined ? 0 : start; - - var len = str.length; - for(var i = start; i < len; i++) + for(let i = start; i < str.length; i++) { - var ch = str.charAt(i); + const ch = str.charAt(i); if(match.indexOf(ch) != -1) { return i; } } - return -1; - }, + } // // Return the index of the first character in str which does // not appear in match, starting from start. Returns -1 if none is // found. // - findFirstNotOf: function(str, match, start) + static findFirstNotOf(str, match, start) { start = start === undefined ? 0 : start; - - var len = str.length; - for(var i = start; i < len; i++) + for(let i = start; i < str.length; i++) { - var ch = str.charAt(i); + const ch = str.charAt(i); if(match.indexOf(ch) == -1) { return i; } } - return -1; - }, + } // - // Add escape sequences (such as "\n", or "\007") to make a string - // readable in ASCII. Any characters that appear in special are - // prefixed with a backlash in the returned string. + // Add escape sequences (such as "\n", or "\123") to s // - escapeString: function(s, special) + static escapeString(s, special, toStringMode) { special = special === undefined ? null : special; - - var i, length; if(special !== null) { - for(i = 0, length = special.length; i < length; ++i) + for(let i = 0; i < special.length; ++i) { if(special.charCodeAt(i) < 32 || special.charCodeAt(i) > 126) { @@ -75,55 +65,107 @@ Ice.StringUtil = } } - var result = [], c; - for(i = 0, length = s.length; i < length; ++i) + let result = []; + + if(toStringMode === Ice.ToStringMode.Compat) { - c = s.charCodeAt(i); - if(c < 128) - { - encodeChar(c, result, special); - } - else if(c > 127 && c < 2048) + // Encode UTF-8 bytes + var bytes = unescape(encodeURIComponent(s)); + for(let i = 0; i < bytes.length; ++i) { - encodeChar((c >> 6) | 192, result, special); - encodeChar((c & 63) | 128, result, special); + const c = bytes.charCodeAt(i); + encodeChar(c, result, special, toStringMode); } - else + } + else + { + for(let i = 0; i < s.length; ++i) { - encodeChar((c >> 12) | 224, result, special); - encodeChar(((c >> 6) & 63) | 128, result, special); - encodeChar((c & 63) | 128, result, special); + const c = s.charCodeAt(i); + if(toStringMode === Ice.ToStringMode.Unicode || c < 0xD800 || c > 0xDFFF) + { + encodeChar(c, result, special, toStringMode); + } + else + { + Debug.assert(toStringMode === Ice.ToStringMode.ASCII && c >= 0xD800 && c <= 0xDFFF); + if(i + 1 === s.length) + { + throw new Error("High surrogate without low surrogate"); + } + else + { + const codePoint = s.codePointAt(i); + Debug.assert(codePoint > 0xFFFF); + i++; + + // append \Unnnnnnnn + result.push("\\U"); + const hex = codePoint.toString(16); + for(let j = hex.length; j < 8; j++) + { + result.push('0'); + } + result.push(hex); + } + } } } - return result.join(""); - }, + } // // Remove escape sequences added by escapeString. Throws Error // for an invalid input string. // - unescapeString: function(s, start, end) + static unescapeString(s, start, end, special) { start = start === undefined ? 0 : start; end = end === undefined ? s.length : end; + special = special === undefined ? null : special; Debug.assert(start >= 0 && start <= end && end <= s.length); - var arr = []; - decodeString(s, start, end, arr); + if(special !== null) + { + for(let i = 0; i < special.length; ++i) + { + if(special.charCodeAt(i) < 32 || special.charCodeAt(i) > 126) + { + throw new Error("special characters must be in ASCII range 32-126"); + } + } + } - return arr.join(""); - }, + // Optimization for strings without escapes + let p = s.indexOf('\\', start); + if(p == -1 || p >= end) + { + p = start; + while(p < end) + { + checkChar(s, p++); + } + return s.substring(start, end); + } + else + { + const arr = []; + while(start < end) + { + start = decodeChar(s, start, end, special, arr); + } + return arr.join(""); + } + } // // Split string helper; returns null for unmatched quotes // - splitString: function(str, delim) + static splitString(str, delim) { - var v = []; - var s = ""; - var pos = 0; - - var quoteChar = null; + const v = []; + let s = ""; + let pos = 0; + let quoteChar = null; while(pos < str.length) { if(quoteChar === null && (str.charAt(pos) === '"' || str.charAt(pos) === '\'')) @@ -177,24 +219,23 @@ Ice.StringUtil = } return v; - }, + } // // If a single or double quotation mark is found at the start position, // then the position of the matching closing quote is returned. If no // quotation mark is found at the start position, then 0 is returned. // If no matching closing quote is found, then -1 is returned. // - checkQuote: function(s, start) + static checkQuote(s, start) { start = start === undefined ? 0 : start; - var quoteChar = s.charAt(start); + let quoteChar = s.charAt(start); if(quoteChar == '"' || quoteChar == '\'') { start++; - var len = s.length; - var pos; - while(start < len && (pos = s.indexOf(quoteChar, start)) != -1) + let pos; + while(start < s.length && (pos = s.indexOf(quoteChar, start)) != -1) { if(s.charAt(pos - 1) != '\\') { @@ -205,22 +246,19 @@ Ice.StringUtil = return -1; // Unmatched quote } return 0; // Not quoted - }, - hashCode: function(s) + } + static hashCode(s) { - var hash = 0; - var n = s.length; - - for(var i = 0; i < n; i++) + let hash = 0; + for(let i = 0; i < s.length; i++) { hash = 31 * hash + s.charCodeAt(i); } - return hash; - }, - toInt: function(s) + } + static toInt(s) { - var n = parseInt(s, 10); + const n = parseInt(s, 10); if(isNaN(n)) { throw new Error("conversion of `" + s + "' to int failed"); @@ -230,15 +268,10 @@ Ice.StringUtil = }; module.exports.Ice = Ice; -// -// Write the byte b as an escape sequence if it isn't a printable ASCII -// character and append the escape sequence to sb. Additional characters -// that should be escaped can be passed in special. If b is any of these -// characters, b is preceded by a backslash in sb. -// -function encodeChar(b, sb, special) + +function encodeChar(c, sb, special, toStringMode) { - switch(b) + switch(c) { case 92: // '\\' { @@ -255,6 +288,19 @@ function encodeChar(b, sb, special) sb.push("\\\""); break; } + case 7: // '\a' + { + if(toStringMode == Ice.ToStringMode.Compat) + { + // Octal escape for compatibility with 3.6 and earlier + sb.push("\\007"); + } + else + { + sb.push("\\a"); + } + break; + } case 8: // '\b' { sb.push("\\b"); @@ -280,48 +326,88 @@ function encodeChar(b, sb, special) sb.push("\\t"); break; } + case 11: // '\v' + { + if(toStringMode == Ice.ToStringMode.Compat) + { + // Octal escape for compatibility with 3.6 and earlier + sb.push("\\013"); + } + else + { + sb.push("\\v"); + } + break; + } default: { - if(!(b >= 32 && b <= 126)) + var s = String.fromCharCode(c); + + if(special !== null && special.indexOf(s) !== -1) { sb.push('\\'); - var octal = b.toString(8); - // - // Add leading zeroes so that we avoid problems during - // decoding. For example, consider the encoded string - // \0013 (i.e., a character with value 1 followed by - // the character '3'). If the leading zeroes were omitted, - // the result would be incorrectly interpreted by the - // decoder as a single character with value 11. - // - for(var j = octal.length; j < 3; j++) - { - sb.push('0'); - } - sb.push(octal); + sb.push(s); } else { - var c = String.fromCharCode(b); - if(special !== null && special.indexOf(c) !== -1) + if(c < 32 || c > 126) { - sb.push('\\'); - sb.push(c); + if(toStringMode === Ice.ToStringMode.Compat) + { + // + // When ToStringMode=Compat, c is a UTF-8 byte + // + Debug.assert(c < 256); + sb.push('\\'); + const octal = c.toString(8); + // + // Add leading zeroes so that we avoid problems during + // decoding. For example, consider the encoded string + // \0013 (i.e., a character with value 1 followed by + // the character '3'). If the leading zeroes were omitted, + // the result would be incorrectly interpreted by the + // decoder as a single character with value 11. + // + for(let j = octal.length; j < 3; j++) + { + sb.push('0'); + } + sb.push(octal); + } + else if(c < 32 || c == 127 || toStringMode === Ice.ToStringMode.ASCII) + { + // append \\unnnn + sb.push("\\u"); + const hex = c.toString(16); + for(let j = hex.length; j < 4; j++) + { + sb.push('0'); + } + sb.push(hex); + } + else + { + // keep as is + sb.push(s); + } } else { - sb.push(c); + // printable ASCII character + sb.push(s); } } + break; } } } + function checkChar(s, pos) { - var n = s.charCodeAt(pos); - if(!(n >= 32 && n <= 126)) + const c = s.charCodeAt(pos); + if(c < 32 || c === 127) { - var msg; + let msg; if(pos > 0) { msg = "character after `" + s.substring(0, pos) + "'"; @@ -330,76 +416,133 @@ function checkChar(s, pos) { msg = "first character"; } - msg += " is not a printable ASCII character (ordinal " + n + ")"; + msg += " has invalid ordinal value" + c; throw new Error(msg); } - return n; + return s.charAt(pos) } - // -// Decode the character or escape sequence starting at start and return it. -// nextStart is set to the index of the first character following the decoded -// character or escape sequence. +// Decode the character or escape sequence starting at start and appends it to result; +// returns the index of the first character following the decoded character +// or escape sequence. // -function decodeChar(s, start, end, nextStart) +function decodeChar(s, start, end, special, result) { Debug.assert(start >= 0); + Debug.assert(start < end); Debug.assert(end <= s.length); - if(start >= end) + if(s.charAt(start) != '\\') { - throw new Error("EOF while decoding string"); + result.push(checkChar(s, start++)); } - - var c; - - if(s.charAt(start) != '\\') + else if(start + 1 === end) { - c = checkChar(s, start++); + ++start; + result.push("\\"); // trailing backslash } else { - if(start + 1 == end) - { - throw new Error("trailing backslash"); - } - switch(s.charAt(++start)) + let c = s.charAt(++start); + + switch(c) { case '\\': case '\'': case '"': + case '?': { - c = s.charCodeAt(start++); + ++start; + result.push(c); + break; + } + case 'a': + { + ++start; + result.append("\u0007"); break; } case 'b': { ++start; - c = "\b".charCodeAt(0); + result.push("\b"); break; } case 'f': { ++start; - c = "\f".charCodeAt(0); + result.push("\f"); break; } case 'n': { ++start; - c = "\n".charCodeAt(0); + result.push("\n"); break; } case 'r': { ++start; - c = "\r".charCodeAt(0); + result.push("\r") break; } case 't': { ++start; - c = "\t".charCodeAt(0); + result.push("\t") + break; + } + case 'v': + { + ++start; + result.push("\v"); + break; + } + case 'u': + case 'U': + { + let codePoint = 0; + const inBMP = (c === 'u'); + let size = inBMP ? 4 : 8; + ++start; + while(size > 0 && start < end) + { + let charVal = s.charCodeAt(start++); + if(charVal >= 0x30 && charVal <= 0x39) + { + charVal -= 0x30; + } + else if(charVal >= 0x61 && charVal <= 0x66) + { + charVal += 10 - 0x61; + } + else if(charVal >= 0x41 && charVal <= 0x46) + { + charVal += 10 - 0x41; + } + else + { + break; // while + } + codePoint = codePoint * 16 + charVal; + --size; + } + if(size > 0) + { + throw new Error("Invalid universal character name: too few hex digits"); + } + if(codePoint >= 0xD800 && codePoint <= 0xDFFF) + { + throw new Error("A universal character name cannot designate a surrogate"); + } + if(inBMP || codePoint <= 0xFFFF) + { + result.push(String.fromCharCode(codePoint)); + } + else + { + result.push(String.fromCodePoint(codePoint)); + } break; } case '0': @@ -410,67 +553,95 @@ function decodeChar(s, start, end, nextStart) case '5': case '6': case '7': + case 'x': { - var octalChars = "01234567"; - var val = 0; - for(var j = 0; j < 3 && start < end; ++j) + // UTF-8 byte sequence encoded with octal or hex escapes + + let arr = []; + let more = true; + while(more) { - var ch = s.charAt(start++); - if(octalChars.indexOf(ch) == -1) + let val = 0; + if(c === 'x') { - --start; - break; + let size = 2; + ++start; + while(size > 0 && start < end) + { + let charVal = s.charCodeAt(start++); + if(charVal >= 0x30 && charVal <= 0x39) + { + charVal -= 0x30; + } + else if(charVal >= 0x61 && charVal <= 0x66) + { + charVal += 10 - 0x61; + } + else if(charVal >= 0x41 && charVal <= 0x46) + { + charVal += 10 - 0x41; + } + else + { + break; // while + } + val = val * 16 + charVal; + --size; + } + if(size === 2) + { + throw new Error("Invalid \\x escape sequence: no hex digit"); + } + } + else + { + for(let j = 0; j < 3 && start < end; ++j) + { + let charVal = s.charCodeAt(start++) - '0'.charCodeAt(0); + if(charVal < 0 || charVal > 7) + { + --start; // move back + Debug.assert(j !== 0); // must be at least one digit + break; // for + } + val = val * 8 + charVal; + } + if(val > 255) + { + throw new Error("octal value \\" + val.toString(8) + " (" + val + ") is out of range"); + } + } + + arr.push(String.fromCharCode(val)); + + more = false; + if((start + 1 < end) && s.charAt(start) === '\\') + { + c = s.charAt(start + 1); + let charVal = s.charCodeAt(start + 1); + if(c === 'x' || (charVal >= 0x30 && charVal <= 0x39)) + { + start++; + more = true; + } } - val = val * 8 + parseInt(ch); - } - if(val > 255) - { - var msg = "octal value \\" + val.toString(8) + " (" + val + ") is out of range"; - throw new Error(msg); } - c = val; + + // Decode UTF-8 arr into string + result.push(decodeURIComponent(escape(arr.join("")))); break; } default: { - c = checkChar(s, start++); + if(special === null || special.length === 0 || special.indexOf(c) === -1) + { + result.push("\\"); // not in special, so we keep the backslash + } + result.push(checkChar(s, start++)); break; } } } - nextStart.value = start; - return c; -} - -// -// Remove escape sequences from s and append the result to sb. -// Return true if successful, false otherwise. -// -function decodeString(s, start, end, arr) -{ - var nextStart = { 'value': 0 }, c, c2, c3; - while(start < end) - { - c = decodeChar(s, start, end, nextStart); - start = nextStart.value; - if(c < 128) - { - arr.push(String.fromCharCode(c)); - } - else if(c > 191 && c < 224) - { - c2 = decodeChar(s, start, end, nextStart); - start = nextStart.value; - arr.push(String.fromCharCode(((c & 31) << 6) | (c2 & 63))); - } - else - { - c2 = decodeChar(s, start, end, nextStart); - start = nextStart.value; - c3 = decodeChar(s, start, end, nextStart); - start = nextStart.value; - arr.push(String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63))); - } - } + return start; } |