summaryrefslogtreecommitdiff
path: root/java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java
diff options
context:
space:
mode:
Diffstat (limited to 'java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java')
-rw-r--r--java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java362
1 files changed, 251 insertions, 111 deletions
diff --git a/java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java b/java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java
index 953359c5c4b..5c274db2375 100644
--- a/java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java
+++ b/java-compat/src/Ice/src/main/java/IceUtilInternal/StringUtil.java
@@ -75,97 +75,119 @@ public final class StringUtil
return -1;
}
- //
- // Write the byte b as an escape sequence if it isn't a printable ASCII
- // character and append the escape sequence to sb. Additional characters
- // that should be escaped can be passed in special. If b is any of these
- // characters, b is preceded by a backslash in sb.
- //
private static void
- encodeChar(byte b, StringBuilder sb, String special)
+ encodeChar(char c, StringBuilder sb, String special, Ice.ToStringMode toStringMode)
{
- switch(b)
+ switch(c)
{
- case (byte)'\\':
+ case '\\':
{
sb.append("\\\\");
break;
}
- case (byte)'\'':
+ case '\'':
{
sb.append("\\'");
break;
}
- case (byte)'"':
+ case '"':
{
sb.append("\\\"");
break;
}
- case (byte)'\b':
+ case '\b':
{
sb.append("\\b");
break;
}
- case (byte)'\f':
+ case '\f':
{
sb.append("\\f");
break;
}
- case (byte)'\n':
+ case '\n':
{
sb.append("\\n");
break;
}
- case (byte)'\r':
+ case '\r':
{
sb.append("\\r");
break;
}
- case (byte)'\t':
+ case '\t':
{
sb.append("\\t");
break;
}
default:
{
- if(!(b >= 32 && b <= 126))
- {
- sb.append('\\');
- String octal = Integer.toOctalString(b < 0 ? b + 256 : b);
- //
- // Add leading zeroes so that we avoid problems during
- // decoding. For example, consider the encoded string
- // \0013 (i.e., a character with value 1 followed by
- // the character '3'). If the leading zeroes were omitted,
- // the result would be incorrectly interpreted by the
- // decoder as a single character with value 11.
- //
- for(int j = octal.length(); j < 3; j++)
- {
- sb.append('0');
- }
- sb.append(octal);
- }
- else if(special != null && special.indexOf((char)b) != -1)
+ if(special != null && special.indexOf(c) != -1)
{
sb.append('\\');
- sb.append((char)b);
+ sb.append(c);
}
else
{
- sb.append((char)b);
+ if(c < 32 || c > 126)
+ {
+ if(toStringMode == Ice.ToStringMode.Compat)
+ {
+ //
+ // When ToStringMode=Compat, c is a UTF-8 byte
+ //
+ assert(c < 256);
+
+ sb.append('\\');
+ String octal = Integer.toOctalString(c);
+ //
+ // Add leading zeroes so that we avoid problems during
+ // decoding. For example, consider the encoded string
+ // \0013 (i.e., a character with value 1 followed by
+ // the character '3'). If the leading zeroes were omitted,
+ // the result would be incorrectly interpreted by the
+ // decoder as a single character with value 11.
+ //
+ for(int j = octal.length(); j < 3; j++)
+ {
+ sb.append('0');
+ }
+ sb.append(octal);
+ }
+ else if(c < 32 || c == 127 || toStringMode == Ice.ToStringMode.ASCII)
+ {
+ // append \\unnnn
+ sb.append("\\u");
+ String hex = Integer.toHexString(c);
+ for(int j = hex.length(); j < 4; j++)
+ {
+ sb.append('0');
+ }
+ sb.append(hex);
+ }
+ else
+ {
+ // keep as is
+ sb.append(c);
+ }
+ }
+ else
+ {
+ // printable ASCII character
+ sb.append(c);
+ }
}
+ break;
}
}
}
//
- // Add escape sequences (such as "\n", or "\007") to make a string
- // readable in ASCII. Any characters that appear in special are
- // prefixed with a backlash in the returned string.
+ // Add escape sequences (like "\n" to the input string)
+ // The second parameter adds characters to escape, and can be empty.
//
public static String
- escapeString(String s, String special)
+ escapeString(String s, String special, Ice.ToStringMode toStringMode)
{
if(special != null)
{
@@ -178,31 +200,72 @@ public final class StringUtil
}
}
- byte[] bytes = null;
- try
+ if(toStringMode == Ice.ToStringMode.Compat)
{
- bytes = s.getBytes("UTF8");
+ // Encode UTF-8 bytes
+
+ byte[] bytes = null;
+ try
+ {
+ bytes = s.getBytes("UTF8");
+ }
+ catch(java.io.UnsupportedEncodingException ex)
+ {
+ assert(false);
+ return null;
+ }
+
+ StringBuilder result = new StringBuilder(bytes.length);
+ for(int i = 0; i < bytes.length; i++)
+ {
+ encodeChar((char)(bytes[i] & 0xFF), result, special, toStringMode);
+ }
+
+ return result.toString();
}
- catch(java.io.UnsupportedEncodingException ex)
+ else
{
- assert(false);
- return null;
- }
+ StringBuilder result = new StringBuilder(s.length());
- StringBuilder result = new StringBuilder(bytes.length);
- for(int i = 0; i < bytes.length; i++)
- {
- encodeChar(bytes[i], result, special);
- }
+ for(int i = 0; i < s.length(); i++)
+ {
+ char c = s.charAt(i);
+ if(toStringMode == Ice.ToStringMode.Unicode || !Character.isSurrogate(c))
+ {
+ encodeChar(c, result, special, toStringMode);
+ }
+ else
+ {
+ assert(toStringMode == Ice.ToStringMode.ASCII && Character.isSurrogate(c));
+ if(i + 1 == s.length())
+ {
+ throw new IllegalArgumentException("High surrogate without low surrogate");
+ }
+ else
+ {
+ i++;
+ int codePoint = Character.toCodePoint(c, s.charAt(i));
+ // append \Unnnnnnnn
+ result.append("\\U");
+ String hex = Integer.toHexString(codePoint);
+ for(int j = hex.length(); j < 8; j++)
+ {
+ result.append('0');
+ }
+ result.append(hex);
+ }
+ }
+ }
- return result.toString();
+ return result.toString();
+ }
}
private static char
checkChar(String s, int pos)
{
char c = s.charAt(pos);
- if(!(c >= 32 && c <= 126))
+ if(c < 32 || c == 127)
{
String msg;
if(pos > 0)
@@ -213,28 +276,27 @@ public final class StringUtil
{
msg = "first character";
}
- msg += " is not a printable ASCII character (ordinal " + (int)c + ")";
+ msg += " has invalid ordinal value " + (int)c;
throw new IllegalArgumentException(msg);
}
return c;
}
//
- // Decode the character or escape sequence starting at start and return it.
- // newStart is set to the index of the first character following the decoded character
+ // Decode the character or escape sequence starting at start and appends it to result;
+ // returns the index of the first character following the decoded character
// or escape sequence.
//
- private static char decodeChar(String s, int start, int end, Ice.Holder<Integer> nextStart)
+ private static int
+ decodeChar(String s, int start, int end, StringBuilder result)
{
assert(start >= 0);
assert(start < end);
assert(end <= s.length());
- char c;
-
if(s.charAt(start) != '\\')
{
- c = checkChar(s, start++);
+ result.append(checkChar(s, start++));
}
else
{
@@ -242,45 +304,98 @@ public final class StringUtil
{
throw new IllegalArgumentException("trailing backslash");
}
- switch(s.charAt(++start))
+
+ char c = s.charAt(++start);
+
+ switch(c)
{
case '\\':
case '\'':
case '"':
{
- c = s.charAt(start++);
+ ++start;
+ result.append(c);
break;
}
case 'b':
{
++start;
- c = '\b';
+ result.append('\b');
break;
}
case 'f':
{
++start;
- c = '\f';
+ result.append('\f');
break;
}
case 'n':
{
++start;
- c = '\n';
+ result.append('\n');
break;
}
case 'r':
{
++start;
- c = '\r';
+ result.append('\r');
break;
}
case 't':
{
++start;
- c = '\t';
+ result.append('\t');
break;
}
+ case 'u':
+ case 'U':
+ {
+ int codePoint = 0;
+ boolean inBMP = (c == 'u');
+ int size = inBMP ? 4 : 8;
+ ++start;
+ while(size > 0 && start < end)
+ {
+ c = s.charAt(start++);
+ int charVal = 0;
+ if(c >= '0' && c <= '9')
+ {
+ charVal = c - '0';
+ }
+ else if(c >= 'a' && c <= 'f')
+ {
+ charVal = 10 + (c - 'a');
+ }
+ else if(c >= 'A' && c <= 'F')
+ {
+ charVal = 10 + (c - 'A');
+ }
+ else
+ {
+ break; // while
+ }
+ codePoint = codePoint * 16 + charVal;
+ --size;
+ }
+ if(size > 0)
+ {
+ throw new IllegalArgumentException("Invalid universal character name: too few hex digits");
+ }
+ if(inBMP && Character.isSurrogate((char)codePoint))
+ {
+ throw new IllegalArgumentException("A non-BMP character cannot be encoded with \\unnnn, use \\Unnnnnnnn instead");
+ }
+ if(inBMP || Character.isBmpCodePoint(codePoint))
+ {
+ result.append((char)codePoint);
+ }
+ else
+ {
+ result.append(Character.toChars(codePoint));
+ }
+ break;
+ }
+
case '0':
case '1':
case '2':
@@ -290,49 +405,71 @@ public final class StringUtil
case '6':
case '7':
{
- int val = 0;
- for(int j = 0; j < 3 && start < end; ++j)
+ // UTF-8 byte sequence encoded with octal escapes
+
+ byte[] arr = new byte[end - start];
+ int i = 0;
+ boolean done = false;
+ while(!done)
{
- int charVal = s.charAt(start++) - '0';
- if(charVal < 0 || charVal > 7)
+ int val = 0;
+ for(int j = 0; j < 3 && start < end; ++j)
{
- --start;
- break;
+ int charVal = s.charAt(start++) - '0';
+ if(charVal < 0 || charVal > 7)
+ {
+ --start;
+ if(j == 0)
+ {
+ // first character after escape is not 0-7:
+ done = true;
+ --start; // go back to the previous backslash
+ }
+ break; // for
+ }
+ val = val * 8 + charVal;
}
- val = val * 8 + charVal;
+
+ if(!done)
+ {
+ if(val > 255)
+ {
+ String msg = "octal value \\" + Integer.toOctalString(val) + " (" + val + ") is out of range";
+ throw new IllegalArgumentException(msg);
+ }
+ arr[i++] = (byte)val;
+
+ if((start + 1 < end) && s.charAt(start) == '\\')
+ {
+ start++;
+ // loop, read next octal escape sequence
+ }
+ else
+ {
+ done = true;
+ }
+ }
+ }
+
+ try
+ {
+ result.append(new String(arr, 0, i, "UTF8"));
}
- if(val > 255)
+ catch(java.io.UnsupportedEncodingException ex)
{
- String msg = "octal value \\" + Integer.toOctalString(val) + " (" + val + ") is out of range";
- throw new IllegalArgumentException(msg);
+ throw new IllegalArgumentException("unsupported encoding", ex);
}
- c = (char)val;
break;
}
default:
{
- c = checkChar(s, start++);
+ result.append(checkChar(s, start++));
break;
}
}
}
- nextStart.value = start;
- return c;
- }
- //
- // Remove escape sequences from s and append the result to sb.
- // Return true if successful, false otherwise.
- //
- private static void
- decodeString(String s, int start, int end, StringBuilder sb)
- {
- Ice.Holder<Integer> nextStart = new Ice.Holder<Integer>();
- while(start < end)
- {
- sb.append(decodeChar(s, start, end, nextStart));
- start = nextStart.value;
- }
+ return start;
}
//
@@ -344,26 +481,29 @@ public final class StringUtil
{
assert(start >= 0 && start <= end && end <= s.length());
- StringBuilder sb = new StringBuilder(end - start);
- decodeString(s, start, end, sb);
- String decodedString = sb.toString();
-
- byte[] arr = new byte[decodedString.length()];
- for(int i = 0; i < arr.length; ++i)
- {
- arr[i] = (byte)decodedString.charAt(i);
- }
-
- try
+ // Optimization for strings without escapes
+ int p = s.indexOf('\\', start);
+ if(p == -1 || p >= end)
{
- return new String(arr, 0, arr.length, "UTF8");
+ p = start;
+ while(p < end)
+ {
+ checkChar(s, p++);
+ }
+ return s.substring(start, end);
}
- catch(java.io.UnsupportedEncodingException ex)
+ else
{
- throw new IllegalArgumentException("unsupported encoding", ex);
+ StringBuilder sb = new StringBuilder(end - start);
+ while(start < end)
+ {
+ start = decodeChar(s, start, end, sb);
+ }
+ return sb.toString();
}
}
+
//
// Join a list of strings using the given delimiter.
//