diff options
Diffstat (limited to 'java/src/IceUtil/StringUtil.java')
-rw-r--r-- | java/src/IceUtil/StringUtil.java | 432 |
1 files changed, 246 insertions, 186 deletions
diff --git a/java/src/IceUtil/StringUtil.java b/java/src/IceUtil/StringUtil.java index 755bfb54a83..edcd20bd9e1 100644 --- a/java/src/IceUtil/StringUtil.java +++ b/java/src/IceUtil/StringUtil.java @@ -75,91 +75,109 @@ public final class StringUtil return -1; } + // + // Write the byte b as an escape sequence if it isn't a printable ASCII + // character and append the escape sequence to sb. Additional characters + // that should be escaped can be passed in special. If b is any of these + // characters, b is preceded by a backslash in sb. + // private static void - escapeChar(byte b, StringBuffer s, String special) + encodeChar(byte b, StringBuffer sb, String special) { switch(b) { - case (byte)'\\': - { - s.append("\\\\"); - break; - } - case (byte)'\'': - { - s.append("\\'"); - break; - } - case (byte)'"': - { - s.append("\\\""); - break; - } - case (byte)'\b': - { - s.append("\\b"); - break; - } - case (byte)'\f': - { - s.append("\\f"); - break; - } - case (byte)'\n': - { - s.append("\\n"); - break; - } - case (byte)'\r': - { - s.append("\\r"); - break; - } - case (byte)'\t': - { - s.append("\\t"); - break; - } - default: - { - if(b <= (byte)31 || b == (byte)127) // Bytes are signed in Java (-128 to 127) - { - s.append('\\'); - String octal = Integer.toOctalString(b); - // - // Add leading zeroes so that we avoid problems during - // decoding. For example, consider the encoded string - // \0013 (i.e., a character with value 1 followed by - // the character '3'). If the leading zeroes were omitted, - // the result would be incorrectly interpreted by the - // decoder as a single character with value 11. 
- // - for(int j = octal.length(); j < 3; j++) - { - s.append('0'); - } - s.append(octal); - } - else if(special != null && special.indexOf((char)b) != -1) - { - s.append('\\'); - escapeChar(b, s, null); - } - else - { - s.append((char)b); - } - } + case (byte)'\\': + { + sb.append("\\\\"); + break; + } + case (byte)'\'': + { + sb.append("\\'"); + break; + } + case (byte)'"': + { + sb.append("\\\""); + break; + } + case (byte)'\b': + { + sb.append("\\b"); + break; + } + case (byte)'\f': + { + sb.append("\\f"); + break; + } + case (byte)'\n': + { + sb.append("\\n"); + break; + } + case (byte)'\r': + { + sb.append("\\r"); + break; + } + case (byte)'\t': + { + sb.append("\\t"); + break; + } + default: + { + if(!(b >= 32 && b <= 126)) + { + sb.append('\\'); + String octal = Integer.toOctalString(b < 0 ? b + 256 : b); + // + // Add leading zeroes so that we avoid problems during + // decoding. For example, consider the encoded string + // \0013 (i.e., a character with value 1 followed by + // the character '3'). If the leading zeroes were omitted, + // the result would be incorrectly interpreted by the + // decoder as a single character with value 11. + // + for(int j = octal.length(); j < 3; j++) + { + sb.append('0'); + } + sb.append(octal); + } + else if(special != null && special.indexOf((char)b) != -1) + { + sb.append('\\'); + sb.append((char)b); + } + else + { + sb.append((char)b); + } + } } } // - // Add escape sequences (like "\n", or "\0xxx") to make a string - // readable in ASCII. + // Add escape sequences (such as "\n", or "\007") to make a string + // readable in ASCII. Any characters that appear in special are + // prefixed with a backslash in the returned string. 
// public static String escapeString(String s, String special) { + if(special != null) + { + for(int i = 0; i < special.length(); ++i) + { + if(special.charAt(i) < 32 || special.charAt(i) > 126) + { + throw new IllegalArgumentException("special characters must be in ASCII range 32-126"); + } + } + } + byte[] bytes = null; try { @@ -172,135 +190,177 @@ public final class StringUtil } StringBuffer result = new StringBuffer(bytes.length); - for(int i = 0; i < bytes.length; i++) { - escapeChar(bytes[i], result, special); + encodeChar(bytes[i], result, special); } return result.toString(); } + private static char + checkChar(char c) + { + if(!(c >= 32 && c <= 126)) + { + throw new IllegalArgumentException("illegal input character"); + } + return c; + } + + // + // Decode the character or escape sequence starting at start and return it. + // nextStart.value is set to the index of the first character following the decoded character + // or escape sequence. + // + private static char decodeChar(String s, int start, int end, Ice.IntHolder nextStart) + { + assert(start >= 0); + assert(start < end); + assert(end <= s.length()); + + char c; + + if(s.charAt(start) != '\\') + { + c = checkChar(s.charAt(start++)); + } + else + { + if(start + 1 == end) + { + throw new IllegalArgumentException("trailing backslash in argument"); + } + switch(s.charAt(++start)) + { + case '\\': + case '\'': + case '"': + { + c = s.charAt(start++); + break; + } + case 'b': + { + ++start; + c = '\b'; + break; + } + case 'f': + { + ++start; + c = '\f'; + break; + } + case 'n': + { + ++start; + c = '\n'; + break; + } + case 'r': + { + ++start; + c = '\r'; + break; + } + case 't': + { + ++start; + c = '\t'; + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + int oct = 0; + for(int j = 0; j < 3 && start < end; ++j) + { + int charVal = s.charAt(start++) - '0'; + if(charVal < 0 || charVal > 7) + { + --start; + break; + } + oct = oct * 8 + charVal; + } + 
if(oct > 255) + { + throw new IllegalArgumentException("octal value out of range"); + } + c = (char)oct; + break; + } + default: + { + c = checkChar(s.charAt(start++)); + break; + } + } + } + nextStart.value = start; + return c; + } + + // + // Remove escape sequences from s and append the result to sb. + // Throws IllegalArgumentException if s contains an invalid character or escape sequence. + // + private static void + decodeString(String s, int start, int end, StringBuffer sb) + { + Ice.IntHolder nextStart = new Ice.IntHolder(); + while(start < end) + { + sb.append(decodeChar(s, start, end, nextStart)); + start = nextStart.value; + } + } + // // Remove escape sequences added by escapeString. // public static boolean unescapeString(String s, int start, int end, Ice.StringHolder result) { - final int len = s.length(); - assert(start >= 0); - assert(end <= len); - assert(start <= end); + if(start < 0) + { + throw new IllegalArgumentException("start offset must be >= 0"); + } + if(end > s.length()) + { + throw new IllegalArgumentException("end offset must <= s.length()"); + } + if(start > end) + { + throw new IllegalArgumentException("start offset must <= end offset"); + } - byte[] bytes = new byte[len]; - int bc = 0; - while(start < end) - { - char ch = s.charAt(start); - if(ch == '\\') - { - start++; - if(start == end) - { - return false; // Missing character - } - ch = s.charAt(start); - switch(ch) - { - case '\\': - { - bytes[bc++] = (byte)'\\'; - break; - } - case '\'': - case '"': - { - bytes[bc++] = (byte)ch; - break; - } - case 'b': - { - bytes[bc++] = (byte)'\b'; - break; - } - case 'f': - { - bytes[bc++] = (byte)'\f'; - break; - } - case 'n': - { - bytes[bc++] = (byte)'\n'; - break; - } - case 'r': - { - bytes[bc++] = (byte)'\r'; - break; - } - case 't': - { - bytes[bc++] = (byte)'\t'; - break; - } - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - int count = 0; - int val = 0; - while(count < 3 && start < end && 
s.charAt(start) >= '0' && s.charAt(start) <= '9') - { - val <<= 3; - val |= s.charAt(start) - '0'; - start++; - count++; - } - if(val > 255) - { - return false; // Octal value out of range - } - bytes[bc++] = (byte)val; - continue; // don't increment start - } - default: - { - byte b = (byte)ch; - if(b <= (byte)31 || b == (byte)127) // Bytes are signed in Java (-128 to 127) - { - return false; // Malformed encoding - } - else - { - bytes[bc++] = b; - } - } - } - } - else - { - bytes[bc++] = (byte)ch; - } - start++; - } + try + { + StringBuffer sb = new StringBuffer(); + decodeString(s, start, end, sb); + String decodedString = sb.toString(); - try - { - result.value = new String(bytes, 0, bc, "UTF8"); - } - catch(java.io.UnsupportedEncodingException ex) - { - assert(false); - } + byte[] arr = new byte[decodedString.length()]; + for(int i = 0; i < arr.length; ++i) + { + arr[i] = (byte)decodedString.charAt(i); + } - return true; + result.value = new String(arr, 0, arr.length, "UTF8"); + return true; + } + catch(java.lang.Exception ex) + { + return false; + } } public static int |