summaryrefslogtreecommitdiff
path: root/cpp/src/IceUtil/StringUtil.cpp
diff options
context:
space:
mode:
authorMichi Henning <michi@zeroc.com>2006-02-21 02:17:16 +0000
committerMichi Henning <michi@zeroc.com>2006-02-21 02:17:16 +0000
commit2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f (patch)
tree78fa7550ce16ede027333c9e12b2b571d5fbf154 /cpp/src/IceUtil/StringUtil.cpp
parentFixed Equals() so it can't throw an exception. (diff)
downloadice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.bz2
ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.xz
ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.zip
Bug 867.
Diffstat (limited to 'cpp/src/IceUtil/StringUtil.cpp')
-rw-r--r--cpp/src/IceUtil/StringUtil.cpp442
1 files changed, 258 insertions, 184 deletions
diff --git a/cpp/src/IceUtil/StringUtil.cpp b/cpp/src/IceUtil/StringUtil.cpp
index 954394fcc25..89d9efdaae3 100644
--- a/cpp/src/IceUtil/StringUtil.cpp
+++ b/cpp/src/IceUtil/StringUtil.cpp
@@ -8,8 +8,10 @@
// **********************************************************************
#include <IceUtil/StringUtil.h>
+#include <IceUtil/Unicode.h>
using namespace std;
+using namespace IceUtil;
static string
toOctalString(unsigned int n)
@@ -29,209 +31,281 @@ toOctalString(unsigned int n)
return string(s, charPos, (32 - charPos));
}
+//
+// Write the byte b as an escape sequence if it isn't a printable ASCII
+// character and append the escape sequence to s. Additional characters
+// that should be escaped can be passed in special. If b is any of these
+// characters, b is preceded by a backslash in s.
+//
static void
-escapeChar(string::value_type b, string& s, const string& special)
+encodeChar(string::value_type b, string& s, const string& special)
{
switch(b)
{
- case '\\':
- {
- s.append("\\\\");
- break;
- }
- case '\'':
- {
- s.append("\\'");
- break;
- }
- case '"':
- {
- s.append("\\\"");
- break;
- }
- case '\b':
- {
- s.append("\\b");
- break;
+ case '\\':
+ {
+ s.append("\\\\");
+ break;
+ }
+
+ case '\'':
+ {
+ s.append("\\'");
+ break;
+ }
+
+ case '"':
+ {
+ s.append("\\\"");
+ break;
+ }
+
+ case '\b':
+ {
+ s.append("\\b");
+ break;
+ }
+
+ case '\f':
+ {
+ s.append("\\f");
+ break;
+ }
+
+ case '\n':
+ {
+ s.append("\\n");
+ break;
+ }
+
+ case '\r':
+ {
+ s.append("\\r");
+ break;
+ }
+
+ case '\t':
+ {
+ s.append("\\t");
+ break;
+ }
+
+ default:
+ {
+ unsigned char i = static_cast<unsigned char>(b);
+ if(!(i >= 32 && i <= 126))
+ {
+ s.push_back('\\');
+ string octal = toOctalString(i);
+ //
+ // Add leading zeroes so that we avoid problems during
+ // decoding. For example, consider the escaped string
+ // \0013 (i.e., a character with value 1 followed by the
+ // character '3'). If the leading zeroes were omitted, the
+ // result would be incorrectly interpreted as a single
+ // character with value 11.
+ //
+ for(string::size_type j = octal.size(); j < 3; j++)
+ {
+ s.push_back('0');
+ }
+ s.append(octal);
+ }
+ else if(special.find(b) != string::npos)
+ {
+ s.push_back('\\');
+ s.push_back(b);
+ }
+ else
+ {
+ s.push_back(b);
+ }
+ break;
+ }
}
- case '\f':
+}
+
+//
+// Add escape sequences (such as "\n", or "\007") to make a string
+// readable in ASCII. Any characters that appear in special are
+// prefixed with a backslash in the returned string.
+//
+string
+IceUtil::escapeString(const string& s, const string& special)
+{
+ string::size_type i;
+ for(i = 0; i < special.size(); ++i)
{
- s.append("\\f");
- break;
+ if(static_cast<unsigned char>(special[i]) < 32 || static_cast<unsigned char>(special[i]) > 126)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "special characters must be in ASCII range 32-126");
+ }
}
- case '\n':
+
+ string result;
+ for(i = 0; i < s.size(); ++i)
{
- s.append("\\n");
- break;
+ encodeChar(s[i], result, special);
}
- case '\r':
+
+ return result;
+}
+
+static char
+checkChar(char c)
+{
+ if(!(static_cast<unsigned char>(c) >= 32 && static_cast<unsigned char>(c) <= 126))
{
- s.append("\\r");
- break;
+ throw IllegalArgumentException(__FILE__, __LINE__, "illegal input character");
}
- case '\t':
+ return c;
+}
+
+//
+// Decode the character or escape sequence starting at start and return it.
+// end marks the one-past-the-end position of the substring to be scanned.
+// nextStart is set to the index of the first character following the decoded
+// character or escape sequence.
+//
+static char
+decodeChar(const string& s, string::size_type start, string::size_type end, string::size_type& nextStart)
+{
+ assert(start >= 0);
+ assert(start < end);
+ assert(end <= s.size());
+
+ char c;
+
+ if(s[start] != '\\')
{
- s.append("\\t");
- break;
+ c = checkChar(s[start++]);
}
- default:
+ else
{
- //
- // Octal encode anything that is outside 32 to 126.
- //
- unsigned char i = static_cast<unsigned char>(b);
- if(i < 32 || i > 126)
- {
- s.push_back('\\');
- string octal = toOctalString(i);
-
- //
- // Add leading zeroes so that we avoid problems during
- // decoding. For example, consider the escaped string
- // \0013 (i.e., a character with value 1 followed by the
- // character '3'). If the leading zeroes were omitted, the
- // result would be incorrectly interpreted as a single
- // character with value 11.
- //
- for(string::size_type j = octal.size(); j < 3; j++)
- {
- s.push_back('0');
- }
- s.append(octal);
- }
- else if(special.find(b) != string::npos)
- {
- s.push_back('\\');
- escapeChar(b, s, "");
- }
- else
- {
- s.push_back(b);
- }
- }
+ if(start + 1 == end)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "trailing backslash in argument");
+ }
+ switch(s[++start])
+ {
+ case '\\':
+ case '\'':
+ case '"':
+ {
+ c = s[start++];
+ break;
+ }
+ case 'b':
+ {
+ ++start;
+ c = '\b';
+ break;
+ }
+ case 'f':
+ {
+ ++start;
+ c = '\f';
+ break;
+ }
+ case 'n':
+ {
+ ++start;
+ c = '\n';
+ break;
+ }
+ case 'r':
+ {
+ ++start;
+ c = '\r';
+ break;
+ }
+ case 't':
+ {
+ ++start;
+ c = '\t';
+ break;
+ }
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ {
+ int oct = 0;
+ for(int j = 0; j < 3 && start < end; ++j)
+ {
+ int charVal = s[start++] - '0';
+ if(charVal < 0 || charVal > 7)
+ {
+ --start;
+ break;
+ }
+ oct = oct * 8 + charVal;
+ }
+ if(oct > 255)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "octal value out of range");
+ }
+ c = (char)oct;
+ break;
+ }
+ default:
+ {
+ c = checkChar(s[start++]);
+ break;
+ }
+ }
}
+ nextStart = start;
+ return c;
}
-string
-IceUtil::escapeString(const string& s, const string& special)
+//
+// Remove escape sequences from s and append the result to sb.
+// Return true if successful, false otherwise.
+//
+static void decodeString(const string& s, string::size_type start, string::size_type end, string& sb)
{
- string result;
- result.reserve(s.size());
-
- for(string::size_type i = 0; i < s.size(); i++)
+ while(start < end)
{
- escapeChar(s[i], result, special);
+ sb.push_back(decodeChar(s, start, end, start));
}
-
- return result;
}
+//
+// Remove escape sequences added by escapeString.
+//
bool
IceUtil::unescapeString(const string& s, string::size_type start, string::size_type end, string& result)
{
- assert(end <= s.size());
- assert(start <= end);
+ if(start < 0)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "start offset must be >= 0");
+ }
+ if(end > s.size())
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "end offset must be <= s.size()");
+ }
+ if(start > end)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "start offset must <= end offset");
+ }
result.reserve(end - start);
- while(start < end)
+ try
{
- char ch = s[start];
- if(ch == '\\')
- {
- start++;
- if(start == end)
- {
- return false; // Missing character.
- }
- ch = s[start];
- switch(ch)
- {
- case '\\':
- {
- result.push_back('\\');
- break;
- }
- case '\'':
- case '"':
- {
- result.push_back(ch);
- break;
- }
- case 'b':
- {
- result.push_back('\b');
- break;
- }
- case 'f':
- {
- result.push_back('\f');
- break;
- }
- case 'n':
- {
- result.push_back('\n');
- break;
- }
- case 'r':
- {
- result.push_back('\r');
- break;
- }
- case 't':
- {
- result.push_back('\t');
- break;
- }
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- {
- int count = 0;
- int val = 0;
- while(count < 3 && start < end && s[start] >= '0' && s[start] <= '9')
- {
- val <<= 3;
- val |= s[start] - '0';
- start++;
- count++;
- }
- if(val > 255)
- {
- return false; // Octal value out of range.
- }
- result.push_back((string::value_type)val);
- continue; // Don't increment start.
- }
- default:
- {
- unsigned char i = static_cast<unsigned char>(ch);
- if(i < 32 || i > 126)
- {
- return false; // Malformed encoding.
- }
- else
- {
- result.push_back(ch);
- }
- }
- }
- }
- else
- {
- result.push_back(ch);
- }
- start++;
+ result.clear();
+ decodeString(s, start, end, result);
+ return true;
+ }
+ catch(...)
+ {
+ return false;
}
-
- return true;
}
//
@@ -246,17 +320,17 @@ IceUtil::checkQuote(const string& s, string::size_type start)
string::value_type quoteChar = s[start];
if(quoteChar == '"' || quoteChar == '\'')
{
- start++;
- string::size_type pos;
- while(start < s.size() && (pos = s.find(quoteChar, start)) != string::npos)
- {
- if(s[pos - 1] != '\\')
- {
- return pos;
- }
- start = pos + 1;
- }
- return string::npos; // Unmatched quote.
+ start++;
+ string::size_type pos;
+ while(start < s.size() && (pos = s.find(quoteChar, start)) != string::npos)
+ {
+ if(s[pos - 1] != '\\')
+ {
+ return pos;
+ }
+ start = pos + 1;
+ }
+ return string::npos; // Unmatched quote.
}
return 0; // Not quoted.
}