summaryrefslogtreecommitdiff
path: root/cppe/src/IceE/StringUtil.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cppe/src/IceE/StringUtil.cpp')
-rw-r--r--cppe/src/IceE/StringUtil.cpp415
1 files changed, 247 insertions, 168 deletions
diff --git a/cppe/src/IceE/StringUtil.cpp b/cppe/src/IceE/StringUtil.cpp
index 262636c2fd5..c1ba12f5ec7 100644
--- a/cppe/src/IceE/StringUtil.cpp
+++ b/cppe/src/IceE/StringUtil.cpp
@@ -8,8 +8,10 @@
// **********************************************************************
#include <IceE/StringUtil.h>
+#include <IceE/ExceptionBase.h>
using namespace std;
+using namespace IceUtil;
static string
toOctalString(unsigned int n)
@@ -29,204 +31,281 @@ toOctalString(unsigned int n)
return string(s, charPos, (32 - charPos));
}
+//
+// Write the byte b as an escape sequence if it isn't a printable ASCII
+// character and append the escape sequence to s. Additional characters
+// that should be escaped can be passed in special. If b is any of these
+// characters, b is preceded by a backslash in s.
+//
static void
-escapeChar(string::value_type b, string& s, const string& special)
+encodeChar(string::value_type b, string& s, const string& special)
{
switch(b)
{
- case '\\':
- {
- s.append("\\\\");
- break;
- }
- case '\'':
- {
- s.append("\\'");
- break;
- }
- case '"':
- {
- s.append("\\\"");
- break;
- }
- case '\b':
- {
- s.append("\\b");
- break;
+ case '\\':
+ {
+ s.append("\\\\");
+ break;
+ }
+
+ case '\'':
+ {
+ s.append("\\'");
+ break;
+ }
+
+ case '"':
+ {
+ s.append("\\\"");
+ break;
+ }
+
+ case '\b':
+ {
+ s.append("\\b");
+ break;
+ }
+
+ case '\f':
+ {
+ s.append("\\f");
+ break;
+ }
+
+ case '\n':
+ {
+ s.append("\\n");
+ break;
+ }
+
+ case '\r':
+ {
+ s.append("\\r");
+ break;
+ }
+
+ case '\t':
+ {
+ s.append("\\t");
+ break;
+ }
+
+ default:
+ {
+ unsigned char i = static_cast<unsigned char>(b);
+ if(!(i >= 32 && i <= 126))
+ {
+ s.push_back('\\');
+ string octal = toOctalString(i);
+ //
+ // Add leading zeroes so that we avoid problems during
+ // decoding. For example, consider the escaped string
+ // \0013 (i.e., a character with value 1 followed by the
+ // character '3'). If the leading zeroes were omitted, the
+ // result would be incorrectly interpreted as a single
+ // character with value 11.
+ //
+ for(string::size_type j = octal.size(); j < 3; j++)
+ {
+ s.push_back('0');
+ }
+ s.append(octal);
+ }
+ else if(special.find(b) != string::npos)
+ {
+ s.push_back('\\');
+ s.push_back(b);
+ }
+ else
+ {
+ s.push_back(b);
+ }
+ break;
+ }
}
- case '\f':
+}
+
+//
+// Add escape sequences (such as "\n", or "\007") to make a string
+// readable in ASCII. Any characters that appear in special are
+// prefixed with a backslash in the returned string.
+//
+string
+IceUtil::escapeString(const string& s, const string& special)
+{
+ string::size_type i;
+ for(i = 0; i < special.size(); ++i)
{
- s.append("\\f");
- break;
+ if(static_cast<unsigned char>(special[i]) < 32 || static_cast<unsigned char>(special[i]) > 126)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "special characters must be in ASCII range 32-126");
+ }
}
- case '\n':
+
+ string result;
+ for(i = 0; i < s.size(); ++i)
{
- s.append("\\n");
- break;
+ encodeChar(s[i], result, special);
}
- case '\r':
+
+ return result;
+}
+
+static char
+checkChar(char c)
+{
+ if(!(static_cast<unsigned char>(c) >= 32 && static_cast<unsigned char>(c) <= 126))
{
- s.append("\\r");
- break;
+ throw IllegalArgumentException(__FILE__, __LINE__, "illegal input character");
}
- case '\t':
+ return c;
+}
+
+//
+// Decode the character or escape sequence starting at start and return it.
+// end marks the one-past-the-end position of the substring to be scanned.
+// nextStart is set to the index of the first character following the decoded
+// character or escape sequence.
+//
+static char
+decodeChar(const string& s, string::size_type start, string::size_type end, string::size_type& nextStart)
+{
+ assert(start >= 0);
+ assert(start < end);
+ assert(end <= s.size());
+
+ char c;
+
+ if(s[start] != '\\')
{
- s.append("\\t");
- break;
+ c = checkChar(s[start++]);
}
- default:
+ else
{
- if(static_cast<signed char>(b) <= 31 || b == 127)
- {
- s.push_back('\\');
- string octal = toOctalString(b);
-
- //
- // Add leading zeroes so that we avoid problems during
- // decoding. For example, consider the escaped string
- // \0013 (i.e., a character with value 1 followed by the
- // character '3'). If the leading zeroes were omitted, the
- // result would be incorrectly interpreted as a single
- // character with value 11.
- //
- for(string::size_type j = octal.size(); j < 3; j++)
- {
- s.push_back('0');
- }
- s.append(octal);
- }
- else if(special.find(b) != string::npos)
- {
- s.push_back('\\');
- escapeChar(b, s, "");
- }
- else
- {
- s.push_back(b);
- }
- }
+ if(start + 1 == end)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "trailing backslash in argument");
+ }
+ switch(s[++start])
+ {
+ case '\\':
+ case '\'':
+ case '"':
+ {
+ c = s[start++];
+ break;
+ }
+ case 'b':
+ {
+ ++start;
+ c = '\b';
+ break;
+ }
+ case 'f':
+ {
+ ++start;
+ c = '\f';
+ break;
+ }
+ case 'n':
+ {
+ ++start;
+ c = '\n';
+ break;
+ }
+ case 'r':
+ {
+ ++start;
+ c = '\r';
+ break;
+ }
+ case 't':
+ {
+ ++start;
+ c = '\t';
+ break;
+ }
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ {
+ int oct = 0;
+ for(int j = 0; j < 3 && start < end; ++j)
+ {
+ int charVal = s[start++] - '0';
+ if(charVal < 0 || charVal > 7)
+ {
+ --start;
+ break;
+ }
+ oct = oct * 8 + charVal;
+ }
+ if(oct > 255)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "octal value out of range");
+ }
+ c = (char)oct;
+ break;
+ }
+ default:
+ {
+ c = checkChar(s[start++]);
+ break;
+ }
+ }
}
+ nextStart = start;
+ return c;
}
-string
-IceUtil::escapeString(const string& s, const string& special)
+//
+// Remove escape sequences from s and append the result to sb.
+// Return true if successful, false otherwise.
+//
+static void decodeString(const string& s, string::size_type start, string::size_type end, string& sb)
{
- string result;
- result.reserve(s.size());
-
- for(string::size_type i = 0; i < s.size(); i++)
+ while(start < end)
{
- escapeChar(s[i], result, special);
+ sb.push_back(decodeChar(s, start, end, start));
}
-
- return result;
}
+//
+// Remove escape sequences added by escapeString.
+//
bool
IceUtil::unescapeString(const string& s, string::size_type start, string::size_type end, string& result)
{
- assert(end <= s.size());
- assert(start <= end);
+ if(start < 0)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "start offset must be >= 0");
+ }
+ if(end > s.size())
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "end offset must be <= s.size()");
+ }
+ if(start > end)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "start offset must <= end offset");
+ }
result.reserve(end - start);
- while(start < end)
+ try
{
- char ch = s[start];
- if(ch == '\\')
- {
- start++;
- if(start == end)
- {
- return false; // Missing character.
- }
- ch = s[start];
- switch(ch)
- {
- case '\\':
- {
- result.push_back('\\');
- break;
- }
- case '\'':
- case '"':
- {
- result.push_back(ch);
- break;
- }
- case 'b':
- {
- result.push_back('\b');
- break;
- }
- case 'f':
- {
- result.push_back('\f');
- break;
- }
- case 'n':
- {
- result.push_back('\n');
- break;
- }
- case 'r':
- {
- result.push_back('\r');
- break;
- }
- case 't':
- {
- result.push_back('\t');
- break;
- }
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- {
- int count = 0;
- int val = 0;
- while(count < 3 && start < end && s[start] >= '0' && s[start] <= '9')
- {
- val <<= 3;
- val |= s[start] - '0';
- start++;
- count++;
- }
- if(val > 255)
- {
- return false; // Octal value out of range.
- }
- result.push_back((string::value_type)val);
- continue; // Don't increment start.
- }
- default:
- {
- if(static_cast<signed char>(ch) <= 31 || ch == 127)
- {
- return false; // Malformed encoding.
- }
- else
- {
- result.push_back(ch);
- }
- }
- }
- }
- else
- {
- result.push_back(ch);
- }
- start++;
+ result.clear();
+ decodeString(s, start, end, result);
+ return true;
+ }
+ catch(...)
+ {
+ return false;
}
-
- return true;
}
//