diff options
author | Michi Henning <michi@zeroc.com> | 2006-02-21 02:17:16 +0000 |
---|---|---|
committer | Michi Henning <michi@zeroc.com> | 2006-02-21 02:17:16 +0000 |
commit | 2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f (patch) | |
tree | 78fa7550ce16ede027333c9e12b2b571d5fbf154 /cpp/src/IceUtil/StringUtil.cpp | |
parent | Fixed Equals() so it can't throw an exception. (diff) | |
download | ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.bz2 ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.xz ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.zip |
Bug 867.
Diffstat (limited to 'cpp/src/IceUtil/StringUtil.cpp')
-rw-r--r-- | cpp/src/IceUtil/StringUtil.cpp | 442 |
1 files changed, 258 insertions, 184 deletions
diff --git a/cpp/src/IceUtil/StringUtil.cpp b/cpp/src/IceUtil/StringUtil.cpp index 954394fcc25..89d9efdaae3 100644 --- a/cpp/src/IceUtil/StringUtil.cpp +++ b/cpp/src/IceUtil/StringUtil.cpp @@ -8,8 +8,10 @@ // ********************************************************************** #include <IceUtil/StringUtil.h> +#include <IceUtil/Unicode.h> using namespace std; +using namespace IceUtil; static string toOctalString(unsigned int n) @@ -29,209 +31,281 @@ toOctalString(unsigned int n) return string(s, charPos, (32 - charPos)); } +// +// Write the byte b as an escape sequence if it isn't a printable ASCII +// character and append the escape sequence to s. Additional characters +// that should be escaped can be passed in special. If b is any of these +// characters, b is preceded by a backslash in s. +// static void -escapeChar(string::value_type b, string& s, const string& special) +encodeChar(string::value_type b, string& s, const string& special) { switch(b) { - case '\\': - { - s.append("\\\\"); - break; - } - case '\'': - { - s.append("\\'"); - break; - } - case '"': - { - s.append("\\\""); - break; - } - case '\b': - { - s.append("\\b"); - break; + case '\\': + { + s.append("\\\\"); + break; + } + + case '\'': + { + s.append("\\'"); + break; + } + + case '"': + { + s.append("\\\""); + break; + } + + case '\b': + { + s.append("\\b"); + break; + } + + case '\f': + { + s.append("\\f"); + break; + } + + case '\n': + { + s.append("\\n"); + break; + } + + case '\r': + { + s.append("\\r"); + break; + } + + case '\t': + { + s.append("\\t"); + break; + } + + default: + { + unsigned char i = static_cast<unsigned char>(b); + if(!(i >= 32 && i <= 126)) + { + s.push_back('\\'); + string octal = toOctalString(i); + // + // Add leading zeroes so that we avoid problems during + // decoding. For example, consider the escaped string + // \0013 (i.e., a character with value 1 followed by the + // character '3'). If the leading zeroes were omitted, the + // result would be incorrectly interpreted as a single + // character with value 11. + // + for(string::size_type j = octal.size(); j < 3; j++) + { + s.push_back('0'); + } + s.append(octal); + } + else if(special.find(b) != string::npos) + { + s.push_back('\\'); + s.push_back(b); + } + else + { + s.push_back(b); + } + break; + } } - case '\f': +} + +// +// Add escape sequences (such as "\n", or "\007") to make a string +// readable in ASCII. Any characters that appear in special are +// prefixed with a backslash in the returned string. +// +string +IceUtil::escapeString(const string& s, const string& special) +{ + string::size_type i; + for(i = 0; i < special.size(); ++i) { - s.append("\\f"); - break; + if(static_cast<unsigned char>(special[i]) < 32 || static_cast<unsigned char>(special[i]) > 126) + { + throw IllegalArgumentException(__FILE__, __LINE__, "special characters must be in ASCII range 32-126"); + } } - case '\n': + + string result; + for(i = 0; i < s.size(); ++i) { - s.append("\\n"); - break; + encodeChar(s[i], result, special); } - case '\r': + + return result; +} + +static char +checkChar(char c) +{ + if(!(static_cast<unsigned char>(c) >= 32 && static_cast<unsigned char>(c) <= 126)) { - s.append("\\r"); - break; + throw IllegalArgumentException(__FILE__, __LINE__, "illegal input character"); } - case '\t': + return c; +} + +// +// Decode the character or escape sequence starting at start and return it. +// end marks the one-past-the-end position of the substring to be scanned. +// nextStart is set to the index of the first character following the decoded +// character or escape sequence. +// +static char +decodeChar(const string& s, string::size_type start, string::size_type end, string::size_type& nextStart) +{ + assert(start >= 0); + assert(start < end); + assert(end <= s.size()); + + char c; + + if(s[start] != '\\') { - s.append("\\t"); - break; + c = checkChar(s[start++]); } - default: + else { - // - // Octal encode anything that is outside 32 to 126. - // - unsigned char i = static_cast<unsigned char>(b); - if(i < 32 || i > 126) - { - s.push_back('\\'); - string octal = toOctalString(i); - - // - // Add leading zeroes so that we avoid problems during - // decoding. For example, consider the escaped string - // \0013 (i.e., a character with value 1 followed by the - // character '3'). If the leading zeroes were omitted, the - // result would be incorrectly interpreted as a single - // character with value 11. - // - for(string::size_type j = octal.size(); j < 3; j++) - { - s.push_back('0'); - } - s.append(octal); - } - else if(special.find(b) != string::npos) - { - s.push_back('\\'); - escapeChar(b, s, ""); - } - else - { - s.push_back(b); - } - } + if(start + 1 == end) + { + throw IllegalArgumentException(__FILE__, __LINE__, "trailing backslash in argument"); + } + switch(s[++start]) + { + case '\\': + case '\'': + case '"': + { + c = s[start++]; + break; + } + case 'b': + { + ++start; + c = '\b'; + break; + } + case 'f': + { + ++start; + c = '\f'; + break; + } + case 'n': + { + ++start; + c = '\n'; + break; + } + case 'r': + { + ++start; + c = '\r'; + break; + } + case 't': + { + ++start; + c = '\t'; + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + int oct = 0; + for(int j = 0; j < 3 && start < end; ++j) + { + int charVal = s[start++] - '0'; + if(charVal < 0 || charVal > 7) + { + --start; + break; + } + oct = oct * 8 + charVal; + } + if(oct > 255) + { + throw IllegalArgumentException(__FILE__, __LINE__, "octal value out of range"); + } + c = (char)oct; + break; + } + default: + { + c = checkChar(s[start++]); + break; + } + } } + nextStart = start; + return c; } -string -IceUtil::escapeString(const string& s, const string& special) +// +// Remove escape sequences from s and append the result to sb. +// Return true if successful, false otherwise. +// +static void decodeString(const string& s, string::size_type start, string::size_type end, string& sb) { - string result; - result.reserve(s.size()); - - for(string::size_type i = 0; i < s.size(); i++) + while(start < end) { - escapeChar(s[i], result, special); + sb.push_back(decodeChar(s, start, end, start)); } - - return result; } +// +// Remove escape sequences added by escapeString. +// bool IceUtil::unescapeString(const string& s, string::size_type start, string::size_type end, string& result) { - assert(end <= s.size()); - assert(start <= end); + if(start < 0) + { + throw IllegalArgumentException(__FILE__, __LINE__, "start offset must be >= 0"); + } + if(end > s.size()) + { + throw IllegalArgumentException(__FILE__, __LINE__, "end offset must be <= s.size()"); + } + if(start > end) + { + throw IllegalArgumentException(__FILE__, __LINE__, "start offset must <= end offset"); + } result.reserve(end - start); - while(start < end) + try { - char ch = s[start]; - if(ch == '\\') - { - start++; - if(start == end) - { - return false; // Missing character. - } - ch = s[start]; - switch(ch) - { - case '\\': - { - result.push_back('\\'); - break; - } - case '\'': - case '"': - { - result.push_back(ch); - break; - } - case 'b': - { - result.push_back('\b'); - break; - } - case 'f': - { - result.push_back('\f'); - break; - } - case 'n': - { - result.push_back('\n'); - break; - } - case 'r': - { - result.push_back('\r'); - break; - } - case 't': - { - result.push_back('\t'); - break; - } - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - int count = 0; - int val = 0; - while(count < 3 && start < end && s[start] >= '0' && s[start] <= '9') - { - val <<= 3; - val |= s[start] - '0'; - start++; - count++; - } - if(val > 255) - { - return false; // Octal value out of range. - } - result.push_back((string::value_type)val); - continue; // Don't increment start. - } - default: - { - unsigned char i = static_cast<unsigned char>(ch); - if(i < 32 || i > 126) - { - return false; // Malformed encoding. - } - else - { - result.push_back(ch); - } - } - } - } - else - { - result.push_back(ch); - } - start++; + result.clear(); + decodeString(s, start, end, result); + return true; + } + catch(...) + { + return false; } - - return true; } // @@ -246,17 +320,17 @@ IceUtil::checkQuote(const string& s, string::size_type start) string::value_type quoteChar = s[start]; if(quoteChar == '"' || quoteChar == '\'') { - start++; - string::size_type pos; - while(start < s.size() && (pos = s.find(quoteChar, start)) != string::npos) - { - if(s[pos - 1] != '\\') - { - return pos; - } - start = pos + 1; - } - return string::npos; // Unmatched quote. + start++; + string::size_type pos; + while(start < s.size() && (pos = s.find(quoteChar, start)) != string::npos) + { + if(s[pos - 1] != '\\') + { + return pos; + } + start = pos + 1; + } + return string::npos; // Unmatched quote. } return 0; // Not quoted. } |