Bug 867.

author: Michi Henning <michi@zeroc.com> 2006-02-21 02:17:16 +0000
committer: Michi Henning <michi@zeroc.com> 2006-02-21 02:17:16 +0000
commit: 2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f (patch)
tree: 78fa7550ce16ede027333c9e12b2b571d5fbf154 /cpp/src/IceUtil/StringUtil.cpp
parent: Fixed Equals() so it can't throw an exception. (diff)
download: ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.bz2
ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.xz
ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.zip
1 files changed, 258 insertions, 184 deletions
diff --git a/cpp/src/IceUtil/StringUtil.cpp b/cpp/src/IceUtil/StringUtil.cpp
index 954394fcc25..89d9efdaae3 100644
--- a/cpp/src/IceUtil/StringUtil.cpp
+++ b/cpp/src/IceUtil/StringUtil.cpp
@@ -8,8 +8,10 @@
 // **********************************************************************
 
 #include <IceUtil/StringUtil.h>
+#include <IceUtil/Unicode.h>
 
 using namespace std;
+using namespace IceUtil;
 
 static string
 toOctalString(unsigned int n)
@@ -29,209 +31,281 @@ toOctalString(unsigned int n)
     return string(s, charPos, (32 - charPos));
 }
 
+//
+// Append the byte b to s, writing it as an escape sequence if it is a
+// backslash, a quote, or not a printable ASCII character. Additional
+// characters that should be escaped can be passed in special. If b is
+// any of these characters, b is preceded by a backslash in s.
+//
 static void
-escapeChar(string::value_type b, string& s, const string& special)
+encodeChar(string::value_type b, string& s, const string& special)
 {
     switch(b)
     {
-    case '\\':
-    {
-        s.append("\\\\");
-        break;
-    }
-    case '\'':
-    {
-        s.append("\\'");
-        break;
-    }
-    case '"':
-    {
-        s.append("\\\"");
-        break;
-    }
-    case '\b':
-    {
-        s.append("\\b");
-        break;
+	case '\\': 
+	{
+	    s.append("\\\\");
+	    break;
+	}
+	
+	case '\'': 
+	{
+	    s.append("\\'");
+	    break;
+	}
+	
+	case '"': 
+	{
+	    s.append("\\\"");
+	    break;
+	}
+	
+	case '\b': 
+	{
+	    s.append("\\b");
+	    break;
+	}
+	
+	case '\f': 
+	{
+	    s.append("\\f");
+	    break;
+	}
+	
+	case '\n': 
+	{
+	    s.append("\\n");
+	    break;
+	}
+	
+	case '\r': 
+	{
+	    s.append("\\r");
+	    break;
+	}
+	
+	case '\t': 
+	{
+	    s.append("\\t");
+	    break;
+	}
+	
+	default: 
+	{
+	    unsigned char i = static_cast<unsigned char>(b);
+	    if(!(i >= 32 && i <= 126))
+	    {
+		s.push_back('\\');
+		string octal = toOctalString(i);
+		//
+		// Add leading zeroes so that we avoid problems during
+		// decoding. For example, consider the escaped string
+		// \0013 (i.e., a character with value 1 followed by the
+		// character '3'). If the leading zeroes were omitted, the
+		// result would be incorrectly interpreted as a single
+		// character with value 11.
+		//
+		for(string::size_type j = octal.size(); j < 3; j++)
+		{
+		    s.push_back('0');
+		}
+		s.append(octal);
+	    }
+	    else if(special.find(b) != string::npos)
+	    {
+		s.push_back('\\');
+		s.push_back(b);
+	    }
+	    else
+	    {
+		s.push_back(b);
+	    }
+	    break;
+	}
     }
-    case '\f':
+}
+
+//
+// Add escape sequences (such as "\n" or "\007") so that a string is
+// readable as printable ASCII. Any character that appears in special
+// is prefixed with a backslash in the returned string.
+//
+string
+IceUtil::escapeString(const string& s, const string& special)
+{
+    string::size_type i;
+    for(i = 0; i < special.size(); ++i)
     {
-        s.append("\\f");
-        break;
+	if(static_cast<unsigned char>(special[i]) < 32 || static_cast<unsigned char>(special[i]) > 126)
+	{
+	    throw IllegalArgumentException(__FILE__, __LINE__, "special characters must be in ASCII range 32-126");
+	}
     }
-    case '\n':
+    
+    string result;
+    for(i = 0; i < s.size(); ++i)
     {
-        s.append("\\n");
-        break;
+	encodeChar(s[i], result, special);
     }
-    case '\r':
+    
+    return result;
+}
+
+static char
+checkChar(char c)
+{
+    if(!(static_cast<unsigned char>(c) >= 32 && static_cast<unsigned char>(c) <= 126))
     {
-        s.append("\\r");
-        break;
+        throw IllegalArgumentException(__FILE__, __LINE__, "illegal input character");
     }
-    case '\t':
+    return c;
+}
+
+//
+// Decode the character or escape sequence starting at start and return it.
+// end marks the one-past-the-end position of the substring to be scanned.
+// nextStart is set to the index of the first character following the decoded
+// character or escape sequence.
+//
+static char
+decodeChar(const string& s, string::size_type start, string::size_type end, string::size_type& nextStart)
+{
+    assert(start >= 0);
+    assert(start < end);
+    assert(end <= s.size());
+
+    char c;
+
+    if(s[start] != '\\')
     {
-        s.append("\\t");
-        break;
+	c = checkChar(s[start++]);
     }
-    default:
+    else
     {
-	//
-	// Octal encode anything that is outside 32 to 126.
-	//
-    	unsigned char i = static_cast<unsigned char>(b);
-        if(i < 32 || i > 126)
-        {
-            s.push_back('\\');
-            string octal = toOctalString(i);
-
-            //
-            // Add leading zeroes so that we avoid problems during
-            // decoding. For example, consider the escaped string
-            // \0013 (i.e., a character with value 1 followed by the
-            // character '3'). If the leading zeroes were omitted, the
-            // result would be incorrectly interpreted as a single
-            // character with value 11.
-            //
-            for(string::size_type j = octal.size(); j < 3; j++)
-            {
-                s.push_back('0');
-            }
-            s.append(octal);
-        }
-        else if(special.find(b) != string::npos)
-        {
-            s.push_back('\\');
-            escapeChar(b, s, "");
-        }
-        else
-        {
-            s.push_back(b);
-        }
-    }
+	if(start + 1 == end)
+	{
+	    throw IllegalArgumentException(__FILE__, __LINE__, "trailing backslash in argument");
+	}
+	switch(s[++start])
+	{
+	    case '\\': 
+	    case '\'': 
+	    case '"': 
+	    {
+		c = s[start++];
+		break;
+	    }
+	    case 'b': 
+	    {
+		++start;
+		c = '\b';
+		break;
+	    }
+	    case 'f': 
+	    {
+		++start;
+		c = '\f';
+		break;
+	    }
+	    case 'n': 
+	    {
+		++start;
+		c = '\n';
+		break;
+	    }
+	    case 'r': 
+	    {
+		++start;
+		c = '\r';
+		break;
+	    }
+	    case 't': 
+	    {
+		++start;
+		c = '\t';
+		break;
+	    }
+	    case '0':
+	    case '1':
+	    case '2':
+	    case '3':
+	    case '4':
+	    case '5':
+	    case '6':
+	    case '7':
+	    {
+		int oct = 0;
+		for(int j = 0; j < 3 && start < end; ++j)
+		{
+		    int charVal = s[start++] - '0';
+		    if(charVal < 0 || charVal > 7)
+		    {
+		        --start;
+			break;
+		    }
+		    oct = oct * 8 + charVal;
+		}
+		if(oct > 255)
+		{
+		    throw IllegalArgumentException(__FILE__, __LINE__, "octal value out of range");
+		}
+		c = (char)oct;
+		break;
+	    }
+	    default:
+	    {
+		c = checkChar(s[start++]);
+		break;
+	    }
+	}
     }
+    nextStart = start;
+    return c;
 }
 
-string
-IceUtil::escapeString(const string& s, const string& special)
+//
+// Remove escape sequences from s[start, end) and append the result to sb.
+// Throws IllegalArgumentException if the input is malformed.
+//
+static void decodeString(const string& s, string::size_type start, string::size_type end, string& sb)
 {
-    string result;
-    result.reserve(s.size());
-
-    for(string::size_type i = 0; i < s.size(); i++)
+    while(start < end)
     {
-        escapeChar(s[i], result, special);
+	sb.push_back(decodeChar(s, start, end, start));
     }
-
-    return result;
 }
 
+//
+// Remove escape sequences added by escapeString.
+//
 bool
 IceUtil::unescapeString(const string& s, string::size_type start, string::size_type end, string& result)
 {
-    assert(end <= s.size());
-    assert(start <= end);
+    if(start < 0)
+    {
+        throw IllegalArgumentException(__FILE__, __LINE__, "start offset must be >= 0");
+    }
+    if(end > s.size())
+    {
+        throw IllegalArgumentException(__FILE__, __LINE__, "end offset must be <= s.size()");
+    }
+    if(start > end)
+    {
+	throw IllegalArgumentException(__FILE__, __LINE__, "start offset must <= end offset");
+    }
 
     result.reserve(end - start);
 
-    while(start < end)
+    try
     {
-        char ch = s[start];
-        if(ch == '\\')
-        {
-            start++;
-            if(start == end)
-            {
-                return false; // Missing character.
-            }
-            ch = s[start];
-            switch(ch)
-            {
-            case '\\':
-            {
-                result.push_back('\\');
-                break;
-            }
-            case '\'':
-            case '"':
-            {
-                result.push_back(ch);
-                break;
-            }
-            case 'b':
-            {
-                result.push_back('\b');
-                break;
-            }
-            case 'f':
-            {
-                result.push_back('\f');
-                break;
-            }
-            case 'n':
-            {
-                result.push_back('\n');
-                break;
-            }
-            case 'r':
-            {
-                result.push_back('\r');
-                break;
-            }
-            case 't':
-            {
-                result.push_back('\t');
-                break;
-            }
-            case '0':
-            case '1':
-            case '2':
-            case '3':
-            case '4':
-            case '5':
-            case '6':
-            case '7':
-            case '8':
-            case '9':
-            {
-                int count = 0;
-                int val = 0;
-                while(count < 3 && start < end && s[start] >= '0' && s[start] <= '9')
-                {
-                    val <<= 3;
-                    val |= s[start] - '0';
-                    start++;
-                    count++;
-                }
-                if(val > 255)
-                {
-                    return false; // Octal value out of range.
-                }
-                result.push_back((string::value_type)val);
-                continue; // Don't increment start.
-            }
-            default:
-            {
-		unsigned char i = static_cast<unsigned char>(ch);
-		if(i < 32 || i > 126)
-		{
-		    return false; // Malformed encoding.
-		}
-		else
-		{
-		    result.push_back(ch);
-		}
-            }
-            }
-        }
-        else
-        {
-            result.push_back(ch);
-        }
-        start++;
+	result.clear();
+	decodeString(s, start, end, result);
+	return true;
+    }
+    catch(...)
+    {
+	return false;
     }
-
-    return true;
 }
 
 //
@@ -246,17 +320,17 @@ IceUtil::checkQuote(const string& s, string::size_type start)
     string::value_type quoteChar = s[start];
     if(quoteChar == '"' || quoteChar == '\'')
     {
-        start++;
-        string::size_type pos;
-        while(start < s.size() && (pos = s.find(quoteChar, start)) != string::npos)
-        {
-            if(s[pos - 1] != '\\')
-            {
-                return pos;
-            }
-            start = pos + 1;
-        }
-        return string::npos; // Unmatched quote.
+	start++;
+	string::size_type pos;
+	while(start < s.size() && (pos = s.find(quoteChar, start)) != string::npos)
+	{
+	    if(s[pos - 1] != '\\')
+	    {
+		return pos;
+	    }
+	    start = pos + 1;
+	}
+	return string::npos; // Unmatched quote.
     }
     return 0; // Not quoted.
 }
author	Michi Henning <michi@zeroc.com>	2006-02-21 02:17:16 +0000
committer	Michi Henning <michi@zeroc.com>	2006-02-21 02:17:16 +0000
commit	2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f (patch)
tree	78fa7550ce16ede027333c9e12b2b571d5fbf154 /cpp/src/IceUtil/StringUtil.cpp
parent	Fixed Equals() so it can't throw an exception. (diff)
download	ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.bz2 ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.tar.xz ice-2bd1d1b7c21b50d1f643962bcd1c44fb94dee44f.zip