summaryrefslogtreecommitdiff
path: root/cpp/src/Slice/PythonUtil.cpp
diff options
context:
space:
mode:
authorJose <jose@zeroc.com>2016-03-08 13:46:55 +0100
committerJose <jose@zeroc.com>2016-03-08 13:46:55 +0100
commit2bd402833bfdb54c1940dd0038be8af05d6f5e6f (patch)
treeeb7be3853dc45452397b730e586434f6e859efb3 /cpp/src/Slice/PythonUtil.cpp
parentWindows fixes for icegriddb/icestormdb (diff)
downloadice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.bz2
ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.xz
ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.zip
ICE-6991 - Add support for unicode escape sequences
Diffstat (limited to 'cpp/src/Slice/PythonUtil.cpp')
-rw-r--r--cpp/src/Slice/PythonUtil.cpp177
1 files changed, 124 insertions, 53 deletions
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp
index 220e521d85d..92f657e8221 100644
--- a/cpp/src/Slice/PythonUtil.cpp
+++ b/cpp/src/Slice/PythonUtil.cpp
@@ -13,6 +13,7 @@
#include <IceUtil/IceUtil.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <climits>
#include <iterator>
@@ -1879,68 +1880,138 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
- _out << "\\\\";
- break;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\b':
- {
- _out << "\\b";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(*c) == charSet.end())
+ case '"':
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ _out << "\\\"";
+ break;
}
- else
+ case '\\':
{
- _out << *c; // Print normally if in basic source character set.
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of slash \ will escape the backslash and
+ // the codepoint will be interpreted as its charaters
+ //
+ // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(v);
+
+ vector<unsigned char> u8buffer;
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
+ case '\r':
+ {
+ _out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ _out << "\\n";
+ break;
+ }
+ case '\t':
+ {
+ _out << "\\t";
+ break;
+ }
+ case '\b':
+ {
+ _out << "\\b";
+ break;
+ }
+ case '\f':
+ {
+ _out << "\\f";
+ break;
+ }
+ default:
+ {
+ if(charSet.find(c) == charSet.end())
+ {
+ unsigned char uc = c; // Char may be signed, so make it positive.
+ stringstream s;
+ s << "\\"; // Print as octal if not in basic source character set.
+ s.flags(ios_base::oct);
+ s.width(3);
+ s.fill('0');
+ s << static_cast<unsigned>(uc);
+ _out << s.str();
+ }
+ else
+ {
+ _out << c; // Print normally if in basic source character set.
+ }
+ break;
}
- break;
- }
}
+ ++i;
}
- _out << "\""; // Closing "
+ _out << "\""; // Closing "
break;
}
case Slice::Builtin::KindObject: