summaryrefslogtreecommitdiff
path: root/cpp/src/slice2java/Gen.cpp
diff options
context:
space:
mode:
authorJose <jose@zeroc.com>2016-03-08 22:09:37 +0100
committerJose <jose@zeroc.com>2016-03-08 22:09:37 +0100
commitcfa587bc0be11012b9ab4e8fd333e96080eb71e2 (patch)
tree31ecc86af1a9a84d09eb3c62cd302830d02a74ce /cpp/src/slice2java/Gen.cpp
parentminor fixes to icegriddb/icestormdb (diff)
parentmore icegriddb/icestormdb fixes (diff)
downloadice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.tar.bz2
ice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.tar.xz
ice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.zip
Merge remote-tracking branch 'origin/3.6'
Diffstat (limited to 'cpp/src/slice2java/Gen.cpp')
-rw-r--r--cpp/src/slice2java/Gen.cpp192
1 files changed, 165 insertions, 27 deletions
diff --git a/cpp/src/slice2java/Gen.cpp b/cpp/src/slice2java/Gen.cpp
index 4c5cf6f2477..038568cf64c 100644
--- a/cpp/src/slice2java/Gen.cpp
+++ b/cpp/src/slice2java/Gen.cpp
@@ -14,6 +14,7 @@
#include <IceUtil/Iterator.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <cstring>
#include <limits>
@@ -23,6 +24,44 @@ using namespace Slice;
using namespace IceUtil;
using namespace IceUtilInternal;
+string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4);
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned short> u16buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+ {
+ out << u16CodePoint(*c);
+ }
+}
+
static string
sliceModeToIceMode(Operation::Mode opMode)
{
@@ -1729,53 +1768,152 @@ Slice::JavaVisitor::writeConstantValue(Output& out, const TypePtr& type, const S
static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
out << "\"";
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- switch(*c)
+ char c = value[i];
+ if(static_cast<unsigned char>(c) < 128) // Single byte character
{
//
- // Java doesn't want '\n' or '\r\n' encoded as universal
- // characters, that gives an error "unclosed string literal"
+ // Print as unicode if not in basic source character set
//
- case '\r':
+ switch(c)
{
- out << "\\r";
- break;
- }
- case '\n':
- {
- out << "\\n";
- break;
- }
- default:
- {
- unsigned char uc = *c;
- ostringstream s;
- s << "\\u";
- s.flags(ios_base::hex);
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- out << s.str();
- break;
+ //
+ // Java doesn't want '\n' or '\r\n' encoded as universal
+ // characters, that gives an error "unclosed string literal"
+ //
+ case '\r':
+ {
+ out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ out << "\\n";
+ break;
+ }
+ default:
+ {
+ out << u16CodePoint(c);
+ break;
+ }
}
}
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pedding characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of slash \ will escape the backslash and
+ // the codepoint will be interpreted as its charaters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Java doesn't like this special characters encoded as universal characters
+ //
+ if(v == 0x5c)
+ {
+ out << "\\\\";
+ }
+ else if(v == 0xa)
+ {
+ out << "\\n";
+ }
+ else if(v == 0xd)
+ {
+ out << "\\r";
+ }
+ else if(v == 0x22)
+ {
+ out << "\\\"";
+ }
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ else if(v > 0xFFFF)
+ {
+ unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
+ out << u16CodePoint(high);
+ out << u16CodePoint(low);
+ }
+ else
+ {
+ out << u16CodePoint(static_cast<unsigned int>(v));
+ }
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
out << "\\";
break;
}
}
- out << *c;
+ out << value[i]; // Print normally if in basic source character set
}
+ i++;
+ }
+
+ //
+ // Write any pedding characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
}
out << "\"";