summaryrefslogtreecommitdiff
path: root/cpp/src
diff options
context:
space:
mode:
authorJose <jose@zeroc.com>2016-03-08 22:09:37 +0100
committerJose <jose@zeroc.com>2016-03-08 22:09:37 +0100
commitcfa587bc0be11012b9ab4e8fd333e96080eb71e2 (patch)
tree31ecc86af1a9a84d09eb3c62cd302830d02a74ce /cpp/src
parentminor fixes to icegriddb/icestormdb (diff)
parentmore icegriddb/icestormdb fixes (diff)
downloadice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.tar.bz2
ice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.tar.xz
ice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.zip
Merge remote-tracking branch 'origin/3.6'
Diffstat (limited to 'cpp/src')
-rw-r--r--cpp/src/IceGrid/.gitignore2
-rw-r--r--cpp/src/IceStorm/.gitignore2
-rw-r--r--cpp/src/IceUtil/Unicode.cpp38
-rw-r--r--cpp/src/IceUtil/Unicode.h9
-rw-r--r--cpp/src/Slice/PythonUtil.cpp177
-rw-r--r--cpp/src/Slice/RubyUtil.cpp176
-rw-r--r--cpp/src/Slice/Scanner.cpp231
-rw-r--r--cpp/src/Slice/Scanner.l181
-rw-r--r--cpp/src/slice2cpp/Gen.cpp88
-rw-r--r--cpp/src/slice2cpp/Makefile2
-rw-r--r--cpp/src/slice2cs/Gen.cpp153
-rw-r--r--cpp/src/slice2cs/Makefile2
-rw-r--r--cpp/src/slice2java/Gen.cpp192
-rw-r--r--cpp/src/slice2java/Makefile2
-rw-r--r--cpp/src/slice2js/Gen.cpp141
-rw-r--r--cpp/src/slice2js/Makefile2
-rw-r--r--cpp/src/slice2php/Main.cpp93
-rw-r--r--cpp/src/slice2php/Makefile2
18 files changed, 1229 insertions, 264 deletions
diff --git a/cpp/src/IceGrid/.gitignore b/cpp/src/IceGrid/.gitignore
index dd791e621d9..41d881d4e8a 100644
--- a/cpp/src/IceGrid/.gitignore
+++ b/cpp/src/IceGrid/.gitignore
@@ -1,6 +1,8 @@
// Generated by makegitignore.py
// IMPORTANT: Do not edit this file -- any edits made here will be lost!
+DBTypes.cpp
+DBTypes.h
IceLocatorDiscovery.cpp
IceLocatorDiscovery.h
Internal.cpp
diff --git a/cpp/src/IceStorm/.gitignore b/cpp/src/IceStorm/.gitignore
index 6ede462d77f..4fad7b4af9d 100644
--- a/cpp/src/IceStorm/.gitignore
+++ b/cpp/src/IceStorm/.gitignore
@@ -1,6 +1,8 @@
// Generated by makegitignore.py
// IMPORTANT: Do not edit this file -- any edits made here will be lost!
+DBTypes.h
+DBTypes.cpp
Instrumentation.cpp
Election.cpp
IceStormInternal.cpp
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp
index cae3476e277..7bad1d67c17 100644
--- a/cpp/src/IceUtil/Unicode.cpp
+++ b/cpp/src/IceUtil/Unicode.cpp
@@ -128,4 +128,42 @@ IceUtilInternal::convertUTF8ToUTFWstring(const Byte*& sourceStart, const Byte* s
return result;
}
+//
+// Converts the UTF-8 bytes in source into UTF-16 code units stored in target.
+//
+// target is first sized to source.size() code units and used as the output
+// buffer. When the conversion returns conversionOK, target is trimmed to the
+// code units actually written; for any other result target keeps the
+// provisional size and its contents should not be relied upon.
+//
+// An empty source yields an empty target and conversionOK.
+//
+ConversionResult
+IceUtilInternal::convertUTF8ToUTF16(const vector<unsigned char>& source, vector<unsigned short>& target, ConversionFlags flags)
+{
+    //
+    // Taking &source[0] / &target[0] on an empty vector is undefined
+    // behavior, so handle the empty input before touching either buffer.
+    //
+    if(source.empty())
+    {
+        target.clear();
+        return conversionOK;
+    }
+
+    target.resize(source.size());
+    const unsigned char* sourceStart = &source[0];
+    const unsigned char* sourceEnd = sourceStart + source.size();
+
+    unsigned short* targetStart = &target[0];
+    unsigned short* targetEnd = targetStart + target.size();
+    ConversionResult result = ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+    if(result == conversionOK)
+    {
+        // targetStart was advanced past the last code unit written.
+        target.resize(targetStart - &target[0]);
+    }
+    return result;
+}
+
+//
+// Converts the UTF-32 code points in source into UTF-8 bytes stored in target.
+//
+// target is first sized to four bytes per source code point and used as the
+// output buffer. When the conversion returns conversionOK, target is trimmed
+// to the bytes actually written; for any other result target keeps the
+// provisional size and its contents should not be relied upon.
+//
+// An empty source yields an empty target and conversionOK.
+//
+ConversionResult
+IceUtilInternal::convertUTF32ToUTF8(const vector<unsigned int>& source, vector<unsigned char>& target, ConversionFlags flags)
+{
+    //
+    // Taking &source[0] / &target[0] on an empty vector is undefined
+    // behavior, so handle the empty input before touching either buffer.
+    //
+    if(source.empty())
+    {
+        target.clear();
+        return conversionOK;
+    }
+
+    target.resize(source.size() * 4);
+
+    const unsigned int* sourceStart = &source[0];
+    const unsigned int* sourceEnd = sourceStart + source.size();
+
+    unsigned char* targetStart = &target[0];
+    unsigned char* targetEnd = targetStart + target.size();
+    ConversionResult result = ConvertUTF32toUTF8(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+    if(result == conversionOK)
+    {
+        // targetStart was advanced past the last byte written.
+        target.resize(targetStart - &target[0]);
+    }
+    return result;
+}
+
+
diff --git a/cpp/src/IceUtil/Unicode.h b/cpp/src/IceUtil/Unicode.h
index 00333ce8a44..2c96d6c6448 100644
--- a/cpp/src/IceUtil/Unicode.h
+++ b/cpp/src/IceUtil/Unicode.h
@@ -44,6 +44,15 @@ ConversionResult
convertUTF8ToUTFWstring(const IceUtil::Byte*& sourceStart, const IceUtil::Byte* sourceEnd,
std::wstring& target, IceUtil::ConversionFlags flags);
+
+ICE_UTIL_API ConversionResult
+convertUTF8ToUTF16(const std::vector<unsigned char>&, std::vector<unsigned short>&,
+ IceUtil::ConversionFlags);
+
+ICE_UTIL_API ConversionResult
+convertUTF32ToUTF8(const std::vector<unsigned int>&, std::vector<unsigned char>&,
+ IceUtil::ConversionFlags);
+
}
#endif
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp
index a3abf0de03a..23b95898317 100644
--- a/cpp/src/Slice/PythonUtil.cpp
+++ b/cpp/src/Slice/PythonUtil.cpp
@@ -13,6 +13,7 @@
#include <IceUtil/IceUtil.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <climits>
#include <iterator>
@@ -1881,68 +1882,138 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
- _out << "\\\\";
- break;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\b':
- {
- _out << "\\b";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(*c) == charSet.end())
+ case '"':
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ _out << "\\\"";
+ break;
}
- else
+ case '\\':
{
- _out << *c; // Print normally if in basic source character set.
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted literally, as its individual characters
+ //
+ // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(static_cast<unsigned int>(v));
+
+ vector<unsigned char> u8buffer;
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
+ case '\r':
+ {
+ _out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ _out << "\\n";
+ break;
+ }
+ case '\t':
+ {
+ _out << "\\t";
+ break;
+ }
+ case '\b':
+ {
+ _out << "\\b";
+ break;
+ }
+ case '\f':
+ {
+ _out << "\\f";
+ break;
+ }
+ default:
+ {
+ if(charSet.find(c) == charSet.end())
+ {
+ unsigned char uc = c; // Char may be signed, so make it positive.
+ stringstream s;
+ s << "\\"; // Print as octal if not in basic source character set.
+ s.flags(ios_base::oct);
+ s.width(3);
+ s.fill('0');
+ s << static_cast<unsigned>(uc);
+ _out << s.str();
+ }
+ else
+ {
+ _out << c; // Print normally if in basic source character set.
+ }
+ break;
}
- break;
- }
}
+ ++i;
}
- _out << "\""; // Closing "
+ _out << "\""; // Closing "
break;
}
case Slice::Builtin::KindValue:
diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp
index 0c9cfd49bd5..2c066a8efbe 100644
--- a/cpp/src/Slice/RubyUtil.cpp
+++ b/cpp/src/Slice/RubyUtil.cpp
@@ -12,6 +12,7 @@
#include <Slice/Util.h>
#include <IceUtil/Functional.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <iterator>
using namespace std;
@@ -1472,68 +1473,137 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
- _out << "\\\\";
- break;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\b':
- {
- _out << "\\b";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(*c) == charSet.end())
+ case '"':
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ _out << "\\\"";
+ break;
}
- else
+ case '\\':
{
- _out << *c; // Print normally if in basic source character set.
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted literally, as its individual characters
+ //
+ // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(static_cast<unsigned int>(v));
+
+ vector<unsigned char> u8buffer;
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
+ case '\r':
+ {
+ _out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ _out << "\\n";
+ break;
+ }
+ case '\t':
+ {
+ _out << "\\t";
+ break;
+ }
+ case '\b':
+ {
+ _out << "\\b";
+ break;
+ }
+ case '\f':
+ {
+ _out << "\\f";
+ break;
+ }
+ default:
+ {
+ if(charSet.find(c) == charSet.end())
+ {
+ unsigned char uc = c; // Char may be signed, so make it positive.
+ stringstream s;
+ s << "\\"; // Print as octal if not in basic source character set.
+ s.flags(ios_base::oct);
+ s.width(3);
+ s.fill('0');
+ s << static_cast<unsigned>(uc);
+ _out << s.str();
+ }
+ else
+ {
+ _out << c; // Print normally if in basic source character set.
+ }
+ break;
}
- break;
- }
}
+ ++i;
}
- _out << "\""; // Closing "
+ _out << "\".force_encoding(\"utf-8\")"; // Closing "
break;
}
diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp
index a49c6c0433a..27d362e04c9 100644
--- a/cpp/src/Slice/Scanner.cpp
+++ b/cpp/src/Slice/Scanner.cpp
@@ -568,6 +568,8 @@ char *slice_text;
#include <Slice/Grammar.h>
#include <IceUtil/InputUtil.h>
+#include <iomanip>
+
#include <stdlib.h>
#include <math.h>
@@ -630,7 +632,7 @@ int checkKeyword(string&);
-#line 633 "lex.yy.c"
+#line 635 "lex.yy.c"
#define INITIAL 0
#define BOMSCAN 1
@@ -849,10 +851,10 @@ YY_DECL
}
{
-#line 92 "Scanner.l"
+#line 94 "Scanner.l"
-#line 855 "lex.yy.c"
+#line 857 "lex.yy.c"
while ( 1 ) /* loops until end-of-file is reached */
{
@@ -911,7 +913,7 @@ case 1:
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 94 "Scanner.l"
+#line 96 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -926,7 +928,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1);
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 101 "Scanner.l"
+#line 103 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -939,7 +941,7 @@ case 3:
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 108 "Scanner.l"
+#line 110 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -954,7 +956,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1);
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 115 "Scanner.l"
+#line 117 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -964,7 +966,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 122 "Scanner.l"
+#line 124 "Scanner.l"
{
// C++-style comment
BEGIN(MAINSCAN);
@@ -982,7 +984,7 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
-#line 137 "Scanner.l"
+#line 139 "Scanner.l"
{
// C-style comment
BEGIN(MAINSCAN);
@@ -1026,7 +1028,7 @@ YY_RULE_SETUP
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 178 "Scanner.l"
+#line 180 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_SCOPE_DELIMITER;
@@ -1034,7 +1036,7 @@ YY_RULE_SETUP
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 183 "Scanner.l"
+#line 185 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_METADATA_OPEN;
@@ -1042,7 +1044,7 @@ YY_RULE_SETUP
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 188 "Scanner.l"
+#line 190 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_METADATA_CLOSE;
@@ -1050,7 +1052,7 @@ YY_RULE_SETUP
YY_BREAK
case 10:
YY_RULE_SETUP
-#line 193 "Scanner.l"
+#line 195 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_OPEN;
@@ -1058,7 +1060,7 @@ YY_RULE_SETUP
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 198 "Scanner.l"
+#line 200 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_CLOSE;
@@ -1067,7 +1069,7 @@ YY_RULE_SETUP
case 12:
/* rule 12 can match eol */
YY_RULE_SETUP
-#line 203 "Scanner.l"
+#line 205 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr ident = new StringTok;
@@ -1095,7 +1097,7 @@ YY_RULE_SETUP
YY_BREAK
case 13:
YY_RULE_SETUP
-#line 228 "Scanner.l"
+#line 230 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr ident = new StringTok;
@@ -1106,7 +1108,7 @@ YY_RULE_SETUP
YY_BREAK
case 14:
YY_RULE_SETUP
-#line 236 "Scanner.l"
+#line 238 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr str = new StringTok;
@@ -1135,32 +1137,42 @@ YY_RULE_SETUP
switch(next)
{
case '\\':
+ {
+ str->v += '\\';
+ str->v += '\\';
+ break;
+ }
case '"':
case '\'':
{
str->v += next;
break;
}
+
case 'n':
{
str->v += '\n';
break;
}
+
case 'r':
{
str->v += '\r';
break;
}
+
case 't':
{
str->v += '\t';
break;
}
+
case 'v':
{
str->v += '\v';
break;
}
+
case 'f':
{
str->v += '\f';
@@ -1185,68 +1197,181 @@ YY_RULE_SETUP
break;
}
+ //
+ // Octal value \nnn limited to three octal digits but terminate at the first character
+ // that is not a valid octal digit if encountered sooner.
+ //
+ // Every octal digit 0-7 needs a label here: a digit without one falls through to the
+ // default branch and is reported as an unknown escape sequence.
+ //
case '0':
case '1':
case '2':
case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
{
static string octalDigits = "01234567";
- unsigned short us = next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
+ IceUtil::Int64 value = 0;
+ string escape;
+ escape += next;
+ for(int i = 0; i < 2; ++i)
{
- str->literal += next;
- us = us * 8 + next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
- us = us * 8 + next - '0';
- }
- else
+ next = static_cast<char>(yyinput());
+ if(octalDigits.find_first_of(next) == string::npos)
{
unput(next);
+ break;
}
+ escape += next;
}
- else
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+ if(value == 0)
{
- unput(next);
+ unit->error("illegal NUL character in string constant");
}
- if(us == 0)
+ else if(value > 255)
{
- unit->error("illegal NUL character in string constant");
+ ostringstream os;
+ os << "octal escape sequence out of range: '\\" << oct << value << "'";
+ unit->warning(os.str());
}
- str->v += static_cast<char>(us);
+ str->v += static_cast<char>(value);
break;
}
case 'x':
{
- IceUtil::Int64 ull = 0;
+ IceUtil::Int64 value = 0;
+ string escape = "";
while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
{
+ escape += next;
+ }
+ unput(next);
+
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+ else if(value > 255)
+ {
+ ostringstream os;
+ os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+ unit->warning(os.str());
+ }
+ str->v += static_cast<char>(value);
+ break;
+ }
+
+ //
+ // Universal character name \unnnn code point U+nnnn
+ //
+ case 'u':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 4; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- ull *= 16;
- if(isdigit(static_cast<unsigned char>(next)))
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - '0';
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
- else if(islower(static_cast<unsigned char>(next)))
- {
- ull += next - 'a' + 10;
- }
- else
+ escape += next;
+ }
+
+ value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'u';
+ os.fill('0');
+ os.width(4);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+
+ break;
+ }
+
+ case 'U':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 8; ++i)
+ {
+ next = static_cast<char>(yyinput());
+ str->literal += next;
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - 'A' + 10;
+
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
+ escape += next;
}
- unput(next);
- if(ull == 0)
+
+ value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'U';
+ os.fill('0');
+ os.width(8);
+ os << hex << value;
+
+ if(value == 0)
{
unit->error("illegal NUL character in string constant");
}
- str->v += static_cast<char>(ull);
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
break;
}
- // TODO: add universal character names
+
default:
{
+ ostringstream os;
+ os << "unknown escape sequence '\\" << next << "'";
+ unit->warning(os.str());
+ //
+ // We escape the backslash in an unknown escape sequence
+ // to keep compatibility with 3.6
+ //
+ str->v += '\\';
str->v += c;
unput(next);
}
@@ -1263,7 +1388,7 @@ YY_RULE_SETUP
YY_BREAK
case 15:
YY_RULE_SETUP
-#line 390 "Scanner.l"
+#line 515 "Scanner.l"
{
BEGIN(MAINSCAN);
IntegerTokPtr itp = new IntegerTok;
@@ -1282,7 +1407,7 @@ YY_RULE_SETUP
YY_BREAK
case 16:
YY_RULE_SETUP
-#line 406 "Scanner.l"
+#line 531 "Scanner.l"
{
BEGIN(MAINSCAN);
errno = 0;
@@ -1316,7 +1441,7 @@ YY_RULE_SETUP
case 17:
/* rule 17 can match eol */
YY_RULE_SETUP
-#line 436 "Scanner.l"
+#line 561 "Scanner.l"
{
// Ignore white-space
@@ -1332,7 +1457,7 @@ YY_RULE_SETUP
YY_BREAK
case 18:
YY_RULE_SETUP
-#line 449 "Scanner.l"
+#line 574 "Scanner.l"
{
// Ignore UTF-8 BOM, rule only active when parsing start of file.
@@ -1341,7 +1466,7 @@ YY_RULE_SETUP
YY_BREAK
case 19:
YY_RULE_SETUP
-#line 455 "Scanner.l"
+#line 580 "Scanner.l"
{
BEGIN(MAINSCAN);
if(slice_text[0] < 32 || slice_text[0] > 126)
@@ -1360,10 +1485,10 @@ YY_RULE_SETUP
YY_BREAK
case 20:
YY_RULE_SETUP
-#line 471 "Scanner.l"
+#line 596 "Scanner.l"
ECHO;
YY_BREAK
-#line 1366 "lex.yy.c"
+#line 1491 "lex.yy.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(BOMSCAN):
case YY_STATE_EOF(MAINSCAN):
@@ -2364,7 +2489,7 @@ void slice_free (void * ptr )
#define YYTABLES_NAME "yytables"
-#line 470 "Scanner.l"
+#line 595 "Scanner.l"
diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l
index 190c00bcf5c..a9c381b7260 100644
--- a/cpp/src/Slice/Scanner.l
+++ b/cpp/src/Slice/Scanner.l
@@ -13,6 +13,8 @@
#include <Slice/Grammar.h>
#include <IceUtil/InputUtil.h>
+#include <iomanip>
+
#include <stdlib.h>
#include <math.h>
@@ -261,32 +263,42 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
switch(next)
{
case '\\':
+ {
+ str->v += '\\';
+ str->v += '\\';
+ break;
+ }
case '"':
case '\'':
{
str->v += next;
break;
}
+
case 'n':
{
str->v += '\n';
break;
}
+
case 'r':
{
str->v += '\r';
break;
}
+
case 't':
{
str->v += '\t';
break;
}
+
case 'v':
{
str->v += '\v';
break;
}
+
case 'f':
{
str->v += '\f';
@@ -311,68 +323,181 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
break;
}
+ //
+ // Octal value \nnn limited to three octal digits but terminate at the first character
+ // that is not a valid octal digit if encountered sooner.
+ //
+ // Every octal digit 0-7 needs a label here: a digit without one falls through to the
+ // default branch and is reported as an unknown escape sequence.
+ //
case '0':
case '1':
case '2':
case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
{
static string octalDigits = "01234567";
- unsigned short us = next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
+ IceUtil::Int64 value = 0;
+ string escape;
+ escape += next;
+ for(int i = 0; i < 2; ++i)
{
- str->literal += next;
- us = us * 8 + next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
- us = us * 8 + next - '0';
- }
- else
+ next = static_cast<char>(yyinput());
+ if(octalDigits.find_first_of(next) == string::npos)
{
unput(next);
+ break;
}
+ escape += next;
}
- else
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+ if(value == 0)
{
- unput(next);
+ unit->error("illegal NUL character in string constant");
}
- if(us == 0)
+ else if(value > 255)
{
- unit->error("illegal NUL character in string constant");
+ ostringstream os;
+ os << "octal escape sequence out of range: '\\" << oct << value << "'";
+ unit->warning(os.str());
}
- str->v += static_cast<char>(us);
+ str->v += static_cast<char>(value);
break;
}
case 'x':
{
- IceUtil::Int64 ull = 0;
+ IceUtil::Int64 value = 0;
+ string escape = "";
while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
{
+ escape += next;
+ }
+ unput(next);
+
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+ else if(value > 255)
+ {
+ ostringstream os;
+ os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+ unit->warning(os.str());
+ }
+ str->v += static_cast<char>(value);
+ break;
+ }
+
+ //
+ // Universal character name \unnnn code point U+nnnn
+ //
+ case 'u':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 4; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- ull *= 16;
- if(isdigit(static_cast<unsigned char>(next)))
- {
- ull += next - '0';
- }
- else if(islower(static_cast<unsigned char>(next)))
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - 'a' + 10;
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
- else
+ escape += next;
+ }
+
+ value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'u';
+ os.fill('0');
+ os.width(4);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+
+ break;
+ }
+
+ case 'U':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 8; ++i)
+ {
+ next = static_cast<char>(yyinput());
+ str->literal += next;
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - 'A' + 10;
+
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
+ escape += next;
}
- unput(next);
- if(ull == 0)
+
+ value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'U';
+ os.fill('0');
+ os.width(8);
+ os << hex << value;
+
+ if(value == 0)
{
unit->error("illegal NUL character in string constant");
}
- str->v += static_cast<char>(ull);
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
break;
}
- // TODO: add universal character names
+
default:
{
+ ostringstream os;
+ os << "unknown escape sequence '\\" << next << "'";
+ unit->warning(os.str());
+ //
+ // We escape the backslash in an unknown escape sequence
+ // to keep compatibility with 3.6
+ //
+ str->v += '\\';
str->v += c;
unput(next);
}
diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp
index 98dc43e28aa..c90dc87088b 100644
--- a/cpp/src/slice2cpp/Gen.cpp
+++ b/cpp/src/slice2cpp/Gen.cpp
@@ -13,6 +13,8 @@
#include <Slice/CPlusPlusUtil.h>
#include <IceUtil/Functional.h>
#include <IceUtil/Iterator.h>
+#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -126,11 +128,11 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
}
out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- unsigned char uc = *c; // char may be signed, so make it positive
+ unsigned char uc = value[i]; // char may be signed, so make it positive
ostringstream s;
s << "\\"; // Print as octal if not in basic source character set
s.width(3);
@@ -141,17 +143,93 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
}
else
{
- switch(*c)
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted literally, as its individual characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(static_cast<unsigned int>(v));
+
+ vector<unsigned char> u8buffer;
+
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
out << "\\";
break;
}
}
- out << *c; // Print normally if in basic source character set
+
+ out << value[i]; // Print normally if in basic source character set
}
+ ++i;
}
out << "\""; // Closing "
diff --git a/cpp/src/slice2cpp/Makefile b/cpp/src/slice2cpp/Makefile
index 790ad0aecc3..3af754207f8 100644
--- a/cpp/src/slice2cpp/Makefile
+++ b/cpp/src/slice2cpp/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2cs/Gen.cpp b/cpp/src/slice2cs/Gen.cpp
index 50076dc087c..85f10179740 100644
--- a/cpp/src/slice2cs/Gen.cpp
+++ b/cpp/src/slice2cs/Gen.cpp
@@ -10,7 +10,9 @@
#include <IceUtil/DisableWarnings.h>
#include <IceUtil/Functional.h>
#include <IceUtil/StringUtil.h>
-#include "Gen.h"
+#include <IceUtil/InputUtil.h>
+#include <Gen.h>
+
#include <limits>
#include <sys/stat.h>
#ifndef _WIN32
@@ -20,6 +22,7 @@
#endif
#include <IceUtil/Iterator.h>
#include <IceUtil/UUID.h>
+#include <IceUtil/Unicode.h>
#include <Slice/Checksum.h>
#include <Slice/DotNetNames.h>
#include <Slice/FileTracker.h>
@@ -35,6 +38,45 @@ namespace
{
string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4);
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+
+//
+// Converts the UTF-8 bytes in u8buffer to UTF-16 and writes every resulting
+// code unit to out as a \uXXXX escape (see u16CodePoint).
+//
+// Throws IceUtil::IllegalConversionException when the conversion does not
+// return conversionOK.
+//
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+    vector<unsigned short> utf16;
+    const IceUtilInternal::ConversionResult status = convertUTF8ToUTF16(u8buffer, utf16, IceUtil::lenientConversion);
+
+    if(status != conversionOK)
+    {
+        if(status == sourceExhausted)
+        {
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+        }
+        if(status == sourceIllegal)
+        {
+            throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+        }
+
+        // Any other result is unexpected here.
+        assert(0);
+        throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+    }
+
+    for(size_t idx = 0; idx < utf16.size(); ++idx)
+    {
+        out << u16CodePoint(utf16[idx]);
+    }
+}
+
+string
sliceModeToIceMode(Operation::Mode opMode)
{
string mode;
@@ -1544,39 +1586,118 @@ Slice::CsVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePt
// here because they are sensitive to the current locale.
//
static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789"
+ "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
+
static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- unsigned char uc = *c; // char may be signed, so make it positive
- ostringstream s;
- s << "\\u"; // Print as unicode if not in basic source character set
- s << hex;
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+ {
+ //
+ // Print as unicode if not in basic source character set
+ //
+ _out << u16CodePoint(static_cast<unsigned int>(value[i]));
+ }
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes escapes the backslash itself, so
+ // the codepoint will be interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && value[j] == 'U')
+ {
+ _out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, 8);
+ assert(codepoint.size() == 8);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ if(v > 0xFFFF)
+ {
+ unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
+ _out << u16CodePoint(high);
+ _out << u16CodePoint(low);
+ }
+ else
+ {
+ _out << "\\U" << codepoint;
+ }
+
+ i = j + 1 + 8;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
_out << "\\";
break;
}
}
- _out << *c; // Print normally if in basic source character set
+ _out << value[i]; // Print normally if in basic source character set
}
+ i++;
+ }
+
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
}
_out << "\""; // Closing "
diff --git a/cpp/src/slice2cs/Makefile b/cpp/src/slice2cs/Makefile
index e51e24c0445..e46c1005dd6 100644
--- a/cpp/src/slice2cs/Makefile
+++ b/cpp/src/slice2cs/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2java/Gen.cpp b/cpp/src/slice2java/Gen.cpp
index 4c5cf6f2477..038568cf64c 100644
--- a/cpp/src/slice2java/Gen.cpp
+++ b/cpp/src/slice2java/Gen.cpp
@@ -14,6 +14,7 @@
#include <IceUtil/Iterator.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <cstring>
#include <limits>
@@ -23,6 +24,44 @@ using namespace Slice;
using namespace IceUtil;
using namespace IceUtilInternal;
+string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4);
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned short> u16buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+ {
+ out << u16CodePoint(*c);
+ }
+}
+
static string
sliceModeToIceMode(Operation::Mode opMode)
{
@@ -1729,53 +1768,152 @@ Slice::JavaVisitor::writeConstantValue(Output& out, const TypePtr& type, const S
static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
out << "\"";
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- switch(*c)
+ char c = value[i];
+ if(static_cast<unsigned char>(c) < 128) // Single byte character
{
//
- // Java doesn't want '\n' or '\r\n' encoded as universal
- // characters, that gives an error "unclosed string literal"
+ // Print as unicode if not in basic source character set
//
- case '\r':
+ switch(c)
{
- out << "\\r";
- break;
- }
- case '\n':
- {
- out << "\\n";
- break;
- }
- default:
- {
- unsigned char uc = *c;
- ostringstream s;
- s << "\\u";
- s.flags(ios_base::hex);
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- out << s.str();
- break;
+ //
+ // Java doesn't want '\n' or '\r\n' encoded as universal
+ // characters, that gives an error "unclosed string literal"
+ //
+ case '\r':
+ {
+ out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ out << "\\n";
+ break;
+ }
+ default:
+ {
+ out << u16CodePoint(c);
+ break;
+ }
}
}
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes escapes the backslash itself, so
+ // the codepoint will be interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Java doesn't like these special characters encoded as universal characters
+ //
+ if(v == 0x5c)
+ {
+ out << "\\\\";
+ }
+ else if(v == 0xa)
+ {
+ out << "\\n";
+ }
+ else if(v == 0xd)
+ {
+ out << "\\r";
+ }
+ else if(v == 0x22)
+ {
+ out << "\\\"";
+ }
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ else if(v > 0xFFFF)
+ {
+ unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
+ out << u16CodePoint(high);
+ out << u16CodePoint(low);
+ }
+ else
+ {
+ out << u16CodePoint(static_cast<unsigned int>(v));
+ }
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
out << "\\";
break;
}
}
- out << *c;
+ out << value[i]; // Print normally if in basic source character set
}
+ i++;
+ }
+
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
}
out << "\"";
diff --git a/cpp/src/slice2java/Makefile b/cpp/src/slice2java/Makefile
index 010554d8e12..e2a882bfb6f 100644
--- a/cpp/src/slice2java/Makefile
+++ b/cpp/src/slice2java/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2js/Gen.cpp b/cpp/src/slice2js/Gen.cpp
index 11bd0f608e5..573d0180c67 100644
--- a/cpp/src/slice2js/Gen.cpp
+++ b/cpp/src/slice2js/Gen.cpp
@@ -20,6 +20,7 @@
#include <direct.h>
#endif
#include <IceUtil/Iterator.h>
+#include <IceUtil/Unicode.h>
#include <IceUtil/UUID.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -35,6 +36,44 @@ namespace
{
string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4);
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned short> u16buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+ {
+ out << u16CodePoint(*c);
+ }
+}
+
+string
sliceModeToIceMode(Operation::Mode opMode)
{
switch(opMode)
@@ -479,32 +518,110 @@ Slice::JsVisitor::writeConstantValue(const string& scope, const TypePtr& type, c
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- unsigned char uc = *c; // char may be signed, so make it positive
- ostringstream s;
- s << "\\u"; // Print as unicode if not in basic source character set
- s << hex;
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+ {
+ //
+ // Print as unicode if not in basic source character set
+ //
+ _out << u16CodePoint(static_cast<unsigned int>(value[i]));
+ }
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes escapes the backslash itself, so
+ // the codepoint will be interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && value[j] == 'U')
+ {
+ _out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, 8);
+ assert(codepoint.size() == 8);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ if(v > 0xFFFF)
+ {
+ unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
+ _out << u16CodePoint(high);
+ _out << u16CodePoint(low);
+ }
+ else
+ {
+ _out << u16CodePoint(static_cast<unsigned int>(v));
+ }
+
+ i = j + 1 + 8;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
_out << "\\";
break;
}
}
- _out << *c; // Print normally if in basic source character set
+ _out << value[i]; // Print normally if in basic source character set
}
+ i++;
+ }
+
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
}
_out << "\""; // Closing "
diff --git a/cpp/src/slice2js/Makefile b/cpp/src/slice2js/Makefile
index bd1bbe967f8..0aaf14a7a0d 100644
--- a/cpp/src/slice2js/Makefile
+++ b/cpp/src/slice2js/Makefile
@@ -21,7 +21,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2php/Main.cpp b/cpp/src/slice2php/Main.cpp
index 2838863454a..35117e68f36 100644
--- a/cpp/src/slice2php/Main.cpp
+++ b/cpp/src/slice2php/Main.cpp
@@ -16,6 +16,7 @@
#include <IceUtil/StringUtil.h>
#include <IceUtil/Mutex.h>
#include <IceUtil/MutexPtrLock.h>
+#include <IceUtil/Unicode.h>
#include <Slice/Checksum.h>
#include <Slice/Preprocessor.h>
#include <Slice/FileTracker.h>
@@ -1273,9 +1274,10 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
case '$':
{
@@ -1289,8 +1291,79 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
case '\\':
{
- _out << "\\\\";
- break;
+
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes escapes the backslash itself, so
+ // the codepoint will be interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(static_cast<unsigned int>(v));
+
+ vector<unsigned char> u8buffer;
+
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
}
case '\r':
{
@@ -1307,11 +1380,6 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
_out << "\\t";
break;
}
- case '\b':
- {
- _out << "\\b";
- break;
- }
case '\f':
{
_out << "\\f";
@@ -1319,9 +1387,9 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
default:
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(c) == charSet.end())
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
+ unsigned char uc = c; // Char may be signed, so make it positive.
stringstream s;
s << "\\"; // Print as octal if not in basic source character set.
s.flags(ios_base::oct);
@@ -1332,11 +1400,12 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
else
{
- _out << *c; // Print normally if in basic source character set.
+ _out << c; // Print normally if in basic source character set.
}
break;
}
}
+ ++i;
}
_out << "\""; // Closing "
diff --git a/cpp/src/slice2php/Makefile b/cpp/src/slice2php/Makefile
index 8bc91a8cd66..62f82531201 100644
--- a/cpp/src/slice2php/Makefile
+++ b/cpp/src/slice2php/Makefile
@@ -19,7 +19,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@