summaryrefslogtreecommitdiff
path: root/cpp
diff options
context:
space:
mode:
authorJose <jose@zeroc.com>2016-03-08 13:46:55 +0100
committerJose <jose@zeroc.com>2016-03-08 13:46:55 +0100
commit2bd402833bfdb54c1940dd0038be8af05d6f5e6f (patch)
treeeb7be3853dc45452397b730e586434f6e859efb3 /cpp
parentWindows fixes for icegriddb/icestormdb (diff)
downloadice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.bz2
ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.xz
ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.zip
ICE-6991 - Add support for unicode escape sequences
Diffstat (limited to 'cpp')
-rw-r--r--cpp/src/IceUtil/Unicode.cpp38
-rw-r--r--cpp/src/IceUtil/Unicode.h9
-rw-r--r--cpp/src/Slice/PythonUtil.cpp177
-rw-r--r--cpp/src/Slice/RubyUtil.cpp175
-rw-r--r--cpp/src/Slice/Scanner.cpp543
-rw-r--r--cpp/src/Slice/Scanner.l410
-rw-r--r--cpp/src/slice2cpp/Gen.cpp2
-rw-r--r--cpp/src/slice2cs/Gen.cpp152
-rw-r--r--cpp/src/slice2cs/Makefile2
-rw-r--r--cpp/src/slice2cs/Makefile.mak2
-rw-r--r--cpp/src/slice2java/Gen.cpp194
-rw-r--r--cpp/src/slice2java/Makefile2
-rw-r--r--cpp/src/slice2java/Makefile.mak2
-rw-r--r--cpp/src/slice2js/Gen.cpp145
-rw-r--r--cpp/src/slice2js/Makefile2
-rw-r--r--cpp/src/slice2js/Makefile.mak2
-rw-r--r--cpp/src/slice2php/Main.cpp93
-rw-r--r--cpp/src/slice2php/Makefile2
-rw-r--r--cpp/src/slice2php/Makefile.mak2
-rw-r--r--cpp/test/Ice/operations/Test.ice67
-rw-r--r--cpp/test/Ice/operations/TestAMD.ice69
-rw-r--r--cpp/test/Ice/operations/TestAMDI.cpp45
-rw-r--r--cpp/test/Ice/operations/TestAMDI.h3
-rw-r--r--cpp/test/Ice/operations/TestI.cpp45
-rw-r--r--cpp/test/Ice/operations/TestI.h2
-rw-r--r--cpp/test/Ice/operations/Twoways.cpp79
-rw-r--r--cpp/test/Slice/errorDetection/ConstDef.err15
-rw-r--r--cpp/test/Slice/errorDetection/ConstDef.ice11
28 files changed, 1752 insertions, 538 deletions
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp
index cae3476e277..7bad1d67c17 100644
--- a/cpp/src/IceUtil/Unicode.cpp
+++ b/cpp/src/IceUtil/Unicode.cpp
@@ -128,4 +128,42 @@ IceUtilInternal::convertUTF8ToUTFWstring(const Byte*& sourceStart, const Byte* s
return result;
}
+//
+// Convert a UTF-8 byte sequence into UTF-16 code units.
+//
+// source - the UTF-8 bytes to convert; may be empty.
+// target - receives the UTF-16 code units. When the result is conversionOK
+//          it is trimmed to exactly the number of code units written; for
+//          any other result its contents are not meaningful.
+// flags  - forwarded unchanged to ConvertUTF8toUTF16.
+//
+// Returns the result reported by ConvertUTF8toUTF16. An empty source yields
+// conversionOK and an empty target.
+//
+ConversionResult
+IceUtilInternal::convertUTF8ToUTF16(const vector<unsigned char>& source, vector<unsigned short>& target, ConversionFlags flags)
+{
+    //
+    // Forming &v[0] on an empty vector is undefined behavior, so deal with
+    // the empty input before any pointer into the storage is taken.
+    //
+    if(source.empty())
+    {
+        target.clear();
+        return conversionOK;
+    }
+
+    //
+    // A UTF-8 sequence never decodes to more UTF-16 code units than it has
+    // bytes, so source.size() is a sufficient upper bound for the output.
+    //
+    target.resize(source.size());
+    const unsigned char* sourceStart = &source[0];
+    const unsigned char* sourceEnd = sourceStart + source.size();
+
+    unsigned short* targetStart = &target[0];
+    unsigned short* targetEnd = targetStart + target.size();
+    ConversionResult result = ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+    if(result == conversionOK)
+    {
+        //
+        // targetStart was advanced past the last code unit written.
+        //
+        target.resize(static_cast<size_t>(targetStart - &target[0]));
+    }
+    return result;
+}
+
+//
+// Convert a sequence of UTF-32 code points into UTF-8 bytes.
+//
+// source - the UTF-32 code points to convert; may be empty.
+// target - receives the UTF-8 bytes. When the result is conversionOK it is
+//          trimmed to exactly the number of bytes written; for any other
+//          result its contents are not meaningful.
+// flags  - forwarded unchanged to ConvertUTF32toUTF8.
+//
+// Returns the result reported by ConvertUTF32toUTF8. An empty source yields
+// conversionOK and an empty target.
+//
+ConversionResult
+IceUtilInternal::convertUTF32ToUTF8(const vector<unsigned int>& source, vector<unsigned char>& target, ConversionFlags flags)
+{
+    //
+    // Forming &v[0] on an empty vector is undefined behavior, so deal with
+    // the empty input before any pointer into the storage is taken.
+    //
+    if(source.empty())
+    {
+        target.clear();
+        return conversionOK;
+    }
+
+    //
+    // UTF-8 needs at most four bytes per code point.
+    //
+    target.resize(source.size() * 4);
+
+    const unsigned int* sourceStart = &source[0];
+    const unsigned int* sourceEnd = sourceStart + source.size();
+
+    unsigned char* targetStart = &target[0];
+    unsigned char* targetEnd = targetStart + target.size();
+    ConversionResult result = ConvertUTF32toUTF8(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+    if(result == conversionOK)
+    {
+        //
+        // targetStart was advanced past the last byte written.
+        //
+        target.resize(static_cast<size_t>(targetStart - &target[0]));
+    }
+    return result;
+}
+
+
diff --git a/cpp/src/IceUtil/Unicode.h b/cpp/src/IceUtil/Unicode.h
index 00333ce8a44..2c96d6c6448 100644
--- a/cpp/src/IceUtil/Unicode.h
+++ b/cpp/src/IceUtil/Unicode.h
@@ -44,6 +44,15 @@ ConversionResult
convertUTF8ToUTFWstring(const IceUtil::Byte*& sourceStart, const IceUtil::Byte* sourceEnd,
std::wstring& target, IceUtil::ConversionFlags flags);
+
+//
+// Converts the UTF-8 bytes of the first argument into UTF-16 code units
+// stored in the second argument. The flags are forwarded to the underlying
+// ConvertUTF8toUTF16 routine, whose result is returned. The output vector
+// is trimmed to the number of code units written only when the result is
+// conversionOK.
+//
+ICE_UTIL_API ConversionResult
+convertUTF8ToUTF16(const std::vector<unsigned char>&, std::vector<unsigned short>&,
+ IceUtil::ConversionFlags);
+
+//
+// Converts the UTF-32 code points of the first argument into UTF-8 bytes
+// stored in the second argument. The flags are forwarded to the underlying
+// ConvertUTF32toUTF8 routine, whose result is returned. The output vector
+// is trimmed to the number of bytes written only when the result is
+// conversionOK.
+//
+ICE_UTIL_API ConversionResult
+convertUTF32ToUTF8(const std::vector<unsigned int>&, std::vector<unsigned char>&,
+ IceUtil::ConversionFlags);
+
}
#endif
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp
index 220e521d85d..92f657e8221 100644
--- a/cpp/src/Slice/PythonUtil.cpp
+++ b/cpp/src/Slice/PythonUtil.cpp
@@ -13,6 +13,7 @@
#include <IceUtil/IceUtil.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <climits>
#include <iterator>
@@ -1879,68 +1880,138 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
- _out << "\\\\";
- break;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\b':
- {
- _out << "\\b";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(*c) == charSet.end())
+ case '"':
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ _out << "\\\"";
+ break;
}
- else
+ case '\\':
{
- _out << *c; // Print normally if in basic source character set.
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes means every backslash is itself escaped, so
+ // the text that follows is written out as its literal characters; an odd
+ // number means the last backslash starts a universal character name.
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (0x41 is the code point for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(v);
+
+ vector<unsigned char> u8buffer;
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
+ case '\r':
+ {
+ _out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ _out << "\\n";
+ break;
+ }
+ case '\t':
+ {
+ _out << "\\t";
+ break;
+ }
+ case '\b':
+ {
+ _out << "\\b";
+ break;
+ }
+ case '\f':
+ {
+ _out << "\\f";
+ break;
+ }
+ default:
+ {
+ if(charSet.find(c) == charSet.end())
+ {
+ unsigned char uc = c; // Char may be signed, so make it positive.
+ stringstream s;
+ s << "\\"; // Print as octal if not in basic source character set.
+ s.flags(ios_base::oct);
+ s.width(3);
+ s.fill('0');
+ s << static_cast<unsigned>(uc);
+ _out << s.str();
+ }
+ else
+ {
+ _out << c; // Print normally if in basic source character set.
+ }
+ break;
}
- break;
- }
}
+ ++i;
}
- _out << "\""; // Closing "
+ _out << "\""; // Closing "
break;
}
case Slice::Builtin::KindObject:
diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp
index 3e6fe7557ac..fbed4764bbe 100644
--- a/cpp/src/Slice/RubyUtil.cpp
+++ b/cpp/src/Slice/RubyUtil.cpp
@@ -12,6 +12,7 @@
#include <Slice/Util.h>
#include <IceUtil/Functional.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <iterator>
using namespace std;
@@ -1470,65 +1471,134 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
- _out << "\\\\";
- break;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\b':
- {
- _out << "\\b";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(*c) == charSet.end())
+ case '"':
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ _out << "\\\"";
+ break;
}
- else
+ case '\\':
{
- _out << *c; // Print normally if in basic source character set.
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes means every backslash is itself escaped, so
+ // the text that follows is written out as its literal characters; an odd
+ // number means the last backslash starts a universal character name.
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (0x41 is the code point for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(v);
+
+ vector<unsigned char> u8buffer;
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
+ case '\r':
+ {
+ _out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ _out << "\\n";
+ break;
+ }
+ case '\t':
+ {
+ _out << "\\t";
+ break;
+ }
+ case '\b':
+ {
+ _out << "\\b";
+ break;
+ }
+ case '\f':
+ {
+ _out << "\\f";
+ break;
+ }
+ default:
+ {
+ if(charSet.find(c) == charSet.end())
+ {
+ unsigned char uc = c; // Char may be signed, so make it positive.
+ stringstream s;
+ s << "\\"; // Print as octal if not in basic source character set.
+ s.flags(ios_base::oct);
+ s.width(3);
+ s.fill('0');
+ s << static_cast<unsigned>(uc);
+ _out << s.str();
+ }
+ else
+ {
+ _out << c; // Print normally if in basic source character set.
+ }
+ break;
}
- break;
- }
}
+ ++i;
}
_out << "\""; // Closing "
@@ -1635,6 +1705,7 @@ Slice::Ruby::CodeVisitor::collectExceptionMembers(const ExceptionPtr& p, MemberI
void
Slice::Ruby::generate(const UnitPtr& un, bool all, bool checksum, const vector<string>& includePaths, Output& out)
{
+ out <<"# encoding: utf-8";
out << nl << "require 'Ice'";
if(!all)
diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp
index 0a4fdad538d..fc2fc290dac 100644
--- a/cpp/src/Slice/Scanner.cpp
+++ b/cpp/src/Slice/Scanner.cpp
@@ -29,7 +29,7 @@
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
+#define YY_FLEX_SUBMINOR_VERSION 39
#if YY_FLEX_SUBMINOR_VERSION > 0
#define FLEX_BETA
#endif
@@ -74,7 +74,6 @@ typedef int flex_int32_t;
typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
/* Limits of integral types. */
#ifndef INT8_MIN
@@ -105,6 +104,8 @@ typedef unsigned int flex_uint32_t;
#define UINT32_MAX (4294967295U)
#endif
+#endif /* ! C99 */
+
#endif /* ! FLEXINT_H */
#ifdef __cplusplus
@@ -161,7 +162,15 @@ typedef unsigned int flex_uint32_t;
/* Size of default input buffer. */
#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
#endif
/* The state buf must be large enough to hold one state per character in the main buffer.
@@ -173,7 +182,12 @@ typedef unsigned int flex_uint32_t;
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
-extern int slice_leng;
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t slice_leng;
extern FILE *slice_in, *slice_out;
@@ -182,6 +196,7 @@ extern FILE *slice_in, *slice_out;
#define EOB_ACT_LAST_MATCH 2
#define YY_LESS_LINENO(n)
+ #define YY_LINENO_REWIND_TO(ptr)
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
@@ -199,11 +214,6 @@ extern FILE *slice_in, *slice_out;
#define unput(c) yyunput( c, (yytext_ptr) )
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
#ifndef YY_STRUCT_YY_BUFFER_STATE
#define YY_STRUCT_YY_BUFFER_STATE
struct yy_buffer_state
@@ -221,7 +231,7 @@ struct yy_buffer_state
/* Number of characters read into yy_ch_buf, not including EOB
* characters.
*/
- int yy_n_chars;
+ yy_size_t yy_n_chars;
/* Whether we "own" the buffer - i.e., we know we created it,
* and can realloc() it to grow it, and should free() it to
@@ -291,8 +301,8 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
/* yy_hold_char holds the character lost when slice_text is formed. */
static char yy_hold_char;
-static int yy_n_chars; /* number of characters read into yy_ch_buf */
-int slice_leng;
+static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
+yy_size_t slice_leng;
/* Points to current character in buffer. */
static char *yy_c_buf_p = (char *) 0;
@@ -320,7 +330,7 @@ static void slice__init_buffer (YY_BUFFER_STATE b,FILE *file );
YY_BUFFER_STATE slice__scan_buffer (char *base,yy_size_t size );
YY_BUFFER_STATE slice__scan_string (yyconst char *yy_str );
-YY_BUFFER_STATE slice__scan_bytes (yyconst char *bytes,int len );
+YY_BUFFER_STATE slice__scan_bytes (yyconst char *bytes,yy_size_t len );
void *slice_alloc (yy_size_t );
void *slice_realloc (void *,yy_size_t );
@@ -352,7 +362,7 @@ void slice_free (void * );
/* Begin user sect3 */
-#define slice_wrap(n) 1
+#define slice_wrap() 1
#define YY_SKIP_YYWRAP
typedef unsigned char YY_CHAR;
@@ -558,6 +568,8 @@ char *slice_text;
#include <Slice/Grammar.h>
#include <IceUtil/InputUtil.h>
+#include <iomanip>
+
#include <stdlib.h>
#include <math.h>
@@ -620,7 +632,7 @@ int checkKeyword(string&);
-#line 623 "lex.yy.c"
+#line 635 "lex.yy.c"
#define INITIAL 0
#define BOMSCAN 1
@@ -661,7 +673,7 @@ FILE *slice_get_out (void );
void slice_set_out (FILE * out_str );
-int slice_get_leng (void );
+yy_size_t slice_get_leng (void );
char *slice_get_text (void );
@@ -703,7 +715,12 @@ static int input (void );
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
#endif
/* Copy whatever the last rule matched to the standard output. */
@@ -722,7 +739,7 @@ static int input (void );
if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
{ \
int c = '*'; \
- unsigned n; \
+ size_t n; \
for ( n = 0; n < max_size && \
(c = getc( slice_in )) != EOF && c != '\n'; ++n ) \
buf[n] = (char) c; \
@@ -807,11 +824,6 @@ YY_DECL
register char *yy_cp, *yy_bp;
register int yy_act;
-#line 92 "Scanner.l"
-
-
-#line 813 "lex.yy.c"
-
if ( !(yy_init) )
{
(yy_init) = 1;
@@ -838,6 +850,12 @@ YY_DECL
slice__load_buffer_state( );
}
+ {
+#line 94 "Scanner.l"
+
+
+#line 857 "lex.yy.c"
+
while ( 1 ) /* loops until end-of-file is reached */
{
yy_cp = (yy_c_buf_p);
@@ -855,7 +873,7 @@ YY_DECL
yy_match:
do
{
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
+ register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
if ( yy_accept[yy_current_state] )
{
(yy_last_accepting_state) = yy_current_state;
@@ -895,7 +913,7 @@ case 1:
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 94 "Scanner.l"
+#line 96 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -906,10 +924,11 @@ YY_RULE_SETUP
case 2:
/* rule 2 can match eol */
*yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
+YY_LINENO_REWIND_TO(yy_cp - 1);
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 101 "Scanner.l"
+#line 103 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -922,7 +941,7 @@ case 3:
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 108 "Scanner.l"
+#line 110 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -933,10 +952,11 @@ YY_RULE_SETUP
case 4:
/* rule 4 can match eol */
*yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
+YY_LINENO_REWIND_TO(yy_cp - 1);
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 115 "Scanner.l"
+#line 117 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -946,7 +966,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 122 "Scanner.l"
+#line 124 "Scanner.l"
{
// C++-style comment
BEGIN(MAINSCAN);
@@ -964,7 +984,7 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
-#line 137 "Scanner.l"
+#line 139 "Scanner.l"
{
// C-style comment
BEGIN(MAINSCAN);
@@ -1008,7 +1028,7 @@ YY_RULE_SETUP
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 178 "Scanner.l"
+#line 180 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_SCOPE_DELIMITER;
@@ -1016,7 +1036,7 @@ YY_RULE_SETUP
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 183 "Scanner.l"
+#line 185 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_METADATA_OPEN;
@@ -1024,7 +1044,7 @@ YY_RULE_SETUP
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 188 "Scanner.l"
+#line 190 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_METADATA_CLOSE;
@@ -1032,7 +1052,7 @@ YY_RULE_SETUP
YY_BREAK
case 10:
YY_RULE_SETUP
-#line 193 "Scanner.l"
+#line 195 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_OPEN;
@@ -1040,7 +1060,7 @@ YY_RULE_SETUP
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 198 "Scanner.l"
+#line 200 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_CLOSE;
@@ -1049,7 +1069,7 @@ YY_RULE_SETUP
case 12:
/* rule 12 can match eol */
YY_RULE_SETUP
-#line 203 "Scanner.l"
+#line 205 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr ident = new StringTok;
@@ -1077,7 +1097,7 @@ YY_RULE_SETUP
YY_BREAK
case 13:
YY_RULE_SETUP
-#line 228 "Scanner.l"
+#line 230 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr ident = new StringTok;
@@ -1088,163 +1108,279 @@ YY_RULE_SETUP
YY_BREAK
case 14:
YY_RULE_SETUP
-#line 236 "Scanner.l"
+#line 238 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr str = new StringTok;
str->literal = "\"";
while(true)
{
- char c = static_cast<char>(yyinput());
+ char c = static_cast<char>(yyinput());
str->literal += c;
- if(c == '"')
- {
- break;
- }
- else if(c == EOF)
- {
- unit->error("EOF in string");
- break;
- }
- else if(c == '\n')
- {
- unit->error("newline in string");
- }
- else if(c == '\\')
- {
- char next = static_cast<char>(yyinput());
+ if(c == '"')
+ {
+ break;
+ }
+ else if(c == EOF)
+ {
+ unit->error("EOF in string");
+ break;
+ }
+ else if(c == '\n')
+ {
+ unit->error("newline in string");
+ }
+ else if(c == '\\')
+ {
+ char next = static_cast<char>(yyinput());
str->literal += next;
- switch(next)
- {
- case '\\':
- case '"':
- case '\'':
- {
- str->v += next;
- break;
- }
-
- case 'n':
- {
- str->v += '\n';
- break;
- }
-
- case 'r':
- {
- str->v += '\r';
- break;
- }
-
- case 't':
- {
- str->v += '\t';
- break;
- }
-
- case 'v':
- {
- str->v += '\v';
- break;
- }
-
- case 'f':
- {
- str->v += '\f';
- break;
- }
-
- case 'a':
- {
- str->v += '\a';
- break;
- }
-
- case 'b':
- {
- str->v += '\b';
- break;
- }
-
- case '?':
- {
- str->v += '\?';
- break;
- }
-
- case '0':
- case '1':
- case '2':
- case '3':
- {
- static string octalDigits = "01234567";
- unsigned short us = next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
+ switch(next)
+ {
+ case '\\':
+ {
+ str->v += '\\';
+ str->v += '\\';
+ break;
+ }
+ case '"':
+ case '\'':
+ {
+ str->v += next;
+ break;
+ }
+
+ case 'n':
+ {
+ str->v += '\n';
+ break;
+ }
+
+ case 'r':
+ {
+ str->v += '\r';
+ break;
+ }
+
+ case 't':
+ {
+ str->v += '\t';
+ break;
+ }
+
+ case 'v':
+ {
+ str->v += '\v';
+ break;
+ }
+
+ case 'f':
+ {
+ str->v += '\f';
+ break;
+ }
+
+ case 'a':
+ {
+ str->v += '\a';
+ break;
+ }
+
+ case 'b':
+ {
+ str->v += '\b';
+ break;
+ }
+
+ case '?':
+ {
+ str->v += '\?';
+ break;
+ }
+
+ //
+ // Octal escape: one to three octal digits. Scanning stops early at the
+ // first character that is not an octal digit; that character is pushed
+ // back so the enclosing loop reads it again as ordinary input.
+ //
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ {
+     static string octalDigits = "01234567";
+     IceUtil::Int64 value = 0;
+
+     //
+     // The leading digit was already appended to str->literal right after
+     // it was read, so only the digits consumed here are added to it.
+     //
+     string escape;
+     escape += next;
+     for(int i = 0; i < 2; ++i)
+     {
+         next = static_cast<char>(yyinput());
+         if(octalDigits.find_first_of(next) == string::npos)
+         {
+             unput(next);
+             break;
+         }
+         escape += next;
+         str->literal += next;
+     }
+     value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+     if(value == 0)
+     {
+         unit->error("illegal NUL character in string constant");
+     }
+     else if(value > 255)
+     {
+         //
+         // Three octal digits can encode up to 511; anything above 255
+         // does not fit in one byte and is truncated by the cast below.
+         //
+         ostringstream os;
+         os << "octal escape sequence out of range: '\\" << oct << value << "'";
+         unit->warning(os.str());
+     }
+     str->v += static_cast<char>(value);
+     break;
+ }
+ case 'x':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+ while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
+ {
+ escape += next;
+ }
+ unput(next);
+
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+ else if(value > 255)
+ {
+ ostringstream os;
+ os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+ unit->warning(os.str());
+ }
+ str->v += static_cast<char>(value);
+ break;
+ }
+
+ //
+ // Universal character name \unnnn code point U+nnnn
+ //
+ case 'u':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 4; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- us = us * 8 + next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
- us = us * 8 + next - '0';
- }
- else
- {
- unput(next);
- }
- }
- else
- {
- unput(next);
- }
- if(us == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
- str->v += static_cast<char>(us);
- break;
- }
- case 'x':
- {
- IceUtil::Int64 ull = 0;
- while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
- {
+ if(!isxdigit(static_cast<unsigned char>(next)))
+ {
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
+ }
+ escape += next;
+ }
+
+ value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'u';
+ os.fill('0');
+ os.width(4);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+
+ break;
+ }
+
+ case 'U':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 8; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- ull *= 16;
- if(isdigit(static_cast<unsigned char>(next)))
- {
- ull += next - '0';
- }
- else if(islower(static_cast<unsigned char>(next)))
- {
- ull += next - 'a' + 10;
- }
- else
- {
- ull += next - 'A' + 10;
- }
- }
- unput(next);
- if(ull == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
- str->v += static_cast<char>(ull);
- break;
- }
-
- // TODO: add universal character names
-
- default:
- {
- str->v += c;
- unput(next);
- }
- }
- }
- else
- {
- str->v += c;
- }
+ if(!isxdigit(static_cast<unsigned char>(next)))
+ {
+
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
+ }
+ escape += next;
+ }
+
+ value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'U';
+ os.fill('0');
+ os.width(8);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+ break;
+ }
+
+ default:
+ {
+ ostringstream os;
+ os << "unknown escape sequence '\\" << next << "'";
+ unit->warning(os.str());
+ //
+ // We escape the backslash in an unknown escape sequence
+ // to keep compatibility with 3.6
+ //
+ str->v += '\\';
+ str->v += c;
+ unput(next);
+ }
+ }
+ }
+ else
+ {
+ str->v += c;
+ }
}
*yylvalp = str;
return ICE_STRING_LITERAL;
@@ -1252,7 +1388,7 @@ YY_RULE_SETUP
YY_BREAK
case 15:
YY_RULE_SETUP
-#line 397 "Scanner.l"
+#line 515 "Scanner.l"
{
BEGIN(MAINSCAN);
IntegerTokPtr itp = new IntegerTok;
@@ -1271,7 +1407,7 @@ YY_RULE_SETUP
YY_BREAK
case 16:
YY_RULE_SETUP
-#line 413 "Scanner.l"
+#line 531 "Scanner.l"
{
BEGIN(MAINSCAN);
errno = 0;
@@ -1305,7 +1441,7 @@ YY_RULE_SETUP
case 17:
/* rule 17 can match eol */
YY_RULE_SETUP
-#line 443 "Scanner.l"
+#line 561 "Scanner.l"
{
// Ignore white-space
@@ -1321,7 +1457,7 @@ YY_RULE_SETUP
YY_BREAK
case 18:
YY_RULE_SETUP
-#line 456 "Scanner.l"
+#line 574 "Scanner.l"
{
// Ignore UTF-8 BOM, rule only active when parsing start of file.
@@ -1330,7 +1466,7 @@ YY_RULE_SETUP
YY_BREAK
case 19:
YY_RULE_SETUP
-#line 462 "Scanner.l"
+#line 580 "Scanner.l"
{
BEGIN(MAINSCAN);
if(slice_text[0] < 32 || slice_text[0] > 126)
@@ -1349,10 +1485,10 @@ YY_RULE_SETUP
YY_BREAK
case 20:
YY_RULE_SETUP
-#line 478 "Scanner.l"
+#line 596 "Scanner.l"
ECHO;
YY_BREAK
-#line 1355 "lex.yy.c"
+#line 1491 "lex.yy.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(BOMSCAN):
case YY_STATE_EOF(MAINSCAN):
@@ -1486,6 +1622,7 @@ case YY_STATE_EOF(MAINSCAN):
"fatal flex scanner internal error--no action found" );
} /* end of action switch */
} /* end of scanning one token */
+ } /* end of user's declarations */
} /* end of slice_lex */
/* yy_get_next_buffer - try to read in a new buffer
@@ -1541,21 +1678,21 @@ static int yy_get_next_buffer (void)
else
{
- int num_to_read =
+ yy_size_t num_to_read =
YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
while ( num_to_read <= 0 )
{ /* Not enough room in the buffer - grow it. */
/* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
int yy_c_buf_p_offset =
(int) ((yy_c_buf_p) - b->yy_ch_buf);
if ( b->yy_is_our_buffer )
{
- int new_size = b->yy_buf_size * 2;
+ yy_size_t new_size = b->yy_buf_size * 2;
if ( new_size <= 0 )
b->yy_buf_size += b->yy_buf_size / 8;
@@ -1586,7 +1723,7 @@ static int yy_get_next_buffer (void)
/* Read in more data. */
YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), (size_t) num_to_read );
+ (yy_n_chars), num_to_read );
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
}
@@ -1682,7 +1819,7 @@ static int yy_get_next_buffer (void)
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
yy_is_jam = (yy_current_state == 72);
- return yy_is_jam ? 0 : yy_current_state;
+ return yy_is_jam ? 0 : yy_current_state;
}
static void yyunput (int c, register char * yy_bp )
@@ -1697,7 +1834,7 @@ static int yy_get_next_buffer (void)
if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
{ /* need to shift things up to make room */
/* +2 for EOB chars. */
- register int number_to_move = (yy_n_chars) + 2;
+ register yy_size_t number_to_move = (yy_n_chars) + 2;
register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
register char *source =
@@ -1746,7 +1883,7 @@ static int yy_get_next_buffer (void)
else
{ /* need more input */
- int offset = (yy_c_buf_p) - (yytext_ptr);
+ yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
++(yy_c_buf_p);
switch ( yy_get_next_buffer( ) )
@@ -2020,7 +2157,7 @@ void slice_pop_buffer_state (void)
*/
static void slice_ensure_buffer_stack (void)
{
- int num_to_alloc;
+ yy_size_t num_to_alloc;
if (!(yy_buffer_stack)) {
@@ -2112,17 +2249,17 @@ YY_BUFFER_STATE slice__scan_string (yyconst char * yystr )
/** Setup the input buffer state to scan the given bytes. The next call to slice_lex() will
* scan from a @e copy of @a bytes.
- * @param bytes the byte buffer to scan
- * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
*
* @return the newly allocated buffer state object.
*/
-YY_BUFFER_STATE slice__scan_bytes (yyconst char * yybytes, int _yybytes_len )
+YY_BUFFER_STATE slice__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len )
{
YY_BUFFER_STATE b;
char *buf;
yy_size_t n;
- int i;
+ yy_size_t i;
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
@@ -2204,7 +2341,7 @@ FILE *slice_get_out (void)
/** Get the length of the current token.
*
*/
-int slice_get_leng (void)
+yy_size_t slice_get_leng (void)
{
return slice_leng;
}
@@ -2352,7 +2489,7 @@ void slice_free (void * ptr )
#define YYTABLES_NAME "yytables"
-#line 478 "Scanner.l"
+#line 595 "Scanner.l"
diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l
index a5f8d439a2d..9054c1e90ed 100644
--- a/cpp/src/Slice/Scanner.l
+++ b/cpp/src/Slice/Scanner.l
@@ -13,6 +13,8 @@
#include <Slice/Grammar.h>
#include <IceUtil/InputUtil.h>
+#include <iomanip>
+
#include <stdlib.h>
#include <math.h>
@@ -239,156 +241,272 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]+{e
str->literal = "\"";
while(true)
{
- char c = static_cast<char>(yyinput());
+ char c = static_cast<char>(yyinput());
str->literal += c;
- if(c == '"')
- {
- break;
- }
- else if(c == EOF)
- {
- unit->error("EOF in string");
- break;
- }
- else if(c == '\n')
- {
- unit->error("newline in string");
- }
- else if(c == '\\')
- {
- char next = static_cast<char>(yyinput());
+ if(c == '"')
+ {
+ break;
+ }
+ else if(c == EOF)
+ {
+ unit->error("EOF in string");
+ break;
+ }
+ else if(c == '\n')
+ {
+ unit->error("newline in string");
+ }
+ else if(c == '\\')
+ {
+ char next = static_cast<char>(yyinput());
str->literal += next;
- switch(next)
- {
- case '\\':
- case '"':
- case '\'':
- {
- str->v += next;
- break;
- }
-
- case 'n':
- {
- str->v += '\n';
- break;
- }
-
- case 'r':
- {
- str->v += '\r';
- break;
- }
-
- case 't':
- {
- str->v += '\t';
- break;
- }
-
- case 'v':
- {
- str->v += '\v';
- break;
- }
-
- case 'f':
- {
- str->v += '\f';
- break;
- }
-
- case 'a':
- {
- str->v += '\a';
- break;
- }
-
- case 'b':
- {
- str->v += '\b';
- break;
- }
-
- case '?':
- {
- str->v += '\?';
- break;
- }
-
- case '0':
- case '1':
- case '2':
- case '3':
- {
- static string octalDigits = "01234567";
- unsigned short us = next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
+ switch(next)
+ {
+ case '\\':
+ {
+ str->v += '\\';
+ str->v += '\\';
+ break;
+ }
+ case '"':
+ case '\'':
+ {
+ str->v += next;
+ break;
+ }
+
+ case 'n':
+ {
+ str->v += '\n';
+ break;
+ }
+
+ case 'r':
+ {
+ str->v += '\r';
+ break;
+ }
+
+ case 't':
+ {
+ str->v += '\t';
+ break;
+ }
+
+ case 'v':
+ {
+ str->v += '\v';
+ break;
+ }
+
+ case 'f':
+ {
+ str->v += '\f';
+ break;
+ }
+
+ case 'a':
+ {
+ str->v += '\a';
+ break;
+ }
+
+ case 'b':
+ {
+ str->v += '\b';
+ break;
+ }
+
+ case '?':
+ {
+ str->v += '\?';
+ break;
+ }
+
+ //
+ // Octal escape \nnn: one to three octal digits. Scanning stops early at the
+ // first character that is not an octal digit; that character is pushed back
+ // with unput() so the main loop sees it again.
+ //
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ {
+     static string octalDigits = "01234567";
+     IceUtil::Int64 value = 0;
+     string escape;
+     escape += next;
+     for(int i = 0; i < 2; ++i)
+     {
+         next = static_cast<char>(yyinput());
+         if(octalDigits.find_first_of(next) == string::npos)
+         {
+             unput(next);
+             break;
+         }
+         escape += next;
+         //
+         // The leading digit was already appended to str->literal right
+         // after it was read, so only continuation digits are added here
+         // (appending the whole escape would duplicate the first digit).
+         //
+         str->literal += next;
+     }
+     value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+     if(value == 0)
+     {
+         unit->error("illegal NUL character in string constant");
+     }
+     else if(value > 255)
+     {
+         //
+         // Three octal digits can encode up to 0777; anything above 0377
+         // does not fit in one byte and is truncated by the cast below.
+         //
+         ostringstream os;
+         os << "octal escape sequence out of range: '\\" << oct << value << "'";
+         unit->warning(os.str());
+     }
+     str->v += static_cast<char>(value);
+     break;
+ }
+ case 'x':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+ while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
+ {
+ escape += next;
+ }
+ unput(next);
+
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+ else if(value > 255)
+ {
+ ostringstream os;
+ os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+ unit->warning(os.str());
+ }
+ str->v += static_cast<char>(value);
+ break;
+ }
+
+ //
+ // Universal character name \unnnn code point U+nnnn
+ //
+ case 'u':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 4; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- us = us * 8 + next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
- us = us * 8 + next - '0';
- }
- else
- {
- unput(next);
- }
- }
- else
- {
- unput(next);
- }
- if(us == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
- str->v += static_cast<char>(us);
- break;
- }
- case 'x':
- {
- IceUtil::Int64 ull = 0;
- while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
- {
+ if(!isxdigit(static_cast<unsigned char>(next)))
+ {
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
+ }
+ escape += next;
+ }
+
+ value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'u';
+ os.fill('0');
+ os.width(4);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+
+ break;
+ }
+
+ case 'U':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 8; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- ull *= 16;
- if(isdigit(static_cast<unsigned char>(next)))
- {
- ull += next - '0';
- }
- else if(islower(static_cast<unsigned char>(next)))
- {
- ull += next - 'a' + 10;
- }
- else
- {
- ull += next - 'A' + 10;
- }
- }
- unput(next);
- if(ull == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
- str->v += static_cast<char>(ull);
- break;
- }
-
- // TODO: add universal character names
-
- default:
- {
- str->v += c;
- unput(next);
- }
- }
- }
- else
- {
- str->v += c;
- }
+ if(!isxdigit(static_cast<unsigned char>(next)))
+ {
+
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
+ }
+ escape += next;
+ }
+
+ value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'U';
+ os.fill('0');
+ os.width(8);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+ break;
+ }
+
+ default:
+ {
+ ostringstream os;
+ os << "unknown escape sequence '\\" << next << "'";
+ unit->warning(os.str());
+ //
+ // We escape the backslash in an unknown escape sequence
+ // to keep compatibility with 3.6
+ //
+ str->v += '\\';
+ str->v += c;
+ unput(next);
+ }
+ }
+ }
+ else
+ {
+ str->v += c;
+ }
}
*yylvalp = str;
return ICE_STRING_LITERAL;
diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp
index 9937c9b0f30..bcb5d5d2475 100644
--- a/cpp/src/slice2cpp/Gen.cpp
+++ b/cpp/src/slice2cpp/Gen.cpp
@@ -93,13 +93,13 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
{
switch(*c)
{
- case '\\':
case '"':
{
out << "\\";
break;
}
}
+
out << *c; // Print normally if in basic source character set
}
}
diff --git a/cpp/src/slice2cs/Gen.cpp b/cpp/src/slice2cs/Gen.cpp
index cf09aa9df1b..e537f90f3ea 100644
--- a/cpp/src/slice2cs/Gen.cpp
+++ b/cpp/src/slice2cs/Gen.cpp
@@ -10,6 +10,7 @@
#include <IceUtil/DisableWarnings.h>
#include <IceUtil/Functional.h>
#include <IceUtil/StringUtil.h>
+#include <IceUtil/InputUtil.h>
#include <Gen.h>
#include <limits>
#include <sys/stat.h>
@@ -20,6 +21,7 @@
#endif
#include <IceUtil/Iterator.h>
#include <IceUtil/UUID.h>
+#include <IceUtil/Unicode.h>
#include <Slice/Checksum.h>
#include <Slice/DotNetNames.h>
#include <Slice/FileTracker.h>
@@ -35,6 +37,45 @@ namespace
{
+//
+// Format a 16-bit value as an escape of the form \uXXXX: the characters
+// '\' and 'u' followed by the value in hexadecimal, zero-padded to four digits.
+//
 string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4); // width applies to the next insertion only, i.e. the value itself
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+
+//
+// Convert the bytes in u8buffer from UTF-8 to UTF-16 (lenient conversion) and
+// write every resulting 16-bit unit to out as a \uXXXX escape.
+//
+// Throws IceUtil::IllegalConversionException when the conversion does not
+// return conversionOK.
+//
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned short> u16buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ // Any other result is unexpected: assert in debug builds, throw otherwise.
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ // One escape per UTF-16 unit produced by the conversion.
+ for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+ {
+ out << u16CodePoint(*c);
+ }
+}
+
+string
sliceModeToIceMode(Operation::Mode opMode)
{
string mode;
@@ -1983,41 +2024,120 @@ Slice::CsVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePt
// here because they are sensitive to the current locale.
//
static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789"
+ "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
+
static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- unsigned char uc = *c; // char may be signed, so make it positive
- ostringstream s;
- s << "\\u"; // Print as unicode if not in basic source character set
- s << hex;
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+ {
+ //
+ // Print as unicode if not in basic source character set
+ //
+ _out << u16CodePoint(static_cast<unsigned int>(value[i]));
+ }
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && value[j] == 'U')
+ {
+ _out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, 8);
+ assert(codepoint.size() == 8);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ if(v > 0xFFFF)
+ {
+ unsigned int high = ((v - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00;
+ _out << u16CodePoint(high);
+ _out << u16CodePoint(low);
+ }
+ else
+ {
+ _out << "\\U" << codepoint;
+ }
+
+ i = j + 1 + 8;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
_out << "\\";
break;
}
}
- _out << *c; // Print normally if in basic source character set
+ _out << value[i]; // Print normally if in basic source character set
}
+ i++;
}
-
+
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
+ }
+
_out << "\""; // Closing "
}
else if(bp && bp->kind() == Builtin::KindLong)
diff --git a/cpp/src/slice2cs/Makefile b/cpp/src/slice2cs/Makefile
index e51e24c0445..e46c1005dd6 100644
--- a/cpp/src/slice2cs/Makefile
+++ b/cpp/src/slice2cs/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2cs/Makefile.mak b/cpp/src/slice2cs/Makefile.mak
index 0a422c08b34..ac748e4fabd 100644
--- a/cpp/src/slice2cs/Makefile.mak
+++ b/cpp/src/slice2cs/Makefile.mak
@@ -18,7 +18,7 @@ OBJS = .\Gen.obj \
!include $(top_srcdir)/config/Make.rules.mak
-CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
!if "$(GENERATE_PDB)" == "yes"
PDBFLAGS = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/src/slice2java/Gen.cpp b/cpp/src/slice2java/Gen.cpp
index e6f2bc91571..eba540f57a6 100644
--- a/cpp/src/slice2java/Gen.cpp
+++ b/cpp/src/slice2java/Gen.cpp
@@ -14,6 +14,7 @@
#include <IceUtil/Iterator.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
+#include <IceUtil/Unicode.h>
#include <cstring>
#include <limits>
@@ -23,6 +24,44 @@ using namespace Slice;
using namespace IceUtil;
using namespace IceUtilInternal;
+//
+// Format a 16-bit value as an escape of the form \uXXXX: the characters
+// '\' and 'u' followed by the value in hexadecimal, zero-padded to four digits.
+//
+string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4); // width applies to the next insertion only, i.e. the value itself
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+//
+// Convert the bytes in u8buffer from UTF-8 to UTF-16 (lenient conversion) and
+// write every resulting 16-bit unit to out as a \uXXXX escape.
+//
+// Throws IceUtil::IllegalConversionException when the conversion does not
+// return conversionOK.
+//
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned short> u16buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ // Any other result is unexpected: assert in debug builds, throw otherwise.
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ // One escape per UTF-16 unit produced by the conversion.
+ for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+ {
+ out << u16CodePoint(*c);
+ }
+}
+
static string
sliceModeToIceMode(Operation::Mode opMode)
{
@@ -1863,54 +1902,153 @@ Slice::JavaVisitor::writeConstantValue(Output& out, const TypePtr& type, const S
"_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
out << "\"";
-
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(value[i]) == charSet.end())
{
- switch(*c)
+ char c = value[i];
+ if(static_cast<unsigned char>(c) < 128) // Single byte character
{
//
- // Java doesn't want '\n' or '\r\n' encoded as universal
- // characters, that gives an error "unclosed string literal"
+ // Print as unicode if not in basic source character set
//
- case '\r':
- {
- out << "\\r";
- break;
- }
- case '\n':
+ switch(c)
{
- out << "\\n";
- break;
- }
- default:
- {
- unsigned char uc = *c;
- ostringstream s;
- s << "\\u";
- s.flags(ios_base::hex);
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- out << s.str();
- break;
+ //
+ // Java doesn't want '\n' or '\r\n' encoded as universal
+ // characters, that gives an error "unclosed string literal"
+ //
+ case '\r':
+ {
+ out << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ out << "\\n";
+ break;
+ }
+ default:
+ {
+ out << u16CodePoint(c);
+ break;
+ }
}
}
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Java doesn't like these special characters encoded as universal characters
+ //
+ if(v == 0x5c)
+ {
+ out << "\\\\";
+ }
+ else if(v == 0xa)
+ {
+ out << "\\n";
+ }
+ else if(v == 0xd)
+ {
+ out << "\\r";
+ }
+ else if(v == 0x22)
+ {
+ out << "\\\"";
+ }
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ else if(v > 0xFFFF)
+ {
+ unsigned int high = ((v - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00;
+ out << u16CodePoint(high);
+ out << u16CodePoint(low);
+ }
+ else
+ {
+ out << u16CodePoint(v);
+ }
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
out << "\\";
break;
}
}
- out << *c;
+ out << value[i]; // Print normally if in basic source character set
}
+ i++;
+ }
+
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
}
out << "\"";
diff --git a/cpp/src/slice2java/Makefile b/cpp/src/slice2java/Makefile
index 010554d8e12..e2a882bfb6f 100644
--- a/cpp/src/slice2java/Makefile
+++ b/cpp/src/slice2java/Makefile
@@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2java/Makefile.mak b/cpp/src/slice2java/Makefile.mak
index 26a40e3aa55..1d93e22c79d 100644
--- a/cpp/src/slice2java/Makefile.mak
+++ b/cpp/src/slice2java/Makefile.mak
@@ -18,7 +18,7 @@ OBJS = .\Gen.obj \
!include $(top_srcdir)/config/Make.rules.mak
-CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
!if "$(GENERATE_PDB)" == "yes"
PDBFLAGS = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/src/slice2js/Gen.cpp b/cpp/src/slice2js/Gen.cpp
index 11bd0f608e5..871dd7d47b3 100644
--- a/cpp/src/slice2js/Gen.cpp
+++ b/cpp/src/slice2js/Gen.cpp
@@ -20,6 +20,7 @@
#include <direct.h>
#endif
#include <IceUtil/Iterator.h>
+#include <IceUtil/Unicode.h>
#include <IceUtil/UUID.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -35,6 +36,44 @@ namespace
{
+//
+// Format a 16-bit value as an escape of the form \uXXXX: the characters
+// '\' and 'u' followed by the value in hexadecimal, zero-padded to four digits.
+//
 string
+u16CodePoint(unsigned short value)
+{
+ ostringstream s;
+ s << "\\u";
+ s << hex;
+ s.width(4); // width applies to the next insertion only, i.e. the value itself
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+//
+// Convert the bytes in u8buffer from UTF-8 to UTF-16 (lenient conversion) and
+// write every resulting 16-bit unit to out as a \uXXXX escape.
+//
+// Throws IceUtil::IllegalConversionException when the conversion does not
+// return conversionOK.
+//
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned short> u16buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ // Any other result is unexpected: assert in debug builds, throw otherwise.
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ // One escape per UTF-16 unit produced by the conversion.
+ for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
+ {
+ out << u16CodePoint(*c);
+ }
+}
+
+string
sliceModeToIceMode(Operation::Mode opMode)
{
switch(opMode)
@@ -479,34 +518,112 @@ Slice::JsVisitor::writeConstantValue(const string& scope, const TypePtr& type, c
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
{
- if(charSet.find(*c) == charSet.end())
- {
- unsigned char uc = *c; // char may be signed, so make it positive
- ostringstream s;
- s << "\\u"; // Print as unicode if not in basic source character set
- s << hex;
- s.width(4);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
+ if(charSet.find(value[i]) == charSet.end())
+ {
+ if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+ {
+ //
+ // Print as unicode if not in basic source character set
+ //
+ _out << u16CodePoint(static_cast<unsigned int>(value[i]));
+ }
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
}
else
{
- switch(*c)
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
+ }
+ switch(value[i])
{
case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && value[j] == 'U')
+ {
+ _out << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, 8);
+ assert(codepoint.size() == 8);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ //
+ // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
+ // and is represented using a Unicode surrogate pair.
+ //
+ if(v > 0xFFFF)
+ {
+ unsigned int high = ((v - 0x10000) / 0x400) + 0xD800;
+ unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00;
+ _out << u16CodePoint(high);
+ _out << u16CodePoint(low);
+ }
+ else
+ {
+ _out << u16CodePoint(v);
+ }
+
+ i = j + 1 + 8;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
+ }
case '"':
{
_out << "\\";
break;
}
}
- _out << *c; // Print normally if in basic source character set
+ _out << value[i]; // Print normally if in basic source character set
}
+ i++;
}
-
+
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, _out);
+ u8buffer.clear();
+ }
+
_out << "\""; // Closing "
}
else if(bp && bp->kind() == Builtin::KindLong)
diff --git a/cpp/src/slice2js/Makefile b/cpp/src/slice2js/Makefile
index bd1bbe967f8..0aaf14a7a0d 100644
--- a/cpp/src/slice2js/Makefile
+++ b/cpp/src/slice2js/Makefile
@@ -21,7 +21,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2js/Makefile.mak b/cpp/src/slice2js/Makefile.mak
index 0014e231948..79bd9f23cca 100644
--- a/cpp/src/slice2js/Makefile.mak
+++ b/cpp/src/slice2js/Makefile.mak
@@ -19,7 +19,7 @@ OBJS = .\Gen.obj \
!include $(top_srcdir)/config/Make.rules.mak
-CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
!if "$(GENERATE_PDB)" == "yes"
PDBFLAGS = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/src/slice2php/Main.cpp b/cpp/src/slice2php/Main.cpp
index 77e2a93e921..6de877cec54 100644
--- a/cpp/src/slice2php/Main.cpp
+++ b/cpp/src/slice2php/Main.cpp
@@ -16,6 +16,7 @@
#include <IceUtil/StringUtil.h>
#include <IceUtil/Mutex.h>
#include <IceUtil/MutexPtrLock.h>
+#include <IceUtil/Unicode.h>
#include <Slice/Checksum.h>
#include <Slice/Preprocessor.h>
#include <Slice/FileTracker.h>
@@ -1270,9 +1271,10 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
_out << "\""; // Opening "
- for(string::const_iterator c = value.begin(); c != value.end(); ++c)
+ for(size_t i = 0; i < value.size();)
{
- switch(*c)
+ char c = value[i];
+ switch(c)
{
case '$':
{
@@ -1286,8 +1288,79 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
case '\\':
{
- _out << "\\\\";
- break;
+
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) escapes the backslash itself, so
+ // the codepoint is interpreted as its literal characters
+ //
+ // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ _out << s.substr(0, s.size() - 1);
+
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(v);
+
+ vector<unsigned char> u8buffer;
+
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ _out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ _out << s;
+ i = j;
+ }
+ continue;
}
case '\r':
{
@@ -1304,11 +1377,6 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
_out << "\\t";
break;
}
- case '\b':
- {
- _out << "\\b";
- break;
- }
case '\f':
{
_out << "\\f";
@@ -1316,9 +1384,9 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
default:
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(c) == charSet.end())
{
- unsigned char uc = *c; // Char may be signed, so make it positive.
+ unsigned char uc = c; // Char may be signed, so make it positive.
stringstream s;
s << "\\"; // Print as octal if not in basic source character set.
s.flags(ios_base::oct);
@@ -1329,11 +1397,12 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
else
{
- _out << *c; // Print normally if in basic source character set.
+ _out << c; // Print normally if in basic source character set.
}
break;
}
}
+ ++i;
}
_out << "\""; // Closing "
diff --git a/cpp/src/slice2php/Makefile b/cpp/src/slice2php/Makefile
index 8bc91a8cd66..62f82531201 100644
--- a/cpp/src/slice2php/Makefile
+++ b/cpp/src/slice2php/Makefile
@@ -19,7 +19,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir)
include $(top_srcdir)/config/Make.rules
-CPPFLAGS := -I. $(CPPFLAGS)
+CPPFLAGS := -I. -I.. $(CPPFLAGS)
$(NAME): $(OBJS)
rm -f $@
diff --git a/cpp/src/slice2php/Makefile.mak b/cpp/src/slice2php/Makefile.mak
index 47cab5776bd..1218590c485 100644
--- a/cpp/src/slice2php/Makefile.mak
+++ b/cpp/src/slice2php/Makefile.mak
@@ -17,7 +17,7 @@ OBJS = .\Main.obj
!include $(top_srcdir)/config/Make.rules.mak
-CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
+CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN
!if "$(GENERATE_PDB)" == "yes"
PDBFLAGS = /pdb:$(NAME:.exe=.pdb)
diff --git a/cpp/test/Ice/operations/Test.ice b/cpp/test/Ice/operations/Test.ice
index eec8f42e853..cf44e14a50d 100644
--- a/cpp/test/Ice/operations/Test.ice
+++ b/cpp/test/Ice/operations/Test.ice
@@ -251,6 +251,8 @@ class MyClass
ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1);
StringS opStringS2(StringS stringS);
ByteBoolD opByteBoolD2(ByteBoolD byteBoolD);
+
+ StringS opStringLiterals();
};
struct MyStruct1
@@ -274,5 +276,70 @@ class MyDerivedClass extends MyClass
MyStruct1 opMyStruct1(MyStruct1 c);
};
+//
+// String literals
+//
+
+const string s0 = "\u005c"; // backslash
+const string s1 = "\u0041"; // A
+const string s2 = "\u0049\u0063\u0065"; // Ice
+const string s3 = "\u004121"; // A21
+const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041
+const string s5 = "\u00FF"; // ÿ
+const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF)
+const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0)
+const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000)
+const string s9 = "\U0001F34C"; // BANANA (U+1F34C)
+const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna
+
+const string sw0 = "\U0000005c"; // backslash
+const string sw1 = "\U00000041"; // A
+const string sw2 = "\U00000049\U00000063\U00000065"; // Ice
+const string sw3 = "\U0000004121"; // A21
+const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041
+const string sw5 = "\U000000FF"; // ÿ
+const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF)
+const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0)
+const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000)
+const string sw9 = "\U0001F34C"; // BANANA (U+1F34C)
+const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna
+
+/**
+\' single quote byte 0x27 in ASCII encoding
+\" double quote byte 0x22 in ASCII encoding
+\? question mark byte 0x3f in ASCII encoding
+\\ backslash byte 0x5c in ASCII encoding
+\a audible bell byte 0x07 in ASCII encoding
+\b backspace byte 0x08 in ASCII encoding
+\f form feed - new page byte 0x0c in ASCII encoding
+\n line feed - new line byte 0x0a in ASCII encoding
+\r carriage return byte 0x0d in ASCII encoding
+\t horizontal tab byte 0x09 in ASCII encoding
+\v vertical tab byte 0x0b in ASCII encoding
+**/
+
+const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v";
+const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b";
+const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b";
+
+const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */
+const string ss4 = "\\\u0041\\"; /* \A\ */
+const string ss5 = "\\u0041\\"; /* \u0041\ */
+
+//
+// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF)
+// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100)
+// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00)
+// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194)
+// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A)
+// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198)
+// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340)
+// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341)
+// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342)
+// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343)
+const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃";
+const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343";
+const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343";
+
};
diff --git a/cpp/test/Ice/operations/TestAMD.ice b/cpp/test/Ice/operations/TestAMD.ice
index 615c2a81ad6..a65608ea4c7 100644
--- a/cpp/test/Ice/operations/TestAMD.ice
+++ b/cpp/test/Ice/operations/TestAMD.ice
@@ -252,6 +252,8 @@ dictionary<MyEnum, MyEnumS> MyEnumMyEnumSD;
StringS opStringS2(StringS stringS);
ByteBoolD opByteBoolD2(ByteBoolD byteBoolD);
+
+ StringS opStringLiterals();
};
struct MyStruct1
@@ -276,5 +278,72 @@ class MyClass1
MyStruct1 opMyStruct1(MyStruct1 c);
};
+
+//
+// String literals: each sN / swN pair spells the same text; sN prefers 4-digit \u escapes, swN uses 8-digit \U escapes
+//
+
+const string s0 = "\u005c"; // backslash
+const string s1 = "\u0041"; // A
+const string s2 = "\u0049\u0063\u0065"; // Ice
+const string s3 = "\u004121"; // A21 (\u takes exactly four hex digits; "21" is plain text)
+const string s4 = "\\u0041 \\U00000041"; // \u0041 \U00000041 (the backslashes are escaped, so nothing is decoded)
+const string s5 = "\u00FF"; // ÿ
+const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF)
+const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0)
+const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000)
+const string s9 = "\U0001F34C"; // BANANA (U+1F34C)
+const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna (U+0DA7)
+
+const string sw0 = "\U0000005c"; // backslash
+const string sw1 = "\U00000041"; // A
+const string sw2 = "\U00000049\U00000063\U00000065"; // Ice
+const string sw3 = "\U0000004121"; // A21 (\U takes exactly eight hex digits; "21" is plain text)
+const string sw4 = "\\u0041 \\U00000041"; // \u0041 \U00000041 (the backslashes are escaped, so nothing is decoded)
+const string sw5 = "\U000000FF"; // ÿ
+const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF)
+const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0)
+const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000)
+const string sw9 = "\U0001F34C"; // BANANA (U+1F34C)
+const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna (U+0DA7)
+
+/**
+\' single quote byte 0x27 in ASCII encoding
+\" double quote byte 0x22 in ASCII encoding
+\? question mark byte 0x3f in ASCII encoding
+\\ backslash byte 0x5c in ASCII encoding
+\a audible bell byte 0x07 in ASCII encoding
+\b backspace byte 0x08 in ASCII encoding
+\f form feed - new page byte 0x0c in ASCII encoding
+\n line feed - new line byte 0x0a in ASCII encoding
+\r carriage return byte 0x0d in ASCII encoding
+\t horizontal tab byte 0x09 in ASCII encoding
+\v vertical tab byte 0x0b in ASCII encoding
+**/
+
+const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; // one of each simple escape sequence
+const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; // same characters, each spelled as a 4-digit \u escape
+const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; // same characters, each spelled as an 8-digit \U escape
+// Escaped backslashes placed next to u/U; the comment on each line below shows the decoded value.
+const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */
+const string ss4 = "\\\u0041\\"; /* \A\ */
+const string ss5 = "\\u0041\\"; /* \u0041\ */
+
+//
+// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF)
+// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100)
+// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00)
+// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194)
+// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A)
+// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198)
+// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340)
+// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341)
+// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342)
+// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343)
+const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; // the ten characters listed above, written directly
+const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; // same text: \u up to U+FFFF, \U for the code points beyond it
+const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; // same text using \U for every character
+
+
};
diff --git a/cpp/test/Ice/operations/TestAMDI.cpp b/cpp/test/Ice/operations/TestAMDI.cpp
index 35c0c4af515..df030eb030d 100644
--- a/cpp/test/Ice/operations/TestAMDI.cpp
+++ b/cpp/test/Ice/operations/TestAMDI.cpp
@@ -13,6 +13,8 @@
#include <functional>
#include <iterator>
+using namespace std;
+
class Thread_opVoid : public IceUtil::Thread
{
public:
@@ -776,4 +778,47 @@ void
MyDerivedClassI::opMyClass1_async(const Test::AMD_MyDerivedClass_opMyClass1Ptr& cb, const Test::MyClass1Ptr& c, const Ice::Current&)
{
cb->ice_response(c);
+}
+
+
+void
+MyDerivedClassI::opStringLiterals_async(const Test::AMD_MyClass_opStringLiteralsPtr& cb,
+ const Ice::Current&)
+{
+ Test::StringS data; // reply sequence of the Slice string constants; the client test reads it by index, so push order matters
+ data.push_back(Test::s0); // indices 0-10: s0..s10
+ data.push_back(Test::s1);
+ data.push_back(Test::s2);
+ data.push_back(Test::s3);
+ data.push_back(Test::s4);
+ data.push_back(Test::s5);
+ data.push_back(Test::s6);
+ data.push_back(Test::s7);
+ data.push_back(Test::s8);
+ data.push_back(Test::s9);
+ data.push_back(Test::s10);
+ // indices 11-21: sw0..sw10
+ data.push_back(Test::sw0);
+ data.push_back(Test::sw1);
+ data.push_back(Test::sw2);
+ data.push_back(Test::sw3);
+ data.push_back(Test::sw4);
+ data.push_back(Test::sw5);
+ data.push_back(Test::sw6);
+ data.push_back(Test::sw7);
+ data.push_back(Test::sw8);
+ data.push_back(Test::sw9);
+ data.push_back(Test::sw10);
+ // indices 22-27: ss0..ss5
+ data.push_back(Test::ss0);
+ data.push_back(Test::ss1);
+ data.push_back(Test::ss2);
+ data.push_back(Test::ss3);
+ data.push_back(Test::ss4);
+ data.push_back(Test::ss5);
+ // indices 28-30: su0..su2
+ data.push_back(Test::su0);
+ data.push_back(Test::su1);
+ data.push_back(Test::su2);
+ cb->ice_response(data); // deliver the result through the AMD callback
+} \ No newline at end of file
diff --git a/cpp/test/Ice/operations/TestAMDI.h b/cpp/test/Ice/operations/TestAMDI.h
index 7fb17d30828..418d0a2aa36 100644
--- a/cpp/test/Ice/operations/TestAMDI.h
+++ b/cpp/test/Ice/operations/TestAMDI.h
@@ -275,6 +275,9 @@ public:
virtual void opMyClass1_async(const Test::AMD_MyDerivedClass_opMyClass1Ptr&,
const Test::MyClass1Ptr&,
const Ice::Current&);
+
+ virtual void opStringLiterals_async(const Test::AMD_MyClass_opStringLiteralsPtr&,
+ const Ice::Current&); // AMD form of opStringLiterals: the result is delivered through the callback argument
private:
IceUtil::ThreadPtr _opVoidThread;
diff --git a/cpp/test/Ice/operations/TestI.cpp b/cpp/test/Ice/operations/TestI.cpp
index cfc123c162e..29e6728047b 100644
--- a/cpp/test/Ice/operations/TestI.cpp
+++ b/cpp/test/Ice/operations/TestI.cpp
@@ -14,6 +14,8 @@
#include <functional>
#include <iterator>
+using namespace std;
+
MyDerivedClassI::MyDerivedClassI() : _opByteSOnewayCallCount(0)
{
}
@@ -738,4 +740,45 @@ MyDerivedClassI::opMyClass1(const Test::MyClass1Ptr& c, const Ice::Current&)
{
return c;
}
-
+
+Test::StringS
+MyDerivedClassI::opStringLiterals(const Ice::Current&)
+{
+ Test::StringS data; // result sequence of the Slice string constants; the client test reads it by index, so push order matters
+ data.push_back(Test::s0); // indices 0-10: s0..s10
+ data.push_back(Test::s1);
+ data.push_back(Test::s2);
+ data.push_back(Test::s3);
+ data.push_back(Test::s4);
+ data.push_back(Test::s5);
+ data.push_back(Test::s6);
+ data.push_back(Test::s7);
+ data.push_back(Test::s8);
+ data.push_back(Test::s9);
+ data.push_back(Test::s10);
+ // indices 11-21: sw0..sw10
+ data.push_back(Test::sw0);
+ data.push_back(Test::sw1);
+ data.push_back(Test::sw2);
+ data.push_back(Test::sw3);
+ data.push_back(Test::sw4);
+ data.push_back(Test::sw5);
+ data.push_back(Test::sw6);
+ data.push_back(Test::sw7);
+ data.push_back(Test::sw8);
+ data.push_back(Test::sw9);
+ data.push_back(Test::sw10);
+ // indices 22-27: ss0..ss5
+ data.push_back(Test::ss0);
+ data.push_back(Test::ss1);
+ data.push_back(Test::ss2);
+ data.push_back(Test::ss3);
+ data.push_back(Test::ss4);
+ data.push_back(Test::ss5);
+ // indices 28-30: su0..su2
+ data.push_back(Test::su0);
+ data.push_back(Test::su1);
+ data.push_back(Test::su2);
+
+ return data;
+}
diff --git a/cpp/test/Ice/operations/TestI.h b/cpp/test/Ice/operations/TestI.h
index 44398bea49f..4f31f7eecfe 100644
--- a/cpp/test/Ice/operations/TestI.h
+++ b/cpp/test/Ice/operations/TestI.h
@@ -268,6 +268,8 @@ public:
virtual Test::MyStruct1 opMyStruct1(const Test::MyStruct1&, const Ice::Current&);
virtual Test::MyClass1Ptr opMyClass1(const Test::MyClass1Ptr&, const Ice::Current&);
+
+ virtual Test::StringS opStringLiterals(const Ice::Current&); // returns the Slice string-literal test constants as one sequence
private:
diff --git a/cpp/test/Ice/operations/Twoways.cpp b/cpp/test/Ice/operations/Twoways.cpp
index ce82b3651bd..fe400c27f27 100644
--- a/cpp/test/Ice/operations/Twoways.cpp
+++ b/cpp/test/Ice/operations/Twoways.cpp
@@ -62,6 +62,85 @@ private:
void
twoways(const Ice::CommunicatorPtr& communicator, const Test::MyClassPrx& p)
{
+ Test::StringS literals = p->opStringLiterals();
+
+ test(Test::s0 == "\\" &&
+ Test::s0 == Test::sw0 &&
+ Test::s0 == literals[0] &&
+ Test::s0 == literals[11]);
+
+ test(Test::s1 == "A" &&
+ Test::s1 == Test::sw1 &&
+ Test::s1 == literals[1] &&
+ Test::s1 == literals[12]);
+
+ test(Test::s2 == "Ice" &&
+ Test::s2 == Test::sw2 &&
+ Test::s2 == literals[2] &&
+ Test::s2 == literals[13]);
+
+ test(Test::s3 == "A21" &&
+ Test::s3 == Test::sw3 &&
+ Test::s3 == literals[3] &&
+ Test::s3 == literals[14]);
+
+ test(Test::s4 == "\\u0041 \\U00000041" &&
+ Test::s4 == Test::sw4 &&
+ Test::s4 == literals[4] &&
+ Test::s4 == literals[15]);
+
+ test(Test::s5 == "\u00FF" &&
+ Test::s5 == Test::sw5 &&
+ Test::s5 == literals[5] &&
+ Test::s5 == literals[16]);
+
+ test(Test::s6 == "\u03FF" &&
+ Test::s6 == Test::sw6 &&
+ Test::s6 == literals[6] &&
+ Test::s6 == literals[17]);
+
+ test(Test::s7 == "\u05F0" &&
+ Test::s7 == Test::sw7 &&
+ Test::s7 == literals[7] &&
+ Test::s7 == literals[18]);
+
+ test(Test::s8 == "\U00010000" &&
+ Test::s8 == Test::sw8 &&
+ Test::s8 == literals[8] &&
+ Test::s8 == literals[19]);
+
+ test(Test::s9 == "\U0001F34C" &&
+ Test::s9 == Test::sw9 &&
+ Test::s9 == literals[9] &&
+ Test::s9 == literals[20]);
+
+ test(Test::s10 == "\u0DA7" &&
+ Test::s10 == Test::sw10 &&
+ Test::s10 == literals[10] &&
+ Test::s10 == literals[21]);
+
+ test(Test::ss0 == "\'\"\?\\\a\b\f\n\r\t\v" &&
+ Test::ss0 == Test::ss1 &&
+ Test::ss0 == Test::ss2 &&
+ Test::ss0 == literals[22] &&
+ Test::ss0 == literals[23] &&
+ Test::ss0 == literals[24]);
+
+ test(Test::ss3 == "\\\\U\\u\\" &&
+ Test::ss3 == literals[25]);
+
+ test(Test::ss4 == "\\A\\" &&
+ Test::ss4 == literals[26]);
+
+ test(Test::ss5 == "\\u0041\\" &&
+ Test::ss5 == literals[27]);
+
+ test(Test::su0 == Test::su1 &&
+ Test::su0 == Test::su2 &&
+ Test::su0 == literals[28] &&
+ Test::su0 == literals[29] &&
+ Test::su0 == literals[30]);
+
{
p->ice_ping();
}
diff --git a/cpp/test/Slice/errorDetection/ConstDef.err b/cpp/test/Slice/errorDetection/ConstDef.err
index 4765aa8da62..7784f8c8fef 100644
--- a/cpp/test/Slice/errorDetection/ConstDef.err
+++ b/cpp/test/Slice/errorDetection/ConstDef.err
@@ -21,6 +21,15 @@ ConstDef.ice:124: initializer `-1' for constant `b3' out of range for type byte
ConstDef.ice:125: initializer `256' for constant `b4' out of range for type byte
ConstDef.ice:127: illegal NUL character in string constant
ConstDef.ice:128: illegal NUL character in string constant
-ConstDef.ice:135: initializer `32767' for constant `c5' out of range for type byte
-ConstDef.ice:136: initializer `2147483647' for constant `c6' out of range for type short
-ConstDef.ice:137: initializer `9223372036854775807' for constant `c7' out of range for type int
+ConstDef.ice:129: illegal NUL character in string constant
+ConstDef.ice:130: illegal NUL character in string constant
+ConstDef.ice:137: initializer `32767' for constant `c5' out of range for type byte
+ConstDef.ice:138: initializer `2147483647' for constant `c6' out of range for type short
+ConstDef.ice:139: initializer `9223372036854775807' for constant `c7' out of range for type int
+ConstDef.ice:150: warning: unknown escape sequence '\g'
+ConstDef.ice:151: unknown escape sequence in string constant: "a\u000N
+ConstDef.ice:152: unknown escape sequence in string constant: "a\U0000000K
+ConstDef.ice:153: warning: octal escape sequence out of range: '\455'
+ConstDef.ice:154: warning: hex escape sequence out of range: '\xfff'
+ConstDef.ice:155: unknown escape sequence in string constant: '\ud83c'
+ConstDef.ice:155: unknown escape sequence in string constant: '\udf4c' \ No newline at end of file
diff --git a/cpp/test/Slice/errorDetection/ConstDef.ice b/cpp/test/Slice/errorDetection/ConstDef.ice
index 934423fade6..e6682e80300 100644
--- a/cpp/test/Slice/errorDetection/ConstDef.ice
+++ b/cpp/test/Slice/errorDetection/ConstDef.ice
@@ -23,7 +23,7 @@ const int intconst = 0;
const long longconst = 0;
const float floatconst = 0.;
const double doubleconst = 0.;
-const string stringconst = "X\aX\x00001X\rX\007\xffX\xffffX\xff7f";
+const string stringconst = "X\aX\x00001X\rX\007\xff\xff\xff";
const string stringconst2 = "Hello World!";
enum color { red, green, blue };
const color colorconst = blue;
@@ -126,6 +126,8 @@ const byte b4 = 256; // overflow
const string nullstring1 = "a\000";
const string nullstring2 = "a\x000";
+const string nullstring3 = "a\u0000"; // NUL spelled as a \u escape: expected "illegal NUL character" error
+const string nullstring4 = "a\U00000000"; // NUL spelled as a \U escape: expected "illegal NUL character" error
const byte c1 = l1; // OK
const short c2 = l1; // OK
@@ -145,4 +147,11 @@ const string c12 = stringconst; // OK
const color c13 = colorconst; // OK
+const string unknowescape = "a\g"; // unknown escape sequence \g (expected as a warning)
+const string invalidCodepoint = "a\u000N"; // invalid code point: 'N' is not a hex digit
+const string invalidCodepoint1 = "a\U0000000K"; // invalid code point: 'K' is not a hex digit
+const string octalRange = "\455"; // octal escape sequence out of range (expected as a warning)
+const string hexRange = "\xFFF"; // hex escape sequence out of range (expected as a warning)
+const string surrogatePair = "\uD83C\uDF4C"; // surrogate code points are not allowed in Slice string literals
+
};