diff options
author | Jose <jose@zeroc.com> | 2016-03-08 13:46:55 +0100 |
---|---|---|
committer | Jose <jose@zeroc.com> | 2016-03-08 13:46:55 +0100 |
commit | 2bd402833bfdb54c1940dd0038be8af05d6f5e6f (patch) | |
tree | eb7be3853dc45452397b730e586434f6e859efb3 | |
parent | Windows fixes for icegriddb/icestormdb (diff) | |
download | ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.bz2 ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.xz ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.zip |
ICE-6991 - Add support for unicode escape sequences
53 files changed, 3221 insertions, 541 deletions
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp index cae3476e277..7bad1d67c17 100644 --- a/cpp/src/IceUtil/Unicode.cpp +++ b/cpp/src/IceUtil/Unicode.cpp @@ -128,4 +128,42 @@ IceUtilInternal::convertUTF8ToUTFWstring(const Byte*& sourceStart, const Byte* s return result; } +ConversionResult +IceUtilInternal::convertUTF8ToUTF16(const vector<unsigned char>& source, vector<unsigned short>& target, ConversionFlags flags) +{ + target.resize(source.size()); + const unsigned char* sourceStart = &source[0]; + const unsigned char* sourceEnd = &source[0] + source.size(); + + unsigned short* targetStart = &target[0]; + unsigned short* targetEnd = &target[0] + target.size(); + ConversionResult result = ConvertUTF8toUTF16(&sourceStart, sourceEnd, &targetStart, targetEnd, flags); + + if(result == conversionOK) + { + target.resize(targetStart - &target[0]); + } + return result; +} + +ConversionResult +IceUtilInternal::convertUTF32ToUTF8(const vector<unsigned int>& source, vector<unsigned char>& target, ConversionFlags flags) +{ + target.resize(source.size() * 4); + + const unsigned int* sourceStart = &source[0]; + const unsigned int* sourceEnd = &source[0] + source.size(); + + unsigned char* targetStart = &target[0]; + unsigned char* targetEnd = &target[0] + target.size(); + ConversionResult result = ConvertUTF32toUTF8(&sourceStart, sourceEnd, &targetStart, targetEnd, flags); + + if(result == conversionOK) + { + target.resize(targetStart - &target[0]); + } + return result; +} + + diff --git a/cpp/src/IceUtil/Unicode.h b/cpp/src/IceUtil/Unicode.h index 00333ce8a44..2c96d6c6448 100644 --- a/cpp/src/IceUtil/Unicode.h +++ b/cpp/src/IceUtil/Unicode.h @@ -44,6 +44,15 @@ ConversionResult convertUTF8ToUTFWstring(const IceUtil::Byte*& sourceStart, const IceUtil::Byte* sourceEnd, std::wstring& target, IceUtil::ConversionFlags flags); + +ICE_UTIL_API ConversionResult +convertUTF8ToUTF16(const std::vector<unsigned char>&, std::vector<unsigned short>&, + 
IceUtil::ConversionFlags); + +ICE_UTIL_API ConversionResult +convertUTF32ToUTF8(const std::vector<unsigned int>&, std::vector<unsigned char>&, + IceUtil::ConversionFlags); + } #endif diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp index 220e521d85d..92f657e8221 100644 --- a/cpp/src/Slice/PythonUtil.cpp +++ b/cpp/src/Slice/PythonUtil.cpp @@ -13,6 +13,7 @@ #include <IceUtil/IceUtil.h> #include <IceUtil/StringUtil.h> #include <IceUtil/InputUtil.h> +#include <IceUtil/Unicode.h> #include <climits> #include <iterator> @@ -1879,68 +1880,138 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax _out << "\""; // Opening " - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + for(size_t i = 0; i < value.size();) { - switch(*c) + char c = value[i]; + switch(c) { - case '"': - { - _out << "\\\""; - break; - } - case '\\': - { - _out << "\\\\"; - break; - } - case '\r': - { - _out << "\\r"; - break; - } - case '\n': - { - _out << "\\n"; - break; - } - case '\t': - { - _out << "\\t"; - break; - } - case '\b': - { - _out << "\\b"; - break; - } - case '\f': - { - _out << "\\f"; - break; - } - default: - { - if(charSet.find(*c) == charSet.end()) + case '"': { - unsigned char uc = *c; // Char may be signed, so make it positive. - stringstream s; - s << "\\"; // Print as octal if not in basic source character set. - s.flags(ios_base::oct); - s.width(3); - s.fill('0'); - s << static_cast<unsigned>(uc); - _out << s.str(); + _out << "\\\""; + break; } - else + case '\\': { - _out << *c; // Print normally if in basic source character set. 
+ string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of slash \ will escape the backslash and + // the codepoint will be interpreted as its charaters + // + // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) + { + // + // Convert codepoint to UTF8 bytes and write the escaped bytes + // + _out << s.substr(0, s.size() - 1); + + size_t sz = value[j] == 'U' ? 8 : 4; + string codepoint = value.substr(j + 1, sz); + assert(codepoint.size() == sz); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + vector<unsigned int> u32buffer; + u32buffer.push_back(v); + + vector<unsigned char> u8buffer; + IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + ostringstream s; + for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) + { + s << "\\"; + s.fill('0'); + s.width(3); + s << oct; + s << static_cast<unsigned int>(*q); + } + _out << s.str(); + + i = j + 1 + sz; + } + else + { + _out << s; + i = j; + } + continue; + } + case '\r': + { + _out << "\\r"; + break; + } + case '\n': + { + _out << "\\n"; + break; + } + case '\t': + { + _out << "\\t"; + break; + } + case '\b': + { + _out << "\\b"; + break; + } + case '\f': + { + _out << "\\f"; + break; + } + default: + { + if(charSet.find(c) == charSet.end()) + { + unsigned char uc = c; // 
Char may be signed, so make it positive. + stringstream s; + s << "\\"; // Print as octal if not in basic source character set. + s.flags(ios_base::oct); + s.width(3); + s.fill('0'); + s << static_cast<unsigned>(uc); + _out << s.str(); + } + else + { + _out << c; // Print normally if in basic source character set. + } + break; } - break; - } } + ++i; } - _out << "\""; // Closing " + _out << "\""; // Closing " break; } case Slice::Builtin::KindObject: diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp index 3e6fe7557ac..fbed4764bbe 100644 --- a/cpp/src/Slice/RubyUtil.cpp +++ b/cpp/src/Slice/RubyUtil.cpp @@ -12,6 +12,7 @@ #include <Slice/Util.h> #include <IceUtil/Functional.h> #include <IceUtil/InputUtil.h> +#include <IceUtil/Unicode.h> #include <iterator> using namespace std; @@ -1470,65 +1471,134 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr _out << "\""; // Opening " - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + for(size_t i = 0; i < value.size();) { - switch(*c) + char c = value[i]; + switch(c) { - case '"': - { - _out << "\\\""; - break; - } - case '\\': - { - _out << "\\\\"; - break; - } - case '\r': - { - _out << "\\r"; - break; - } - case '\n': - { - _out << "\\n"; - break; - } - case '\t': - { - _out << "\\t"; - break; - } - case '\b': - { - _out << "\\b"; - break; - } - case '\f': - { - _out << "\\f"; - break; - } - default: - { - if(charSet.find(*c) == charSet.end()) + case '"': { - unsigned char uc = *c; // Char may be signed, so make it positive. - stringstream s; - s << "\\"; // Print as octal if not in basic source character set. - s.flags(ios_base::oct); - s.width(3); - s.fill('0'); - s << static_cast<unsigned>(uc); - _out << s.str(); + _out << "\\\""; + break; } - else + case '\\': { - _out << *c; // Print normally if in basic source character set. 
+ string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of slash \ will escape the backslash and + // the codepoint will be interpreted as its charaters + // + // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) + { + // + // Convert codepoint to UTF8 bytes and write the escaped bytes + // + _out << s.substr(0, s.size() - 1); + + size_t sz = value[j] == 'U' ? 8 : 4; + string codepoint = value.substr(j + 1, sz); + assert(codepoint.size() == sz); + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + vector<unsigned int> u32buffer; + u32buffer.push_back(v); + + vector<unsigned char> u8buffer; + IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + ostringstream s; + for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) + { + s << "\\"; + s.fill('0'); + s.width(3); + s << oct; + s << static_cast<unsigned int>(*q); + } + _out << s.str(); + + i = j + 1 + sz; + } + else + { + _out << s; + i = j; + } + continue; + } + case '\r': + { + _out << "\\r"; + break; + } + case '\n': + { + _out << "\\n"; + break; + } + case '\t': + { + _out << "\\t"; + break; + } + case '\b': + { + _out << "\\b"; + break; + } + case '\f': + { + _out << "\\f"; + break; + } + default: + { + if(charSet.find(c) == charSet.end()) + { + unsigned char uc = c; // 
Char may be signed, so make it positive. + stringstream s; + s << "\\"; // Print as octal if not in basic source character set. + s.flags(ios_base::oct); + s.width(3); + s.fill('0'); + s << static_cast<unsigned>(uc); + _out << s.str(); + } + else + { + _out << c; // Print normally if in basic source character set. + } + break; } - break; - } } + ++i; } _out << "\""; // Closing " @@ -1635,6 +1705,7 @@ Slice::Ruby::CodeVisitor::collectExceptionMembers(const ExceptionPtr& p, MemberI void Slice::Ruby::generate(const UnitPtr& un, bool all, bool checksum, const vector<string>& includePaths, Output& out) { + out <<"# encoding: utf-8"; out << nl << "require 'Ice'"; if(!all) diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp index 0a4fdad538d..fc2fc290dac 100644 --- a/cpp/src/Slice/Scanner.cpp +++ b/cpp/src/Slice/Scanner.cpp @@ -29,7 +29,7 @@ #define FLEX_SCANNER #define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 +#define YY_FLEX_SUBMINOR_VERSION 39 #if YY_FLEX_SUBMINOR_VERSION > 0 #define FLEX_BETA #endif @@ -74,7 +74,6 @@ typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; -#endif /* ! C99 */ /* Limits of integral types. */ #ifndef INT8_MIN @@ -105,6 +104,8 @@ typedef unsigned int flex_uint32_t; #define UINT32_MAX (4294967295U) #endif +#endif /* ! C99 */ + #endif /* ! FLEXINT_H */ #ifdef __cplusplus @@ -161,7 +162,15 @@ typedef unsigned int flex_uint32_t; /* Size of default input buffer. */ #ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else #define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ #endif /* The state buf must be large enough to hold one state per character in the main buffer. 
@@ -173,7 +182,12 @@ typedef unsigned int flex_uint32_t; typedef struct yy_buffer_state *YY_BUFFER_STATE; #endif -extern int slice_leng; +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +extern yy_size_t slice_leng; extern FILE *slice_in, *slice_out; @@ -182,6 +196,7 @@ extern FILE *slice_in, *slice_out; #define EOB_ACT_LAST_MATCH 2 #define YY_LESS_LINENO(n) + #define YY_LINENO_REWIND_TO(ptr) /* Return all but the first "n" matched characters back to the input stream. */ #define yyless(n) \ @@ -199,11 +214,6 @@ extern FILE *slice_in, *slice_out; #define unput(c) yyunput( c, (yytext_ptr) ) -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - #ifndef YY_STRUCT_YY_BUFFER_STATE #define YY_STRUCT_YY_BUFFER_STATE struct yy_buffer_state @@ -221,7 +231,7 @@ struct yy_buffer_state /* Number of characters read into yy_ch_buf, not including EOB * characters. */ - int yy_n_chars; + yy_size_t yy_n_chars; /* Whether we "own" the buffer - i.e., we know we created it, * and can realloc() it to grow it, and should free() it to @@ -291,8 +301,8 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ /* yy_hold_char holds the character lost when slice_text is formed. */ static char yy_hold_char; -static int yy_n_chars; /* number of characters read into yy_ch_buf */ -int slice_leng; +static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */ +yy_size_t slice_leng; /* Points to current character in buffer. 
*/ static char *yy_c_buf_p = (char *) 0; @@ -320,7 +330,7 @@ static void slice__init_buffer (YY_BUFFER_STATE b,FILE *file ); YY_BUFFER_STATE slice__scan_buffer (char *base,yy_size_t size ); YY_BUFFER_STATE slice__scan_string (yyconst char *yy_str ); -YY_BUFFER_STATE slice__scan_bytes (yyconst char *bytes,int len ); +YY_BUFFER_STATE slice__scan_bytes (yyconst char *bytes,yy_size_t len ); void *slice_alloc (yy_size_t ); void *slice_realloc (void *,yy_size_t ); @@ -352,7 +362,7 @@ void slice_free (void * ); /* Begin user sect3 */ -#define slice_wrap(n) 1 +#define slice_wrap() 1 #define YY_SKIP_YYWRAP typedef unsigned char YY_CHAR; @@ -558,6 +568,8 @@ char *slice_text; #include <Slice/Grammar.h> #include <IceUtil/InputUtil.h> +#include <iomanip> + #include <stdlib.h> #include <math.h> @@ -620,7 +632,7 @@ int checkKeyword(string&); -#line 623 "lex.yy.c" +#line 635 "lex.yy.c" #define INITIAL 0 #define BOMSCAN 1 @@ -661,7 +673,7 @@ FILE *slice_get_out (void ); void slice_set_out (FILE * out_str ); -int slice_get_leng (void ); +yy_size_t slice_get_leng (void ); char *slice_get_text (void ); @@ -703,7 +715,12 @@ static int input (void ); /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else #define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ #endif /* Copy whatever the last rule matched to the standard output. 
*/ @@ -722,7 +739,7 @@ static int input (void ); if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ { \ int c = '*'; \ - unsigned n; \ + size_t n; \ for ( n = 0; n < max_size && \ (c = getc( slice_in )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ @@ -807,11 +824,6 @@ YY_DECL register char *yy_cp, *yy_bp; register int yy_act; -#line 92 "Scanner.l" - - -#line 813 "lex.yy.c" - if ( !(yy_init) ) { (yy_init) = 1; @@ -838,6 +850,12 @@ YY_DECL slice__load_buffer_state( ); } + { +#line 94 "Scanner.l" + + +#line 857 "lex.yy.c" + while ( 1 ) /* loops until end-of-file is reached */ { yy_cp = (yy_c_buf_p); @@ -855,7 +873,7 @@ YY_DECL yy_match: do { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; @@ -895,7 +913,7 @@ case 1: (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up slice_text again */ YY_RULE_SETUP -#line 94 "Scanner.l" +#line 96 "Scanner.l" { if(unit->scanPosition(slice_text)) { @@ -906,10 +924,11 @@ YY_RULE_SETUP case 2: /* rule 2 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */ +YY_LINENO_REWIND_TO(yy_cp - 1); (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up slice_text again */ YY_RULE_SETUP -#line 101 "Scanner.l" +#line 103 "Scanner.l" { if(unit->scanPosition(slice_text)) { @@ -922,7 +941,7 @@ case 3: (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up slice_text again */ YY_RULE_SETUP -#line 108 "Scanner.l" +#line 110 "Scanner.l" { if(unit->scanPosition(slice_text)) { @@ -933,10 +952,11 @@ YY_RULE_SETUP case 4: /* rule 4 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */ +YY_LINENO_REWIND_TO(yy_cp - 1); (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up slice_text again */ YY_RULE_SETUP -#line 115 "Scanner.l" +#line 117 "Scanner.l" { if(unit->scanPosition(slice_text)) { @@ -946,7 +966,7 @@ YY_RULE_SETUP YY_BREAK 
case 5: YY_RULE_SETUP -#line 122 "Scanner.l" +#line 124 "Scanner.l" { // C++-style comment BEGIN(MAINSCAN); @@ -964,7 +984,7 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 137 "Scanner.l" +#line 139 "Scanner.l" { // C-style comment BEGIN(MAINSCAN); @@ -1008,7 +1028,7 @@ YY_RULE_SETUP YY_BREAK case 7: YY_RULE_SETUP -#line 178 "Scanner.l" +#line 180 "Scanner.l" { BEGIN(MAINSCAN); return ICE_SCOPE_DELIMITER; @@ -1016,7 +1036,7 @@ YY_RULE_SETUP YY_BREAK case 8: YY_RULE_SETUP -#line 183 "Scanner.l" +#line 185 "Scanner.l" { BEGIN(MAINSCAN); return ICE_METADATA_OPEN; @@ -1024,7 +1044,7 @@ YY_RULE_SETUP YY_BREAK case 9: YY_RULE_SETUP -#line 188 "Scanner.l" +#line 190 "Scanner.l" { BEGIN(MAINSCAN); return ICE_METADATA_CLOSE; @@ -1032,7 +1052,7 @@ YY_RULE_SETUP YY_BREAK case 10: YY_RULE_SETUP -#line 193 "Scanner.l" +#line 195 "Scanner.l" { BEGIN(MAINSCAN); return ICE_GLOBAL_METADATA_OPEN; @@ -1040,7 +1060,7 @@ YY_RULE_SETUP YY_BREAK case 11: YY_RULE_SETUP -#line 198 "Scanner.l" +#line 200 "Scanner.l" { BEGIN(MAINSCAN); return ICE_GLOBAL_METADATA_CLOSE; @@ -1049,7 +1069,7 @@ YY_RULE_SETUP case 12: /* rule 12 can match eol */ YY_RULE_SETUP -#line 203 "Scanner.l" +#line 205 "Scanner.l" { BEGIN(MAINSCAN); StringTokPtr ident = new StringTok; @@ -1077,7 +1097,7 @@ YY_RULE_SETUP YY_BREAK case 13: YY_RULE_SETUP -#line 228 "Scanner.l" +#line 230 "Scanner.l" { BEGIN(MAINSCAN); StringTokPtr ident = new StringTok; @@ -1088,163 +1108,279 @@ YY_RULE_SETUP YY_BREAK case 14: YY_RULE_SETUP -#line 236 "Scanner.l" +#line 238 "Scanner.l" { BEGIN(MAINSCAN); StringTokPtr str = new StringTok; str->literal = "\""; while(true) { - char c = static_cast<char>(yyinput()); + char c = static_cast<char>(yyinput()); str->literal += c; - if(c == '"') - { - break; - } - else if(c == EOF) - { - unit->error("EOF in string"); - break; - } - else if(c == '\n') - { - unit->error("newline in string"); - } - else if(c == '\\') - { - char next = static_cast<char>(yyinput()); + if(c == '"') + { + break; + } + 
else if(c == EOF) + { + unit->error("EOF in string"); + break; + } + else if(c == '\n') + { + unit->error("newline in string"); + } + else if(c == '\\') + { + char next = static_cast<char>(yyinput()); str->literal += next; - switch(next) - { - case '\\': - case '"': - case '\'': - { - str->v += next; - break; - } - - case 'n': - { - str->v += '\n'; - break; - } - - case 'r': - { - str->v += '\r'; - break; - } - - case 't': - { - str->v += '\t'; - break; - } - - case 'v': - { - str->v += '\v'; - break; - } - - case 'f': - { - str->v += '\f'; - break; - } - - case 'a': - { - str->v += '\a'; - break; - } - - case 'b': - { - str->v += '\b'; - break; - } - - case '?': - { - str->v += '\?'; - break; - } - - case '0': - case '1': - case '2': - case '3': - { - static string octalDigits = "01234567"; - unsigned short us = next - '0'; - if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos) - { + switch(next) + { + case '\\': + { + str->v += '\\'; + str->v += '\\'; + break; + } + case '"': + case '\'': + { + str->v += next; + break; + } + + case 'n': + { + str->v += '\n'; + break; + } + + case 'r': + { + str->v += '\r'; + break; + } + + case 't': + { + str->v += '\t'; + break; + } + + case 'v': + { + str->v += '\v'; + break; + } + + case 'f': + { + str->v += '\f'; + break; + } + + case 'a': + { + str->v += '\a'; + break; + } + + case 'b': + { + str->v += '\b'; + break; + } + + case '?': + { + str->v += '\?'; + break; + } + + // + // Octal value \nnn limited to three octal digits but terminate at the first character + // that is not a valid octal digit if encountered sooner. 
+ // + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '7': + { + static string octalDigits = "01234567"; + IceUtil::Int64 value = 0; + string escape; + escape += next; + for(int i = 0; i < 2; ++i) + { + next = static_cast<char>(yyinput()); + if(octalDigits.find_first_of(next) == string::npos) + { + unput(next); + break; + } + escape += next; + } + str->literal += escape; + value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8); + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + else if(value > 255) + { + ostringstream os; + os << "octal escape sequence out of range: '\\" << oct << value << "'"; + unit->warning(os.str()); + } + str->v += static_cast<char>(value); + break; + } + case 'x': + { + IceUtil::Int64 value = 0; + string escape = ""; + while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput())))) + { + escape += next; + } + unput(next); + + str->literal += escape; + value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16); + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + else if(value > 255) + { + ostringstream os; + os << "hex escape sequence out of range: '\\x" << hex << value << "'"; + unit->warning(os.str()); + } + str->v += static_cast<char>(value); + break; + } + + // + // Universal character name \unnnn code point U+nnnn + // + case 'u': + { + IceUtil::Int64 value = 0; + string escape = ""; + + for(int i = 0; i < 4; ++i) + { + next = static_cast<char>(yyinput()); str->literal += next; - us = us * 8 + next - '0'; - if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos) - { - us = us * 8 + next - '0'; - } - else - { - unput(next); - } - } - else - { - unput(next); - } - if(us == 0) - { - unit->error("illegal NUL character in string constant"); - } - str->v += static_cast<char>(us); - break; - } - case 'x': - { - IceUtil::Int64 ull = 0; - while(isxdigit(static_cast<unsigned char>(next = 
static_cast<char>(yyinput())))) - { + if(!isxdigit(static_cast<unsigned char>(next))) + { + unit->error("unknown escape sequence in string constant: " + str->literal); + break; + } + escape += next; + } + + value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; + + ostringstream os; + os << '\\' << 'u'; + os.fill('0'); + os.width(4); + os << hex << value; + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + + + // + // Determine if a character is a surrogate: + // + // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive + // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. + // + else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff)) + { + unit->error("unknown escape sequence in string constant: '" + os.str() + "'"); + } + + str->v += os.str(); + + break; + } + + case 'U': + { + IceUtil::Int64 value = 0; + string escape = ""; + + for(int i = 0; i < 8; ++i) + { + next = static_cast<char>(yyinput()); str->literal += next; - ull *= 16; - if(isdigit(static_cast<unsigned char>(next))) - { - ull += next - '0'; - } - else if(islower(static_cast<unsigned char>(next))) - { - ull += next - 'a' + 10; - } - else - { - ull += next - 'A' + 10; - } - } - unput(next); - if(ull == 0) - { - unit->error("illegal NUL character in string constant"); - } - str->v += static_cast<char>(ull); - break; - } - - // TODO: add universal character names - - default: - { - str->v += c; - unput(next); - } - } - } - else - { - str->v += c; - } + if(!isxdigit(static_cast<unsigned char>(next))) + { + + unit->error("unknown escape sequence in string constant: " + str->literal); + break; + } + escape += next; + } + + value = escape.size() == 8 ? 
IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; + + ostringstream os; + os << '\\' << 'U'; + os.fill('0'); + os.width(8); + os << hex << value; + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + + // + // Determine if a character is a surrogate: + // + // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive + // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. + // + else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff)) + { + unit->error("unknown escape sequence in string constant: '" + os.str() + "'"); + } + + str->v += os.str(); + break; + } + + default: + { + ostringstream os; + os << "unknown escape sequence '\\" << next << "'"; + unit->warning(os.str()); + // + // We escape the backslack in a unknown escape sequence + // to keep compativility with 3.6" + // + str->v += '\\'; + str->v += c; + unput(next); + } + } + } + else + { + str->v += c; + } } *yylvalp = str; return ICE_STRING_LITERAL; @@ -1252,7 +1388,7 @@ YY_RULE_SETUP YY_BREAK case 15: YY_RULE_SETUP -#line 397 "Scanner.l" +#line 515 "Scanner.l" { BEGIN(MAINSCAN); IntegerTokPtr itp = new IntegerTok; @@ -1271,7 +1407,7 @@ YY_RULE_SETUP YY_BREAK case 16: YY_RULE_SETUP -#line 413 "Scanner.l" +#line 531 "Scanner.l" { BEGIN(MAINSCAN); errno = 0; @@ -1305,7 +1441,7 @@ YY_RULE_SETUP case 17: /* rule 17 can match eol */ YY_RULE_SETUP -#line 443 "Scanner.l" +#line 561 "Scanner.l" { // Ignore white-space @@ -1321,7 +1457,7 @@ YY_RULE_SETUP YY_BREAK case 18: YY_RULE_SETUP -#line 456 "Scanner.l" +#line 574 "Scanner.l" { // Ignore UTF-8 BOM, rule only active when parsing start of file. 
@@ -1330,7 +1466,7 @@ YY_RULE_SETUP YY_BREAK case 19: YY_RULE_SETUP -#line 462 "Scanner.l" +#line 580 "Scanner.l" { BEGIN(MAINSCAN); if(slice_text[0] < 32 || slice_text[0] > 126) @@ -1349,10 +1485,10 @@ YY_RULE_SETUP YY_BREAK case 20: YY_RULE_SETUP -#line 478 "Scanner.l" +#line 596 "Scanner.l" ECHO; YY_BREAK -#line 1355 "lex.yy.c" +#line 1491 "lex.yy.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(BOMSCAN): case YY_STATE_EOF(MAINSCAN): @@ -1486,6 +1622,7 @@ case YY_STATE_EOF(MAINSCAN): "fatal flex scanner internal error--no action found" ); } /* end of action switch */ } /* end of scanning one token */ + } /* end of user's declarations */ } /* end of slice_lex */ /* yy_get_next_buffer - try to read in a new buffer @@ -1541,21 +1678,21 @@ static int yy_get_next_buffer (void) else { - int num_to_read = + yy_size_t num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. */ /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER; + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; int yy_c_buf_p_offset = (int) ((yy_c_buf_p) - b->yy_ch_buf); if ( b->yy_is_our_buffer ) { - int new_size = b->yy_buf_size * 2; + yy_size_t new_size = b->yy_buf_size * 2; if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8; @@ -1586,7 +1723,7 @@ static int yy_get_next_buffer (void) /* Read in more data. */ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - (yy_n_chars), (size_t) num_to_read ); + (yy_n_chars), num_to_read ); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } @@ -1682,7 +1819,7 @@ static int yy_get_next_buffer (void) yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; yy_is_jam = (yy_current_state == 72); - return yy_is_jam ? 0 : yy_current_state; + return yy_is_jam ? 
0 : yy_current_state; } static void yyunput (int c, register char * yy_bp ) @@ -1697,7 +1834,7 @@ static int yy_get_next_buffer (void) if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) { /* need to shift things up to make room */ /* +2 for EOB chars. */ - register int number_to_move = (yy_n_chars) + 2; + register yy_size_t number_to_move = (yy_n_chars) + 2; register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; register char *source = @@ -1746,7 +1883,7 @@ static int yy_get_next_buffer (void) else { /* need more input */ - int offset = (yy_c_buf_p) - (yytext_ptr); + yy_size_t offset = (yy_c_buf_p) - (yytext_ptr); ++(yy_c_buf_p); switch ( yy_get_next_buffer( ) ) @@ -2020,7 +2157,7 @@ void slice_pop_buffer_state (void) */ static void slice_ensure_buffer_stack (void) { - int num_to_alloc; + yy_size_t num_to_alloc; if (!(yy_buffer_stack)) { @@ -2112,17 +2249,17 @@ YY_BUFFER_STATE slice__scan_string (yyconst char * yystr ) /** Setup the input buffer state to scan the given bytes. The next call to slice_lex() will * scan from a @e copy of @a bytes. - * @param bytes the byte buffer to scan - * @param len the number of bytes in the buffer pointed to by @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. * * @return the newly allocated buffer state object. */ -YY_BUFFER_STATE slice__scan_bytes (yyconst char * yybytes, int _yybytes_len ) +YY_BUFFER_STATE slice__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len ) { YY_BUFFER_STATE b; char *buf; yy_size_t n; - int i; + yy_size_t i; /* Get memory for full buffer, including space for trailing EOB's. */ n = _yybytes_len + 2; @@ -2204,7 +2341,7 @@ FILE *slice_get_out (void) /** Get the length of the current token. 
* */ -int slice_get_leng (void) +yy_size_t slice_get_leng (void) { return slice_leng; } @@ -2352,7 +2489,7 @@ void slice_free (void * ptr ) #define YYTABLES_NAME "yytables" -#line 478 "Scanner.l" +#line 595 "Scanner.l" diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l index a5f8d439a2d..9054c1e90ed 100644 --- a/cpp/src/Slice/Scanner.l +++ b/cpp/src/Slice/Scanner.l @@ -13,6 +13,8 @@ #include <Slice/Grammar.h> #include <IceUtil/InputUtil.h> +#include <iomanip> + #include <stdlib.h> #include <math.h> @@ -239,156 +241,272 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]+{e str->literal = "\""; while(true) { - char c = static_cast<char>(yyinput()); + char c = static_cast<char>(yyinput()); str->literal += c; - if(c == '"') - { - break; - } - else if(c == EOF) - { - unit->error("EOF in string"); - break; - } - else if(c == '\n') - { - unit->error("newline in string"); - } - else if(c == '\\') - { - char next = static_cast<char>(yyinput()); + if(c == '"') + { + break; + } + else if(c == EOF) + { + unit->error("EOF in string"); + break; + } + else if(c == '\n') + { + unit->error("newline in string"); + } + else if(c == '\\') + { + char next = static_cast<char>(yyinput()); str->literal += next; - switch(next) - { - case '\\': - case '"': - case '\'': - { - str->v += next; - break; - } - - case 'n': - { - str->v += '\n'; - break; - } - - case 'r': - { - str->v += '\r'; - break; - } - - case 't': - { - str->v += '\t'; - break; - } - - case 'v': - { - str->v += '\v'; - break; - } - - case 'f': - { - str->v += '\f'; - break; - } - - case 'a': - { - str->v += '\a'; - break; - } - - case 'b': - { - str->v += '\b'; - break; - } - - case '?': - { - str->v += '\?'; - break; - } - - case '0': - case '1': - case '2': - case '3': - { - static string octalDigits = "01234567"; - unsigned short us = next - '0'; - if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos) - { + switch(next) + { + case '\\': + { + str->v += 
'\\'; + str->v += '\\'; + break; + } + case '"': + case '\'': + { + str->v += next; + break; + } + + case 'n': + { + str->v += '\n'; + break; + } + + case 'r': + { + str->v += '\r'; + break; + } + + case 't': + { + str->v += '\t'; + break; + } + + case 'v': + { + str->v += '\v'; + break; + } + + case 'f': + { + str->v += '\f'; + break; + } + + case 'a': + { + str->v += '\a'; + break; + } + + case 'b': + { + str->v += '\b'; + break; + } + + case '?': + { + str->v += '\?'; + break; + } + + // + // Octal value \nnn limited to three octal digits but terminate at the first character + // that is not a valid octal digit if encountered sooner. + // + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '7': + { + static string octalDigits = "01234567"; + IceUtil::Int64 value = 0; + string escape; + escape += next; + for(int i = 0; i < 2; ++i) + { + next = static_cast<char>(yyinput()); + if(octalDigits.find_first_of(next) == string::npos) + { + unput(next); + break; + } + escape += next; + } + str->literal += escape; + value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8); + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + else if(value > 255) + { + ostringstream os; + os << "octal escape sequence out of range: '\\" << oct << value << "'"; + unit->warning(os.str()); + } + str->v += static_cast<char>(value); + break; + } + case 'x': + { + IceUtil::Int64 value = 0; + string escape = ""; + while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput())))) + { + escape += next; + } + unput(next); + + str->literal += escape; + value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16); + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + else if(value > 255) + { + ostringstream os; + os << "hex escape sequence out of range: '\\x" << hex << value << "'"; + unit->warning(os.str()); + } + str->v += static_cast<char>(value); + break; + } + + // + // Universal character 
name \unnnn code point U+nnnn + // + case 'u': + { + IceUtil::Int64 value = 0; + string escape = ""; + + for(int i = 0; i < 4; ++i) + { + next = static_cast<char>(yyinput()); str->literal += next; - us = us * 8 + next - '0'; - if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos) - { - us = us * 8 + next - '0'; - } - else - { - unput(next); - } - } - else - { - unput(next); - } - if(us == 0) - { - unit->error("illegal NUL character in string constant"); - } - str->v += static_cast<char>(us); - break; - } - case 'x': - { - IceUtil::Int64 ull = 0; - while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput())))) - { + if(!isxdigit(static_cast<unsigned char>(next))) + { + unit->error("unknown escape sequence in string constant: " + str->literal); + break; + } + escape += next; + } + + value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; + + ostringstream os; + os << '\\' << 'u'; + os.fill('0'); + os.width(4); + os << hex << value; + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + + + // + // Determine if a character is a surrogate: + // + // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive + // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. 
+ // + else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff)) + { + unit->error("unknown escape sequence in string constant: '" + os.str() + "'"); + } + + str->v += os.str(); + + break; + } + + case 'U': + { + IceUtil::Int64 value = 0; + string escape = ""; + + for(int i = 0; i < 8; ++i) + { + next = static_cast<char>(yyinput()); str->literal += next; - ull *= 16; - if(isdigit(static_cast<unsigned char>(next))) - { - ull += next - '0'; - } - else if(islower(static_cast<unsigned char>(next))) - { - ull += next - 'a' + 10; - } - else - { - ull += next - 'A' + 10; - } - } - unput(next); - if(ull == 0) - { - unit->error("illegal NUL character in string constant"); - } - str->v += static_cast<char>(ull); - break; - } - - // TODO: add universal character names - - default: - { - str->v += c; - unput(next); - } - } - } - else - { - str->v += c; - } + if(!isxdigit(static_cast<unsigned char>(next))) + { + + unit->error("unknown escape sequence in string constant: " + str->literal); + break; + } + escape += next; + } + + value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; + + ostringstream os; + os << '\\' << 'U'; + os.fill('0'); + os.width(8); + os << hex << value; + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + + // + // Determine if a character is a surrogate: + // + // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive + // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. 
+ // + else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff)) + { + unit->error("unknown escape sequence in string constant: '" + os.str() + "'"); + } + + str->v += os.str(); + break; + } + + default: + { + ostringstream os; + os << "unknown escape sequence '\\" << next << "'"; + unit->warning(os.str()); + // + // We escape the backslash in an unknown escape sequence + // to keep compatibility with 3.6 + // + str->v += '\\'; + str->v += c; + unput(next); + } + } + } + else + { + str->v += c; + } } *yylvalp = str; return ICE_STRING_LITERAL; diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp index 9937c9b0f30..bcb5d5d2475 100644 --- a/cpp/src/slice2cpp/Gen.cpp +++ b/cpp/src/slice2cpp/Gen.cpp @@ -93,13 +93,13 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt { switch(*c) { - case '\\': case '"': { out << "\\"; break; } } + out << *c; // Print normally if in basic source character set } } diff --git a/cpp/src/slice2cs/Gen.cpp b/cpp/src/slice2cs/Gen.cpp index cf09aa9df1b..e537f90f3ea 100644 --- a/cpp/src/slice2cs/Gen.cpp +++ b/cpp/src/slice2cs/Gen.cpp @@ -10,6 +10,7 @@ #include <IceUtil/DisableWarnings.h> #include <IceUtil/Functional.h> #include <IceUtil/StringUtil.h> +#include <IceUtil/InputUtil.h> #include <Gen.h> #include <limits> #include <sys/stat.h> @@ -20,6 +21,7 @@ #endif #include <IceUtil/Iterator.h> #include <IceUtil/UUID.h> +#include <IceUtil/Unicode.h> #include <Slice/Checksum.h> #include <Slice/DotNetNames.h> #include <Slice/FileTracker.h> @@ -35,6 +37,45 @@ namespace { string +u16CodePoint(unsigned short value) +{ + ostringstream s; + s << "\\u"; + s << hex; + s.width(4); + s.fill('0'); + s << value; + return s.str(); +} + + +void +writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out) +{ + vector<unsigned short> u16buffer; + IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion); + 
switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c) + { + out << u16CodePoint(*c); + } +} + +string sliceModeToIceMode(Operation::Mode opMode) { string mode; @@ -1983,41 +2024,120 @@ Slice::CsVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePt // here because they are sensitive to the current locale. // static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "0123456789" - "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' "; + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789" + "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' "; + static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end()); _out << "\""; // Opening " - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + vector<unsigned char> u8buffer; // Buffer to convert multibyte characters + + for(size_t i = 0; i < value.size();) { - if(charSet.find(*c) == charSet.end()) + if(charSet.find(value[i]) == charSet.end()) { - unsigned char uc = *c; // char may be signed, so make it positive - ostringstream s; - s << "\\u"; // Print as unicode if not in basic source character set - s << hex; - s.width(4); - s.fill('0'); - s << static_cast<unsigned>(uc); - _out << s.str(); + if(static_cast<unsigned char>(value[i]) < 128) // Single byte character + { + // + // Print as unicode if not in basic source character set + // + _out << u16CodePoint(static_cast<unsigned int>(value[i])); + } + else + { + u8buffer.push_back(value[i]); + } } else { - switch(*c) + // + // Write any pending characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + 
writeU8Buffer(u8buffer, _out); + u8buffer.clear(); + } + switch(value[i]) { case '\\': + { + string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of backslashes (\) will escape the backslash and + // the codepoint will be interpreted as its characters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && value[j] == 'U') + { + _out << s.substr(0, s.size() - 1); + i = j + 1; + + string codepoint = value.substr(j + 1, 8); + assert(codepoint.size() == 8); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + + // + // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal + // and is represented using a Unicode surrogate pair. + // + if(v > 0xFFFF) + { + unsigned int high = ((v - 0x10000) / 0x400) + 0xD800; + unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00; + _out << u16CodePoint(high); + _out << u16CodePoint(low); + } + else + { + _out << "\\U" << codepoint; + } + + i = j + 1 + 8; + } + else + { + _out << s; + i = j; + } + continue; + } case '"': { _out << "\\"; break; } } - _out << *c; // Print normally if in basic source character set + _out << value[i]; // Print normally if in basic source character set } + i++; } - + + // + // Write any pending characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, _out); + u8buffer.clear(); + } + _out << "\""; // Closing " } else if(bp && bp->kind() == Builtin::KindLong) diff --git a/cpp/src/slice2cs/Makefile b/cpp/src/slice2cs/Makefile index e51e24c0445..e46c1005dd6 100644 --- a/cpp/src/slice2cs/Makefile +++ b/cpp/src/slice2cs/Makefile @@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir) include $(top_srcdir)/config/Make.rules -CPPFLAGS := -I. $(CPPFLAGS) +CPPFLAGS := -I. -I.. 
$(CPPFLAGS) $(NAME): $(OBJS) rm -f $@ diff --git a/cpp/src/slice2cs/Makefile.mak b/cpp/src/slice2cs/Makefile.mak index 0a422c08b34..ac748e4fabd 100644 --- a/cpp/src/slice2cs/Makefile.mak +++ b/cpp/src/slice2cs/Makefile.mak @@ -18,7 +18,7 @@ OBJS = .\Gen.obj \ !include $(top_srcdir)/config/Make.rules.mak -CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN +CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN !if "$(GENERATE_PDB)" == "yes" PDBFLAGS = /pdb:$(NAME:.exe=.pdb) diff --git a/cpp/src/slice2java/Gen.cpp b/cpp/src/slice2java/Gen.cpp index e6f2bc91571..eba540f57a6 100644 --- a/cpp/src/slice2java/Gen.cpp +++ b/cpp/src/slice2java/Gen.cpp @@ -14,6 +14,7 @@ #include <IceUtil/Iterator.h> #include <IceUtil/StringUtil.h> #include <IceUtil/InputUtil.h> +#include <IceUtil/Unicode.h> #include <cstring> #include <limits> @@ -23,6 +24,44 @@ using namespace Slice; using namespace IceUtil; using namespace IceUtilInternal; +string +u16CodePoint(unsigned short value) +{ + ostringstream s; + s << "\\u"; + s << hex; + s.width(4); + s.fill('0'); + s << value; + return s.str(); +} + +void +writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out) +{ + vector<unsigned short> u16buffer; + IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c) + { + out << u16CodePoint(*c); + } +} + static string sliceModeToIceMode(Operation::Mode opMode) { @@ -1863,54 +1902,153 @@ Slice::JavaVisitor::writeConstantValue(Output& out, const TypePtr& 
type, const S "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' "; static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end()); out << "\""; - - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + + vector<unsigned char> u8buffer; // Buffer to convert multibyte characters + + for(size_t i = 0; i < value.size();) { - if(charSet.find(*c) == charSet.end()) + if(charSet.find(value[i]) == charSet.end()) { - switch(*c) + char c = value[i]; + if(static_cast<unsigned char>(c) < 128) // Single byte character { // - // Java doesn't want '\n' or '\r\n' encoded as universal - // characters, that gives an error "unclosed string literal" + // Print as unicode if not in basic source character set // - case '\r': - { - out << "\\r"; - break; - } - case '\n': + switch(c) { - out << "\\n"; - break; - } - default: - { - unsigned char uc = *c; - ostringstream s; - s << "\\u"; - s.flags(ios_base::hex); - s.width(4); - s.fill('0'); - s << static_cast<unsigned>(uc); - out << s.str(); - break; + // + // Java doesn't want '\n' or '\r\n' encoded as universal + // characters, that gives an error "unclosed string literal" + // + case '\r': + { + out << "\\r"; + break; + } + case '\n': + { + out << "\\n"; + break; + } + default: + { + out << u16CodePoint(c); + break; + } } } + else + { + u8buffer.push_back(value[i]); + } } else { - switch(*c) + // + // Write any pending characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, out); + u8buffer.clear(); + } + switch(value[i]) { case '\\': + { + string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of backslashes (\) will escape the backslash and + // the codepoint will be interpreted as its characters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 
'u')) + { + size_t sz = value[j] == 'U' ? 8 : 4; + out << s.substr(0, s.size() - 1); + i = j + 1; + + string codepoint = value.substr(j + 1, sz); + assert(codepoint.size() == sz); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + + // + // Java doesn't like these special characters encoded as universal characters + // + if(v == 0x5c) + { + out << "\\\\"; + } + else if(v == 0xa) + { + out << "\\n"; + } + else if(v == 0xd) + { + out << "\\r"; + } + else if(v == 0x22) + { + out << "\\\""; + } + // + // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal + // and is represented using a Unicode surrogate pair. + // + else if(v > 0xFFFF) + { + unsigned int high = ((v - 0x10000) / 0x400) + 0xD800; + unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00; + out << u16CodePoint(high); + out << u16CodePoint(low); + } + else + { + out << u16CodePoint(v); + } + + i = j + 1 + sz; + } + else + { + out << s; + i = j; + } + continue; + } case '"': { out << "\\"; break; } } - out << *c; + out << value[i]; // Print normally if in basic source character set } + i++; + } + + // + // Write any pending characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, out); + u8buffer.clear(); } out << "\""; diff --git a/cpp/src/slice2java/Makefile b/cpp/src/slice2java/Makefile index 010554d8e12..e2a882bfb6f 100644 --- a/cpp/src/slice2java/Makefile +++ b/cpp/src/slice2java/Makefile @@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir) include $(top_srcdir)/config/Make.rules -CPPFLAGS := -I. $(CPPFLAGS) +CPPFLAGS := -I. -I.. $(CPPFLAGS) $(NAME): $(OBJS) rm -f $@ diff --git a/cpp/src/slice2java/Makefile.mak b/cpp/src/slice2java/Makefile.mak index 26a40e3aa55..1d93e22c79d 100644 --- a/cpp/src/slice2java/Makefile.mak +++ b/cpp/src/slice2java/Makefile.mak @@ -18,7 +18,7 @@ OBJS = .\Gen.obj \ !include $(top_srcdir)/config/Make.rules.mak -CPPFLAGS = -I. 
$(CPPFLAGS) -DWIN32_LEAN_AND_MEAN +CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN !if "$(GENERATE_PDB)" == "yes" PDBFLAGS = /pdb:$(NAME:.exe=.pdb) diff --git a/cpp/src/slice2js/Gen.cpp b/cpp/src/slice2js/Gen.cpp index 11bd0f608e5..871dd7d47b3 100644 --- a/cpp/src/slice2js/Gen.cpp +++ b/cpp/src/slice2js/Gen.cpp @@ -20,6 +20,7 @@ #include <direct.h> #endif #include <IceUtil/Iterator.h> +#include <IceUtil/Unicode.h> #include <IceUtil/UUID.h> #include <Slice/Checksum.h> #include <Slice/FileTracker.h> @@ -35,6 +36,44 @@ namespace { string +u16CodePoint(unsigned short value) +{ + ostringstream s; + s << "\\u"; + s << hex; + s.width(4); + s.fill('0'); + s << value; + return s.str(); +} + +void +writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out) +{ + vector<unsigned short> u16buffer; + IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c) + { + out << u16CodePoint(*c); + } +} + +string sliceModeToIceMode(Operation::Mode opMode) { switch(opMode) @@ -479,34 +518,112 @@ Slice::JsVisitor::writeConstantValue(const string& scope, const TypePtr& type, c _out << "\""; // Opening " - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + vector<unsigned char> u8buffer; // Buffer to convert multibyte characters + + for(size_t i = 0; i < value.size();) { - if(charSet.find(*c) == charSet.end()) - { - unsigned char uc = *c; // char may be signed, so make it positive - ostringstream s; - s << "\\u"; // 
Print as unicode if not in basic source character set - s << hex; - s.width(4); - s.fill('0'); - s << static_cast<unsigned>(uc); - _out << s.str(); + if(charSet.find(value[i]) == charSet.end()) + { + if(static_cast<unsigned char>(value[i]) < 128) // Single byte character + { + // + // Print as unicode if not in basic source character set + // + _out << u16CodePoint(static_cast<unsigned int>(value[i])); + } + else + { + u8buffer.push_back(value[i]); + } } else { - switch(*c) + // + // Write any pending characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, _out); + u8buffer.clear(); + } + switch(value[i]) { case '\\': + { + string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of backslashes (\) will escape the backslash and + // the codepoint will be interpreted as its characters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && value[j] == 'U') + { + _out << s.substr(0, s.size() - 1); + i = j + 1; + + string codepoint = value.substr(j + 1, 8); + assert(codepoint.size() == 8); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + + // + // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal + // and is represented using a Unicode surrogate pair. 
+ // + if(v > 0xFFFF) + { + unsigned int high = ((v - 0x10000) / 0x400) + 0xD800; + unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00; + _out << u16CodePoint(high); + _out << u16CodePoint(low); + } + else + { + _out << u16CodePoint(v); + } + + i = j + 1 + 8; + } + else + { + _out << s; + i = j; + } + continue; + } case '"': { _out << "\\"; break; } } - _out << *c; // Print normally if in basic source character set + _out << value[i]; // Print normally if in basic source character set } + i++; } - + + // + // Write any pending characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, _out); + u8buffer.clear(); + } + _out << "\""; // Closing " } else if(bp && bp->kind() == Builtin::KindLong) diff --git a/cpp/src/slice2js/Makefile b/cpp/src/slice2js/Makefile index bd1bbe967f8..0aaf14a7a0d 100644 --- a/cpp/src/slice2js/Makefile +++ b/cpp/src/slice2js/Makefile @@ -21,7 +21,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir) include $(top_srcdir)/config/Make.rules -CPPFLAGS := -I. $(CPPFLAGS) +CPPFLAGS := -I. -I.. $(CPPFLAGS) $(NAME): $(OBJS) rm -f $@ diff --git a/cpp/src/slice2js/Makefile.mak b/cpp/src/slice2js/Makefile.mak index 0014e231948..79bd9f23cca 100644 --- a/cpp/src/slice2js/Makefile.mak +++ b/cpp/src/slice2js/Makefile.mak @@ -19,7 +19,7 @@ OBJS = .\Gen.obj \ !include $(top_srcdir)/config/Make.rules.mak -CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN +CPPFLAGS = -I. -I.. 
$(CPPFLAGS) -DWIN32_LEAN_AND_MEAN !if "$(GENERATE_PDB)" == "yes" PDBFLAGS = /pdb:$(NAME:.exe=.pdb) diff --git a/cpp/src/slice2php/Main.cpp b/cpp/src/slice2php/Main.cpp index 77e2a93e921..6de877cec54 100644 --- a/cpp/src/slice2php/Main.cpp +++ b/cpp/src/slice2php/Main.cpp @@ -16,6 +16,7 @@ #include <IceUtil/StringUtil.h> #include <IceUtil/Mutex.h> #include <IceUtil/MutexPtrLock.h> +#include <IceUtil/Unicode.h> #include <Slice/Checksum.h> #include <Slice/Preprocessor.h> #include <Slice/FileTracker.h> @@ -1270,9 +1271,10 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va _out << "\""; // Opening " - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + for(size_t i = 0; i < value.size();) { - switch(*c) + char c = value[i]; + switch(c) { case '$': { @@ -1286,8 +1288,79 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va } case '\\': { - _out << "\\\\"; - break; + + string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of backslashes (\) will escape the backslash and + // the codepoint will be interpreted as its characters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) + { + // + // Convert codepoint to UTF8 bytes and write the escaped bytes + // + _out << s.substr(0, s.size() - 1); + + size_t sz = value[j] == 'U' ? 
8 : 4; + string codepoint = value.substr(j + 1, sz); + assert(codepoint.size() == sz); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + + vector<unsigned int> u32buffer; + u32buffer.push_back(v); + + vector<unsigned char> u8buffer; + + IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + ostringstream s; + for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) + { + s << "\\"; + s.fill('0'); + s.width(3); + s << oct; + s << static_cast<unsigned int>(*q); + } + _out << s.str(); + + i = j + 1 + sz; + } + else + { + _out << s; + i = j; + } + continue; } case '\r': { @@ -1304,11 +1377,6 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va _out << "\\t"; break; } - case '\b': - { - _out << "\\b"; - break; - } case '\f': { _out << "\\f"; @@ -1316,9 +1384,9 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va } default: { - if(charSet.find(*c) == charSet.end()) + if(charSet.find(c) == charSet.end()) { - unsigned char uc = *c; // Char may be signed, so make it positive. + unsigned char uc = c; // Char may be signed, so make it positive. stringstream s; s << "\\"; // Print as octal if not in basic source character set. s.flags(ios_base::oct); @@ -1329,11 +1397,12 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va } else { - _out << *c; // Print normally if in basic source character set. + _out << c; // Print normally if in basic source character set. 
} break; } } + ++i; } _out << "\""; // Closing " diff --git a/cpp/src/slice2php/Makefile b/cpp/src/slice2php/Makefile index 8bc91a8cd66..62f82531201 100644 --- a/cpp/src/slice2php/Makefile +++ b/cpp/src/slice2php/Makefile @@ -19,7 +19,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir) include $(top_srcdir)/config/Make.rules -CPPFLAGS := -I. $(CPPFLAGS) +CPPFLAGS := -I. -I.. $(CPPFLAGS) $(NAME): $(OBJS) rm -f $@ diff --git a/cpp/src/slice2php/Makefile.mak b/cpp/src/slice2php/Makefile.mak index 47cab5776bd..1218590c485 100644 --- a/cpp/src/slice2php/Makefile.mak +++ b/cpp/src/slice2php/Makefile.mak @@ -17,7 +17,7 @@ OBJS = .\Main.obj !include $(top_srcdir)/config/Make.rules.mak -CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN +CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN !if "$(GENERATE_PDB)" == "yes" PDBFLAGS = /pdb:$(NAME:.exe=.pdb) diff --git a/cpp/test/Ice/operations/Test.ice b/cpp/test/Ice/operations/Test.ice index eec8f42e853..cf44e14a50d 100644 --- a/cpp/test/Ice/operations/Test.ice +++ b/cpp/test/Ice/operations/Test.ice @@ -251,6 +251,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -274,5 +276,70 @@ class MyDerivedClass extends MyClass MyStruct1 opMyStruct1(MyStruct1 c); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter 
Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 
- Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/cpp/test/Ice/operations/TestAMD.ice b/cpp/test/Ice/operations/TestAMD.ice index 615c2a81ad6..a65608ea4c7 100644 --- a/cpp/test/Ice/operations/TestAMD.ice +++ b/cpp/test/Ice/operations/TestAMD.ice @@ -252,6 +252,8 @@ dictionary<MyEnum, MyEnumS> MyEnumMyEnumSD; StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -276,5 +278,72 @@ class MyClass1 MyStruct1 opMyStruct1(MyStruct1 c); }; + +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = 
"\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = 
"\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + + }; diff --git a/cpp/test/Ice/operations/TestAMDI.cpp b/cpp/test/Ice/operations/TestAMDI.cpp index 35c0c4af515..df030eb030d 100644 --- a/cpp/test/Ice/operations/TestAMDI.cpp +++ b/cpp/test/Ice/operations/TestAMDI.cpp @@ -13,6 +13,8 @@ #include <functional> #include <iterator> +using namespace std; + class Thread_opVoid : public IceUtil::Thread { public: @@ -776,4 +778,47 @@ void MyDerivedClassI::opMyClass1_async(const Test::AMD_MyDerivedClass_opMyClass1Ptr& cb, const Test::MyClass1Ptr& c, const Ice::Current&) { cb->ice_response(c); +} + + +void +MyDerivedClassI::opStringLiterals_async(const Test::AMD_MyClass_opStringLiteralsPtr& cb, + const Ice::Current&) +{ + Test::StringS data; + data.push_back(Test::s0); + data.push_back(Test::s1); + data.push_back(Test::s2); + data.push_back(Test::s3); + data.push_back(Test::s4); + data.push_back(Test::s5); + data.push_back(Test::s6); + data.push_back(Test::s7); + data.push_back(Test::s8); + data.push_back(Test::s9); + data.push_back(Test::s10); + + data.push_back(Test::sw0); + data.push_back(Test::sw1); + data.push_back(Test::sw2); + data.push_back(Test::sw3); + data.push_back(Test::sw4); + data.push_back(Test::sw5); + data.push_back(Test::sw6); + data.push_back(Test::sw7); + data.push_back(Test::sw8); + data.push_back(Test::sw9); + data.push_back(Test::sw10); + + data.push_back(Test::ss0); + data.push_back(Test::ss1); + data.push_back(Test::ss2); + data.push_back(Test::ss3); + data.push_back(Test::ss4); + data.push_back(Test::ss5); + + data.push_back(Test::su0); + data.push_back(Test::su1); + data.push_back(Test::su2); + cb->ice_response(data); }
\ No newline at end of file diff --git a/cpp/test/Ice/operations/TestAMDI.h b/cpp/test/Ice/operations/TestAMDI.h index 7fb17d30828..418d0a2aa36 100644 --- a/cpp/test/Ice/operations/TestAMDI.h +++ b/cpp/test/Ice/operations/TestAMDI.h @@ -275,6 +275,9 @@ public: virtual void opMyClass1_async(const Test::AMD_MyDerivedClass_opMyClass1Ptr&, const Test::MyClass1Ptr&, const Ice::Current&); + + virtual void opStringLiterals_async(const Test::AMD_MyClass_opStringLiteralsPtr&, + const Ice::Current&); private: IceUtil::ThreadPtr _opVoidThread; diff --git a/cpp/test/Ice/operations/TestI.cpp b/cpp/test/Ice/operations/TestI.cpp index cfc123c162e..29e6728047b 100644 --- a/cpp/test/Ice/operations/TestI.cpp +++ b/cpp/test/Ice/operations/TestI.cpp @@ -14,6 +14,8 @@ #include <functional> #include <iterator> +using namespace std; + MyDerivedClassI::MyDerivedClassI() : _opByteSOnewayCallCount(0) { } @@ -738,4 +740,45 @@ MyDerivedClassI::opMyClass1(const Test::MyClass1Ptr& c, const Ice::Current&) { return c; } - + +Test::StringS +MyDerivedClassI::opStringLiterals(const Ice::Current&) +{ + Test::StringS data; + data.push_back(Test::s0); + data.push_back(Test::s1); + data.push_back(Test::s2); + data.push_back(Test::s3); + data.push_back(Test::s4); + data.push_back(Test::s5); + data.push_back(Test::s6); + data.push_back(Test::s7); + data.push_back(Test::s8); + data.push_back(Test::s9); + data.push_back(Test::s10); + + data.push_back(Test::sw0); + data.push_back(Test::sw1); + data.push_back(Test::sw2); + data.push_back(Test::sw3); + data.push_back(Test::sw4); + data.push_back(Test::sw5); + data.push_back(Test::sw6); + data.push_back(Test::sw7); + data.push_back(Test::sw8); + data.push_back(Test::sw9); + data.push_back(Test::sw10); + + data.push_back(Test::ss0); + data.push_back(Test::ss1); + data.push_back(Test::ss2); + data.push_back(Test::ss3); + data.push_back(Test::ss4); + data.push_back(Test::ss5); + + data.push_back(Test::su0); + data.push_back(Test::su1); + data.push_back(Test::su2); 
+ + return data; +} diff --git a/cpp/test/Ice/operations/TestI.h b/cpp/test/Ice/operations/TestI.h index 44398bea49f..4f31f7eecfe 100644 --- a/cpp/test/Ice/operations/TestI.h +++ b/cpp/test/Ice/operations/TestI.h @@ -268,6 +268,8 @@ public: virtual Test::MyStruct1 opMyStruct1(const Test::MyStruct1&, const Ice::Current&); virtual Test::MyClass1Ptr opMyClass1(const Test::MyClass1Ptr&, const Ice::Current&); + + virtual Test::StringS opStringLiterals(const Ice::Current&); private: diff --git a/cpp/test/Ice/operations/Twoways.cpp b/cpp/test/Ice/operations/Twoways.cpp index ce82b3651bd..fe400c27f27 100644 --- a/cpp/test/Ice/operations/Twoways.cpp +++ b/cpp/test/Ice/operations/Twoways.cpp @@ -62,6 +62,85 @@ private: void twoways(const Ice::CommunicatorPtr& communicator, const Test::MyClassPrx& p) { + Test::StringS literals = p->opStringLiterals(); + + test(Test::s0 == "\\" && + Test::s0 == Test::sw0 && + Test::s0 == literals[0] && + Test::s0 == literals[11]); + + test(Test::s1 == "A" && + Test::s1 == Test::sw1 && + Test::s1 == literals[1] && + Test::s1 == literals[12]); + + test(Test::s2 == "Ice" && + Test::s2 == Test::sw2 && + Test::s2 == literals[2] && + Test::s2 == literals[13]); + + test(Test::s3 == "A21" && + Test::s3 == Test::sw3 && + Test::s3 == literals[3] && + Test::s3 == literals[14]); + + test(Test::s4 == "\\u0041 \\U00000041" && + Test::s4 == Test::sw4 && + Test::s4 == literals[4] && + Test::s4 == literals[15]); + + test(Test::s5 == "\u00FF" && + Test::s5 == Test::sw5 && + Test::s5 == literals[5] && + Test::s5 == literals[16]); + + test(Test::s6 == "\u03FF" && + Test::s6 == Test::sw6 && + Test::s6 == literals[6] && + Test::s6 == literals[17]); + + test(Test::s7 == "\u05F0" && + Test::s7 == Test::sw7 && + Test::s7 == literals[7] && + Test::s7 == literals[18]); + + test(Test::s8 == "\U00010000" && + Test::s8 == Test::sw8 && + Test::s8 == literals[8] && + Test::s8 == literals[19]); + + test(Test::s9 == "\U0001F34C" && + Test::s9 == Test::sw9 && + Test::s9 == 
literals[9] && + Test::s9 == literals[20]); + + test(Test::s10 == "\u0DA7" && + Test::s10 == Test::sw10 && + Test::s10 == literals[10] && + Test::s10 == literals[21]); + + test(Test::ss0 == "\'\"\?\\\a\b\f\n\r\t\v" && + Test::ss0 == Test::ss1 && + Test::ss0 == Test::ss2 && + Test::ss0 == literals[22] && + Test::ss0 == literals[23] && + Test::ss0 == literals[24]); + + test(Test::ss3 == "\\\\U\\u\\" && + Test::ss3 == literals[25]); + + test(Test::ss4 == "\\A\\" && + Test::ss4 == literals[26]); + + test(Test::ss5 == "\\u0041\\" && + Test::ss5 == literals[27]); + + test(Test::su0 == Test::su1 && + Test::su0 == Test::su2 && + Test::su0 == literals[28] && + Test::su0 == literals[29] && + Test::su0 == literals[30]); + { p->ice_ping(); } diff --git a/cpp/test/Slice/errorDetection/ConstDef.err b/cpp/test/Slice/errorDetection/ConstDef.err index 4765aa8da62..7784f8c8fef 100644 --- a/cpp/test/Slice/errorDetection/ConstDef.err +++ b/cpp/test/Slice/errorDetection/ConstDef.err @@ -21,6 +21,15 @@ ConstDef.ice:124: initializer `-1' for constant `b3' out of range for type byte ConstDef.ice:125: initializer `256' for constant `b4' out of range for type byte ConstDef.ice:127: illegal NUL character in string constant ConstDef.ice:128: illegal NUL character in string constant -ConstDef.ice:135: initializer `32767' for constant `c5' out of range for type byte -ConstDef.ice:136: initializer `2147483647' for constant `c6' out of range for type short -ConstDef.ice:137: initializer `9223372036854775807' for constant `c7' out of range for type int +ConstDef.ice:129: illegal NUL character in string constant +ConstDef.ice:130: illegal NUL character in string constant +ConstDef.ice:137: initializer `32767' for constant `c5' out of range for type byte +ConstDef.ice:138: initializer `2147483647' for constant `c6' out of range for type short +ConstDef.ice:139: initializer `9223372036854775807' for constant `c7' out of range for type int +ConstDef.ice:150: warning: unknown escape sequence '\g' 
+ConstDef.ice:151: unknown escape sequence in string constant: "a\u000N +ConstDef.ice:152: unknown escape sequence in string constant: "a\U0000000K +ConstDef.ice:153: warning: octal escape sequence out of range: '\455' +ConstDef.ice:154: warning: hex escape sequence out of range: '\xfff' +ConstDef.ice:155: unknown escape sequence in string constant: '\ud83c' +ConstDef.ice:155: unknown escape sequence in string constant: '\udf4c'
\ No newline at end of file diff --git a/cpp/test/Slice/errorDetection/ConstDef.ice b/cpp/test/Slice/errorDetection/ConstDef.ice index 934423fade6..e6682e80300 100644 --- a/cpp/test/Slice/errorDetection/ConstDef.ice +++ b/cpp/test/Slice/errorDetection/ConstDef.ice @@ -23,7 +23,7 @@ const int intconst = 0; const long longconst = 0; const float floatconst = 0.; const double doubleconst = 0.; -const string stringconst = "X\aX\x00001X\rX\007\xffX\xffffX\xff7f"; +const string stringconst = "X\aX\x00001X\rX\007\xff\xff\xff"; const string stringconst2 = "Hello World!"; enum color { red, green, blue }; const color colorconst = blue; @@ -126,6 +126,8 @@ const byte b4 = 256; // overflow const string nullstring1 = "a\000"; const string nullstring2 = "a\x000"; +const string nullstring3 = "a\u0000"; +const string nullstring4 = "a\U00000000"; const byte c1 = l1; // OK const short c2 = l1; // OK @@ -145,4 +147,11 @@ const string c12 = stringconst; // OK const color c13 = colorconst; // OK +const string unknowescape = "a\g"; // Unknown escape sequence +const string invalidCodepoint = "a\u000N"; // Invalid code point +const string invalidCodepoint1 = "a\U0000000K"; // Invalid code point +const string octalRange = "\455"; // OCT escape sequence out of range +const string hexRange = "\xFFF"; // HEX escape sequence out of range +const string surrogatePair = "\uD83C\uDF4C"; // surrogate pair not allow in slice + }; diff --git a/csharp/test/Ice/operations/MyDerivedClassAMDI.cs b/csharp/test/Ice/operations/MyDerivedClassAMDI.cs index 41744244172..f7d9f4b267e 100644 --- a/csharp/test/Ice/operations/MyDerivedClassAMDI.cs +++ b/csharp/test/Ice/operations/MyDerivedClassAMDI.cs @@ -868,6 +868,48 @@ public sealed class MyDerivedClassI : Test.MyDerivedClass { cb.ice_response(value); } + + + public override void opStringLiterals_async(Test.AMD_MyClass_opStringLiterals cb, Ice.Current current) + { + cb.ice_response(new string[] + { + Test.s0.value, + Test.s1.value, + Test.s2.value, + 
Test.s3.value, + Test.s4.value, + Test.s5.value, + Test.s6.value, + Test.s7.value, + Test.s8.value, + Test.s9.value, + Test.s10.value, + + Test.sw0.value, + Test.sw1.value, + Test.sw2.value, + Test.sw3.value, + Test.sw4.value, + Test.sw5.value, + Test.sw6.value, + Test.sw7.value, + Test.sw8.value, + Test.sw9.value, + Test.sw10.value, + + Test.ss0.value, + Test.ss1.value, + Test.ss2.value, + Test.ss3.value, + Test.ss4.value, + Test.ss5.value, + + Test.su0.value, + Test.su1.value, + Test.su2.value + }); + } private Thread_opVoid _opVoidThread; private int _opByteSOnewayCallCount = 0; diff --git a/csharp/test/Ice/operations/MyDerivedClassAMDTieI.cs b/csharp/test/Ice/operations/MyDerivedClassAMDTieI.cs index 40c15c33790..984b168ba28 100644 --- a/csharp/test/Ice/operations/MyDerivedClassAMDTieI.cs +++ b/csharp/test/Ice/operations/MyDerivedClassAMDTieI.cs @@ -847,6 +847,47 @@ public sealed class MyDerivedClassTieI : Test.MyDerivedClassOperations_ { cb.ice_response(value); } + + public void opStringLiterals_async(Test.AMD_MyClass_opStringLiterals cb, Ice.Current current) + { + cb.ice_response(new string[] + { + Test.s0.value, + Test.s1.value, + Test.s2.value, + Test.s3.value, + Test.s4.value, + Test.s5.value, + Test.s6.value, + Test.s7.value, + Test.s8.value, + Test.s9.value, + Test.s10.value, + + Test.sw0.value, + Test.sw1.value, + Test.sw2.value, + Test.sw3.value, + Test.sw4.value, + Test.sw5.value, + Test.sw6.value, + Test.sw7.value, + Test.sw8.value, + Test.sw9.value, + Test.sw10.value, + + Test.ss0.value, + Test.ss1.value, + Test.ss2.value, + Test.ss3.value, + Test.ss4.value, + Test.ss5.value, + + Test.su0.value, + Test.su1.value, + Test.su2.value + }); + } private Thread_opVoid _opVoidThread; private int _opByteSOnewayCallCount = 0; diff --git a/csharp/test/Ice/operations/MyDerivedClassI.cs b/csharp/test/Ice/operations/MyDerivedClassI.cs index 640e73a07e2..07a182741cb 100644 --- a/csharp/test/Ice/operations/MyDerivedClassI.cs +++ 
b/csharp/test/Ice/operations/MyDerivedClassI.cs @@ -829,6 +829,47 @@ public sealed class MyDerivedClassI : Test.MyDerivedClass { return s; } + + public override string[] opStringLiterals(Ice.Current current) + { + return new string[] + { + Test.s0.value, + Test.s1.value, + Test.s2.value, + Test.s3.value, + Test.s4.value, + Test.s5.value, + Test.s6.value, + Test.s7.value, + Test.s8.value, + Test.s9.value, + Test.s10.value, + + Test.sw0.value, + Test.sw1.value, + Test.sw2.value, + Test.sw3.value, + Test.sw4.value, + Test.sw5.value, + Test.sw6.value, + Test.sw7.value, + Test.sw8.value, + Test.sw9.value, + Test.sw10.value, + + Test.ss0.value, + Test.ss1.value, + Test.ss2.value, + Test.ss3.value, + Test.ss4.value, + Test.ss5.value, + + Test.su0.value, + Test.su1.value, + Test.su2.value + }; + } private int _opByteSOnewayCallCount = 0; } diff --git a/csharp/test/Ice/operations/MyDerivedClassTieI.cs b/csharp/test/Ice/operations/MyDerivedClassTieI.cs index f3b602b6428..7d36de46c69 100644 --- a/csharp/test/Ice/operations/MyDerivedClassTieI.cs +++ b/csharp/test/Ice/operations/MyDerivedClassTieI.cs @@ -804,6 +804,47 @@ public sealed class MyDerivedClassTieI : Test.MyDerivedClassOperations_ { return s; } + + public string[] opStringLiterals(Ice.Current current) + { + return new string[] + { + Test.s0.value, + Test.s1.value, + Test.s2.value, + Test.s3.value, + Test.s4.value, + Test.s5.value, + Test.s6.value, + Test.s7.value, + Test.s8.value, + Test.s9.value, + Test.s10.value, + + Test.sw0.value, + Test.sw1.value, + Test.sw2.value, + Test.sw3.value, + Test.sw4.value, + Test.sw5.value, + Test.sw6.value, + Test.sw7.value, + Test.sw8.value, + Test.sw9.value, + Test.sw10.value, + + Test.ss0.value, + Test.ss1.value, + Test.ss2.value, + Test.ss3.value, + Test.ss4.value, + Test.ss5.value, + + Test.su0.value, + Test.su1.value, + Test.su2.value + }; + } private int _opByteSOnewayCallCount = 0; } diff --git a/csharp/test/Ice/operations/Test.ice b/csharp/test/Ice/operations/Test.ice index 
e8cbcc5a56e..d085b4b2cd3 100644 --- a/csharp/test/Ice/operations/Test.ice +++ b/csharp/test/Ice/operations/Test.ice @@ -251,6 +251,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -274,4 +276,69 @@ class MyDerivedClass extends MyClass MyStruct1 opMyStruct1(MyStruct1 s); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? 
question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/csharp/test/Ice/operations/TestAMD.ice b/csharp/test/Ice/operations/TestAMD.ice index 86ff8eb92ee..179834bcd5f 100644 --- a/csharp/test/Ice/operations/TestAMD.ice +++ b/csharp/test/Ice/operations/TestAMD.ice @@ -249,6 +249,8 @@ 
dictionary<MyEnum, MyEnumS> MyEnumMyEnumSD; ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -277,4 +279,69 @@ class MyClass1 Ice::Context getContext(); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? 
question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/csharp/test/Ice/operations/Twoways.cs b/csharp/test/Ice/operations/Twoways.cs index 5887239158c..25324cd2ca7 100644 --- a/csharp/test/Ice/operations/Twoways.cs +++ b/csharp/test/Ice/operations/Twoways.cs @@ -54,10 +54,91 @@ class 
Twoways internal static void twoways(Ice.Communicator communicator, Test.MyClassPrx p) { + + string[] literals = p.opStringLiterals(); + + test(Test.s0.value.Equals("\\") && + Test.s0.value.Equals(Test.sw0.value) && + Test.s0.value.Equals(literals[0]) && + Test.s0.value.Equals(literals[11])); + + test(Test.s1.value.Equals("A") && + Test.s1.value.Equals(Test.sw1.value) && + Test.s1.value.Equals(literals[1]) && + Test.s1.value.Equals(literals[12])); + + test(Test.s2.value.Equals("Ice") && + Test.s2.value.Equals(Test.sw2.value) && + Test.s2.value.Equals(literals[2]) && + Test.s2.value.Equals(literals[13])); + + test(Test.s3.value.Equals("A21") && + Test.s3.value.Equals(Test.sw3.value) && + Test.s3.value.Equals(literals[3]) && + Test.s3.value.Equals(literals[14])); + + test(Test.s4.value.Equals("\\u0041 \\U00000041") && + Test.s4.value.Equals(Test.sw4.value) && + Test.s4.value.Equals(literals[4]) && + Test.s4.value.Equals(literals[15])); + + test(Test.s5.value.Equals("\u00FF") && + Test.s5.value.Equals(Test.sw5.value) && + Test.s5.value.Equals(literals[5]) && + Test.s5.value.Equals(literals[16])); + + test(Test.s6.value.Equals("\u03FF") && + Test.s6.value.Equals(Test.sw6.value) && + Test.s6.value.Equals(literals[6]) && + Test.s6.value.Equals(literals[17])); + + test(Test.s7.value.Equals("\u05F0") && + Test.s7.value.Equals(Test.sw7.value) && + Test.s7.value.Equals(literals[7]) && + Test.s7.value.Equals(literals[18])); + + test(Test.s8.value.Equals("\U00010000") && + Test.s8.value.Equals(Test.sw8.value) && + Test.s8.value.Equals(literals[8]) && + Test.s8.value.Equals(literals[19])); + + test(Test.s9.value.Equals("\U0001F34C") && + Test.s9.value.Equals(Test.sw9.value) && + Test.s9.value.Equals(literals[9]) && + Test.s9.value.Equals(literals[20])); + + test(Test.s10.value.Equals("\u0DA7") && + Test.s10.value.Equals(Test.sw10.value) && + Test.s10.value.Equals(literals[10]) && + Test.s10.value.Equals(literals[21])); + + 
test(Test.ss0.value.Equals("\'\"\u003f\\\a\b\f\n\r\t\v") && + Test.ss0.value.Equals(Test.ss1.value) && + Test.ss0.value.Equals(Test.ss2.value) && + Test.ss0.value.Equals(literals[22]) && + Test.ss0.value.Equals(literals[23]) && + Test.ss0.value.Equals(literals[24])); + + test(Test.ss3.value.Equals("\\\\U\\u\\") && + Test.ss3.value.Equals(literals[25])); + + test(Test.ss4.value.Equals("\\A\\") && + Test.ss4.value.Equals(literals[26])); + + test(Test.ss5.value.Equals("\\u0041\\") && + Test.ss5.value.Equals(literals[27])); + + test(Test.su0.value.Equals(Test.su1.value) && + Test.su0.value.Equals(Test.su2.value) && + Test.su0.value.Equals(literals[28]) && + Test.su0.value.Equals(literals[29]) && + Test.su0.value.Equals(literals[30])); + p.ice_ping(); - test(Test.MyClassPrxHelper.ice_staticId().Equals(Test.MyClass.ice_staticId())); - test(Ice.ObjectPrxHelper.ice_staticId().Equals(Ice.ObjectImpl.ice_staticId())); + + test(Test.MyClassPrxHelper.ice_staticId().Equals(Test.MyClass.ice_staticId())); + test(Ice.ObjectPrxHelper.ice_staticId().Equals(Ice.ObjectImpl.ice_staticId())); test(p.ice_isA(Test.MyClass.ice_staticId())); diff --git a/java/test/src/main/java/test/Ice/operations/AMDMyDerivedClassI.java b/java/test/src/main/java/test/Ice/operations/AMDMyDerivedClassI.java index 2ad0c26d0d8..02b88e5d91a 100644 --- a/java/test/src/main/java/test/Ice/operations/AMDMyDerivedClassI.java +++ b/java/test/src/main/java/test/Ice/operations/AMDMyDerivedClassI.java @@ -888,6 +888,48 @@ public final class AMDMyDerivedClassI extends MyDerivedClass cb.ice_response(value); } + @Override + public void opStringLiterals_async(AMD_MyClass_opStringLiterals cb, Ice.Current current) + { + cb.ice_response(new String[] + { + s0.value, + s1.value, + s2.value, + s3.value, + s4.value, + s5.value, + s6.value, + s7.value, + s8.value, + s9.value, + s10.value, + + sw0.value, + sw1.value, + sw2.value, + sw3.value, + sw4.value, + sw5.value, + sw6.value, + sw7.value, + sw8.value, + sw9.value, + sw10.value, 
+ + ss0.value, + ss1.value, + ss2.value, + ss3.value, + ss4.value, + ss5.value, + + su0.value, + su1.value, + su2.value + }); + } + private Thread _opVoidThread; private int _opByteSOnewayCallCount = 0; } diff --git a/java/test/src/main/java/test/Ice/operations/AMDTieMyDerivedClassI.java b/java/test/src/main/java/test/Ice/operations/AMDTieMyDerivedClassI.java index 4c82944f359..1e5ab277b56 100644 --- a/java/test/src/main/java/test/Ice/operations/AMDTieMyDerivedClassI.java +++ b/java/test/src/main/java/test/Ice/operations/AMDTieMyDerivedClassI.java @@ -851,6 +851,48 @@ public final class AMDTieMyDerivedClassI implements _MyDerivedClassOperations { cb.ice_response(value); } + + @Override + public void opStringLiterals_async(AMD_MyClass_opStringLiterals cb, Ice.Current current) + { + cb.ice_response(new String[] + { + s0.value, + s1.value, + s2.value, + s3.value, + s4.value, + s5.value, + s6.value, + s7.value, + s8.value, + s9.value, + s10.value, + + sw0.value, + sw1.value, + sw2.value, + sw3.value, + sw4.value, + sw5.value, + sw6.value, + sw7.value, + sw8.value, + sw9.value, + sw10.value, + + ss0.value, + ss1.value, + ss2.value, + ss3.value, + ss4.value, + ss5.value, + + su0.value, + su1.value, + su2.value + }); + } private Thread _opVoidThread; private int _opByteSOnewayCallCount = 0; diff --git a/java/test/src/main/java/test/Ice/operations/MyDerivedClassI.java b/java/test/src/main/java/test/Ice/operations/MyDerivedClassI.java index 1b5edabfd5a..2e7d418a7ea 100644 --- a/java/test/src/main/java/test/Ice/operations/MyDerivedClassI.java +++ b/java/test/src/main/java/test/Ice/operations/MyDerivedClassI.java @@ -832,6 +832,48 @@ public final class MyDerivedClassI extends MyDerivedClass { return value; } + + @Override + public String[] opStringLiterals(Ice.Current current) + { + return new String[] + { + s0.value, + s1.value, + s2.value, + s3.value, + s4.value, + s5.value, + s6.value, + s7.value, + s8.value, + s9.value, + s10.value, + + sw0.value, + sw1.value, + 
sw2.value, + sw3.value, + sw4.value, + sw5.value, + sw6.value, + sw7.value, + sw8.value, + sw9.value, + sw10.value, + + ss0.value, + ss1.value, + ss2.value, + ss3.value, + ss4.value, + ss5.value, + + su0.value, + su1.value, + su2.value + }; + } private int _opByteSOnewayCallCount = 0; } diff --git a/java/test/src/main/java/test/Ice/operations/Test.ice b/java/test/src/main/java/test/Ice/operations/Test.ice index f4e9f4000f6..069faffeb6b 100644 --- a/java/test/src/main/java/test/Ice/operations/Test.ice +++ b/java/test/src/main/java/test/Ice/operations/Test.ice @@ -250,6 +250,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -273,4 +275,69 @@ class MyDerivedClass extends MyClass MyStruct1 opMyStruct1(MyStruct1 opMyStruct1); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE 
VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = 
"\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/java/test/src/main/java/test/Ice/operations/TestAMD.ice b/java/test/src/main/java/test/Ice/operations/TestAMD.ice index f27ceb1c24c..19b2eb839b5 100644 --- a/java/test/src/main/java/test/Ice/operations/TestAMD.ice +++ b/java/test/src/main/java/test/Ice/operations/TestAMD.ice @@ -250,6 +250,8 @@ dictionary<MyEnum, MyEnumS> MyEnumMyEnumSD; ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -273,4 +275,69 @@ class MyClass1 MyStruct1 opMyStruct1(MyStruct1 opMyStruct1); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = 
"\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/java/test/src/main/java/test/Ice/operations/TieMyDerivedClassI.java 
b/java/test/src/main/java/test/Ice/operations/TieMyDerivedClassI.java index d1c1540bccb..0e4e6b89cb1 100644 --- a/java/test/src/main/java/test/Ice/operations/TieMyDerivedClassI.java +++ b/java/test/src/main/java/test/Ice/operations/TieMyDerivedClassI.java @@ -797,5 +797,47 @@ public final class TieMyDerivedClassI implements _MyDerivedClassOperations return value; } + @Override + public String[] opStringLiterals(Ice.Current current) + { + return new String[] + { + s0.value, + s1.value, + s2.value, + s3.value, + s4.value, + s5.value, + s6.value, + s7.value, + s8.value, + s9.value, + s10.value, + + sw0.value, + sw1.value, + sw2.value, + sw3.value, + sw4.value, + sw5.value, + sw6.value, + sw7.value, + sw8.value, + sw9.value, + sw10.value, + + ss0.value, + ss1.value, + ss2.value, + ss3.value, + ss4.value, + ss5.value, + + su0.value, + su1.value, + su2.value + }; + } + private int _opByteSOnewayCallCount = 0; } diff --git a/java/test/src/main/java/test/Ice/operations/Twoways.java b/java/test/src/main/java/test/Ice/operations/Twoways.java index f2df1a9f938..0a8d61f6397 100644 --- a/java/test/src/main/java/test/Ice/operations/Twoways.java +++ b/java/test/src/main/java/test/Ice/operations/Twoways.java @@ -55,6 +55,85 @@ class Twoways twoways(Application app, MyClassPrx p) { Communicator communicator = app.communicator(); + + String[] literals = p.opStringLiterals(); + + test(s0.value.equals("\\") && + s0.value.equals(sw0.value) && + s0.value.equals(literals[0]) && + s0.value.equals(literals[11])); + + test(s1.value.equals("A") && + s1.value.equals(sw1.value) && + s1.value.equals(literals[1]) && + s1.value.equals(literals[12])); + + test(s2.value.equals("Ice") && + s2.value.equals(sw2.value) && + s2.value.equals(literals[2]) && + s2.value.equals(literals[13])); + + test(s3.value.equals("A21") && + s3.value.equals(sw3.value) && + s3.value.equals(literals[3]) && + s3.value.equals(literals[14])); + + test(s4.value.equals("\\u0041 \\U00000041") && + s4.value.equals(sw4.value) && 
+ s4.value.equals(literals[4]) && + s4.value.equals(literals[15])); + + test(s5.value.equals("\u00FF") && + s5.value.equals(sw5.value) && + s5.value.equals(literals[5]) && + s5.value.equals(literals[16])); + + test(s6.value.equals("\u03FF") && + s6.value.equals(sw6.value) && + s6.value.equals(literals[6]) && + s6.value.equals(literals[17])); + + test(s7.value.equals("\u05F0") && + s7.value.equals(sw7.value) && + s7.value.equals(literals[7]) && + s7.value.equals(literals[18])); + + test(s8.value.equals("\uD800\uDC00") && + s8.value.equals(sw8.value) && + s8.value.equals(literals[8]) && + s8.value.equals(literals[19])); + + test(s9.value.equals("\uD83C\uDF4C") && + s9.value.equals(sw9.value) && + s9.value.equals(literals[9]) && + s9.value.equals(literals[20])); + + test(s10.value.equals("\u0DA7") && + s10.value.equals(sw10.value) && + s10.value.equals(literals[10]) && + s10.value.equals(literals[21])); + + test(ss0.value.equals("\'\"\u003f\\\u0007\b\f\n\r\t\u000b") && + ss0.value.equals(ss1.value) && + ss0.value.equals(ss2.value) && + ss0.value.equals(literals[22]) && + ss0.value.equals(literals[23]) && + ss0.value.equals(literals[24])); + + test(ss3.value.equals("\\\\U\\u\\") && + ss3.value.equals(literals[25])); + + test(ss4.value.equals("\\A\\") && + ss4.value.equals(literals[26])); + + test(ss5.value.equals("\\u0041\\") && + ss5.value.equals(literals[27])); + + test(su0.value.equals(su1.value) && + su0.value.equals(su2.value) && + su0.value.equals(literals[28]) && + su0.value.equals(literals[29]) && + su0.value.equals(literals[30])); p.ice_ping(); diff --git a/js/test/Ice/operations/Test.ice b/js/test/Ice/operations/Test.ice index 23e3f72c0f2..84e1194c8ae 100644 --- a/js/test/Ice/operations/Test.ice +++ b/js/test/Ice/operations/Test.ice @@ -248,6 +248,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -271,4 
+273,69 @@ class MyDerivedClass extends MyClass MyStruct1 opMyStruct1(MyStruct1 opMyStruct1); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? 
question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/js/test/Ice/operations/Twoways.js b/js/test/Ice/operations/Twoways.js index 8de60ddd95d..4d238a5a0d5 100644 --- a/js/test/Ice/operations/Twoways.js +++ b/js/test/Ice/operations/Twoways.js @@ -42,6 +42,88 @@ ctx.set("two", "TWO"); 
ctx.set("three", "THREE"); + return prx.opStringLiterals(); + } + ).then( + function(literals) + { + test(Test.s0 == "\\" && + Test.s0 == Test.sw0 && + Test.s0 == literals[0] && + Test.s0 == literals[11]); + + test(Test.s1 == "A" && + Test.s1 == Test.sw1 && + Test.s1 == literals[1] && + Test.s1 == literals[12]); + + test(Test.s2 == "Ice" && + Test.s2 == Test.sw2 && + Test.s2 == literals[2] && + Test.s2 == literals[13]); + + test(Test.s3 == "A21" && + Test.s3 == Test.sw3 && + Test.s3 == literals[3] && + Test.s3 == literals[14]); + + test(Test.s4 == "\\u0041 \\U00000041" && + Test.s4 == Test.sw4 && + Test.s4 == literals[4] && + Test.s4 == literals[15]); + + test(Test.s5 == "\u00FF" && + Test.s5 == Test.sw5 && + Test.s5 == literals[5] && + Test.s5 == literals[16]); + + test(Test.s6 == "\u03FF" && + Test.s6 == Test.sw6 && + Test.s6 == literals[6] && + Test.s6 == literals[17]); + + test(Test.s7 == "\u05F0" && + Test.s7 == Test.sw7 && + Test.s7 == literals[7] && + Test.s7 == literals[18]); + + test(Test.s8 == "\uD800\uDC00" && + Test.s8 == Test.sw8 && + Test.s8 == literals[8] && + Test.s8 == literals[19]); + + test(Test.s9 == "\uD83C\uDF4C" && + Test.s9 == Test.sw9 && + Test.s9 == literals[9] && + Test.s9 == literals[20]); + + test(Test.s10 == "\u0DA7" && + Test.s10 == Test.sw10 && + Test.s10 == literals[10] && + Test.s10 == literals[21]); + + test(Test.ss0 == "\'\"\?\\\u0007\b\f\n\r\t\v" && + Test.ss0 == Test.ss1 && + Test.ss0 == Test.ss2 && + Test.ss0 == literals[22] && + Test.ss0 == literals[23] && + Test.ss0 == literals[24]); + + test(Test.ss3 == "\\\\U\\u\\" && + Test.ss3 == literals[25]); + + test(Test.ss4 == "\\A\\" && + Test.ss4 == literals[26]); + + test(Test.ss5 == "\\u0041\\" && + Test.ss5 == literals[27]); + + test(Test.su0 == Test.su1 && + Test.su0 == Test.su2 && + Test.su0 == literals[28] && + Test.su0 == literals[29] && + Test.su0 == literals[30]); + return prx.ice_ping(); } ).then( diff --git a/php/test/Ice/operations/Client.php 
b/php/test/Ice/operations/Client.php index 40583fb9e05..da449507589 100644 --- a/php/test/Ice/operations/Client.php +++ b/php/test/Ice/operations/Client.php @@ -41,6 +41,123 @@ function twoways($communicator, $p) $myDerivedClass = $NS ? "Test\\MyDerivedClass" : "Test_MyDerivedClass"; $myClass = $NS ? "Test\\MyClass" : "Test_MyClass"; $objectPrxHelper = $NS ? "Ice\\ObjectPrxHelper" : "Ice_ObjectPrxHelper"; + + + $s0 = $NS ? constant("Test\\s0") : constant("Test_s0"); + $s1 = $NS ? constant("Test\\s1") : constant("Test_s1"); + $s2 = $NS ? constant("Test\\s2") : constant("Test_s2"); + $s3 = $NS ? constant("Test\\s3") : constant("Test_s3"); + $s4 = $NS ? constant("Test\\s4") : constant("Test_s4"); + $s5 = $NS ? constant("Test\\s5") : constant("Test_s5"); + $s6 = $NS ? constant("Test\\s6") : constant("Test_s6"); + $s7 = $NS ? constant("Test\\s7") : constant("Test_s7"); + $s8 = $NS ? constant("Test\\s8") : constant("Test_s8"); + $s9 = $NS ? constant("Test\\s9") : constant("Test_s9"); + $s10 = $NS ? constant("Test\\s10") : constant("Test_s10"); + + $sw0 = $NS ? constant("Test\\sw0") : constant("Test_sw0"); + $sw1 = $NS ? constant("Test\\sw1") : constant("Test_sw1"); + $sw2 = $NS ? constant("Test\\sw2") : constant("Test_sw2"); + $sw3 = $NS ? constant("Test\\sw3") : constant("Test_sw3"); + $sw4 = $NS ? constant("Test\\sw4") : constant("Test_sw4"); + $sw5 = $NS ? constant("Test\\sw5") : constant("Test_sw5"); + $sw6 = $NS ? constant("Test\\sw6") : constant("Test_sw6"); + $sw7 = $NS ? constant("Test\\sw7") : constant("Test_sw7"); + $sw8 = $NS ? constant("Test\\sw8") : constant("Test_sw8"); + $sw9 = $NS ? constant("Test\\sw9") : constant("Test_sw9"); + $sw10 = $NS ? constant("Test\\sw10") : constant("Test_sw10"); + + $ss0 = $NS ? constant("Test\\ss0") : constant("Test_ss0"); + $ss1 = $NS ? constant("Test\\ss1") : constant("Test_ss1"); + $ss2 = $NS ? constant("Test\\ss2") : constant("Test_ss2"); + $ss3 = $NS ? constant("Test\\ss3") : constant("Test_ss3"); + $ss4 = $NS ? 
constant("Test\\ss4") : constant("Test_ss4"); + $ss5 = $NS ? constant("Test\\ss5") : constant("Test_ss5"); + + $su0 = $NS ? constant("Test\\su0") : constant("Test_su0"); + $su1 = $NS ? constant("Test\\su1") : constant("Test_su1"); + $su2 = $NS ? constant("Test\\su2") : constant("Test_su2"); + + { + $literals = $p->opStringLiterals(); + + test($s0 == "\\" && + $s0 == $sw0 && + $s0 == $literals[0] && + $s0 == $literals[11]); + + test($s1 == "A" && + $s1 == $sw1 && + $s1 == $literals[1] && + $s1 == $literals[12]); + + test($s2 == "Ice" && + $s2 == $sw2 && + $s2 == $literals[2] && + $s2 == $literals[13]); + + test($s3 == "A21" && + $s3 == $sw3 && + $s3 == $literals[3] && + $s3 == $literals[14]); + + test($s4 == "\\u0041 \\U00000041" && + $s4 == $sw4 && + $s4 == $literals[4] && + $s4 == $literals[15]); + + test($s5 == "\xc3\xbf" && + $s5 == $sw5 && + $s5 == $literals[5] && + $s5 == $literals[16]); + + test($s6 == "\xcf\xbf" && + $s6 == $sw6 && + $s6 == $literals[6] && + $s6 == $literals[17]); + + test($s7 == "\xd7\xb0" && + $s7 == $sw7 && + $s7 == $literals[7] && + $s7 == $literals[18]); + + test($s8 == "\xf0\x90\x80\x80" && + $s8 == $sw8 && + $s8 == $literals[8] && + $s8 == $literals[19]); + + test($s9 == "\xf0\x9f\x8d\x8c" && + $s9 == $sw9 && + $s9 == $literals[9] && + $s9 == $literals[20]); + + test($s10 == "\xe0\xb6\xa7" && + $s10 == $sw10 && + $s10 == $literals[10] && + $s10 == $literals[21]); + + test($ss0 == "'\"?\\\007\010\f\n\r\t\v" && + $ss0 == $ss1 && + $ss1 == $ss2 && + $ss0 == $literals[22] && + $ss0 == $literals[23] && + $ss0 == $literals[24]); + + test($ss3 == "\\\\U\\u\\" && + $ss3 == $literals[25]); + + test($ss4 == "\\A\\" && + $ss4 == $literals[26]); + + test($ss5 == "\\u0041\\" && + $ss5 == $literals[27]); + + test($su0 == $su1 && + $su0 == $su2 && + $su0 == $literals[28] && + $su0 == $literals[29] && + $su0 == $literals[30]); + } { $p->ice_ping(); diff --git a/php/test/Ice/operations/Test.ice b/php/test/Ice/operations/Test.ice index 
4d314fc8e0b..7c37371e0b9 100644 --- a/php/test/Ice/operations/Test.ice +++ b/php/test/Ice/operations/Test.ice @@ -224,6 +224,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; // Test data member with same name as class @@ -249,5 +251,69 @@ class MyDerivedClass extends MyClass MyClass1 opMyClass1(MyClass1 c); }; -}; +// +// String literals +// +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? 
question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + +}; diff --git a/python/test/Ice/operations/ServerAMD.py b/python/test/Ice/operations/ServerAMD.py index 39e51b08c77..3561221335a 100755 --- a/python/test/Ice/operations/ServerAMD.py +++ b/python/test/Ice/operations/ServerAMD.py @@ -403,6 +403,12 @@ 
class MyDerivedClassI(Test.MyDerivedClass): def opMyStruct1_async(self, cb, value, current=None): return cb.ice_response(value) + def opStringLiterals_async(self, cb, current=None): + return cb.ice_response([ + Test.s0, Test.s1, Test.s2, Test.s3, Test.s4, Test.s5, Test.s6, Test.s7, Test.s8, Test.s9, Test.s10, + Test.sw0, Test.sw1, Test.sw2, Test.sw3, Test.sw4, Test.sw5, Test.sw6, Test.sw7, Test.sw8, Test.sw9, Test.sw10, + Test.ss0, Test.ss1, Test.ss2, Test.ss3, Test.ss4, Test.ss5, + Test.su0, Test.su1, Test.su2]) def run(args, communicator): communicator.getProperties().setProperty("TestAdapter.Endpoints", "default -p 12010:udp") diff --git a/python/test/Ice/operations/Test.ice b/python/test/Ice/operations/Test.ice index f2a46f54e9b..7f5ec510834 100644 --- a/python/test/Ice/operations/Test.ice +++ b/python/test/Ice/operations/Test.ice @@ -246,6 +246,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -269,4 +271,69 @@ class MyDerivedClass extends MyClass MyStruct1 opMyStruct1(MyStruct1 opMyStruct1); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = 
"\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 
'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/python/test/Ice/operations/TestAMD.ice b/python/test/Ice/operations/TestAMD.ice index 022dca4bba4..ba567612d60 100644 --- a/python/test/Ice/operations/TestAMD.ice +++ b/python/test/Ice/operations/TestAMD.ice @@ -244,6 +244,8 @@ dictionary<MyEnum, MyEnumS> MyEnumMyEnumSD; ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -267,4 +269,69 @@ class MyClass1 MyStruct1 opMyStruct1(MyStruct1 opMyStruct1); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW 
LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 = "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = 
"\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git a/python/test/Ice/operations/TestI.py b/python/test/Ice/operations/TestI.py index 5035d00cb3a..6700fbfa26e 100644 --- a/python/test/Ice/operations/TestI.py +++ b/python/test/Ice/operations/TestI.py @@ -361,5 +361,11 @@ class MyDerivedClassI(Test.MyDerivedClass): def opMyStruct1(self, value, current=None): return value + + def opStringLiterals(self, current=None): + return [Test.s0, Test.s1, Test.s2, Test.s3, Test.s4, Test.s5, Test.s6, Test.s7, Test.s8, Test.s9, Test.s10, + Test.sw0, Test.sw1, Test.sw2, Test.sw3, Test.sw4, Test.sw5, Test.sw6, Test.sw7, Test.sw8, Test.sw9, Test.sw10, + Test.ss0, Test.ss1, Test.ss2, Test.ss3, Test.ss4, Test.ss5, + Test.su0, Test.su1, Test.su2]
\ No newline at end of file diff --git a/python/test/Ice/operations/Twoways.py b/python/test/Ice/operations/Twoways.py index ed0d9e3ae3e..04dcee69f1a 100644 --- a/python/test/Ice/operations/Twoways.py +++ b/python/test/Ice/operations/Twoways.py @@ -14,6 +14,86 @@ def test(b): raise RuntimeError('test assertion failed') def twoways(communicator, p): + + literals = p.opStringLiterals(); + + test(Test.s0 == "\\" and + Test.s0 == Test.sw0 and + Test.s0 == literals[0] and + Test.s0 == literals[11]); + + test(Test.s1 == "A" and + Test.s1 == Test.sw1 and + Test.s1 == literals[1] and + Test.s1 == literals[12]); + + test(Test.s2 == "Ice" and + Test.s2 == Test.sw2 and + Test.s2 == literals[2] and + Test.s2 == literals[13]); + + test(Test.s3 == "A21" and + Test.s3 == Test.sw3 and + Test.s3 == literals[3] and + Test.s3 == literals[14]); + + test(Test.s4 == "\\u0041 \\U00000041" and + Test.s4 == Test.sw4 and + Test.s4 == literals[4] and + Test.s4 == literals[15]); + + test(Test.s5 == "\xc3\xbf" and + Test.s5 == Test.sw5 and + Test.s5 == literals[5] and + Test.s5 == literals[16]); + + test(Test.s6 == "\xcf\xbf" and + Test.s6 == Test.sw6 and + Test.s6 == literals[6] and + Test.s6 == literals[17]); + + test(Test.s7 == "\xd7\xb0" and + Test.s7 == Test.sw7 and + Test.s7 == literals[7] and + Test.s7 == literals[18]); + + test(Test.s8 == "\xf0\x90\x80\x80" and + Test.s8 == Test.sw8 and + Test.s8 == literals[8] and + Test.s8 == literals[19]); + + test(Test.s9 == "\xf0\x9f\x8d\x8c" and + Test.s9 == Test.sw9 and + Test.s9 == literals[9] and + Test.s9 == literals[20]); + + test(Test.s10 == "\xe0\xb6\xa7" and + Test.s10 == Test.sw10 and + Test.s10 == literals[10] and + Test.s10 == literals[21]); + + test(Test.ss0 == "\'\"\x3f\\\a\b\f\n\r\t\v" and + Test.ss0 == Test.ss1 and + Test.ss0 == Test.ss2 and + Test.ss0 == literals[22] and + Test.ss0 == literals[23] and + Test.ss0 == literals[24]); + + test(Test.ss3 == "\\\\U\\u\\" and + Test.ss3 == literals[25]); + + test(Test.ss4 == "\\A\\" and + 
Test.ss4 == literals[26]); + + test(Test.ss5 == "\\u0041\\" and + Test.ss5 == literals[27]); + + test(Test.su0 == Test.su1 and + Test.su0 == Test.su2 and + Test.su0 == literals[28] and + Test.su0 == literals[29] and + Test.su0 == literals[30]); + # # ice_ping # diff --git a/ruby/test/Ice/operations/Test.ice b/ruby/test/Ice/operations/Test.ice index 8964103ae90..63410b8f61a 100644 --- a/ruby/test/Ice/operations/Test.ice +++ b/ruby/test/Ice/operations/Test.ice @@ -245,6 +245,8 @@ class MyClass ByteBoolD opByteBoolD1(ByteBoolD opByteBoolD1); StringS opStringS2(StringS stringS); ByteBoolD opByteBoolD2(ByteBoolD byteBoolD); + + StringS opStringLiterals(); }; struct MyStruct1 @@ -268,4 +270,69 @@ class MyDerivedClass extends MyClass MyStruct1 opMyStruct1(MyStruct1 opMyStruct1); }; +// +// String literals +// + +const string s0 = "\u005c"; // backslash +const string s1 = "\u0041"; // A +const string s2 = "\u0049\u0063\u0065"; // Ice +const string s3 = "\u004121"; // A21 +const string s4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string s5 = "\u00FF"; // ÿ +const string s6 = "\u03FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string s7 = "\u05F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string s8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string s9 = "\U0001F34C"; // BANANA (U+1F34C) +const string s10 = "\u0DA7"; // Sinhala Letter Alpapraana Ttayanna + +const string sw0 = "\U0000005c"; // backslash +const string sw1 = "\U00000041"; // A +const string sw2 = "\U00000049\U00000063\U00000065"; // Ice +const string sw3 = "\U0000004121"; // A21 +const string sw4 = "\\u0041 \\U00000041"; // \\u0041 \\U00000041 +const string sw5 = "\U000000FF"; // ÿ +const string sw6 = "\U000003FF"; // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL (U+03FF) +const string sw7 = "\U000005F0"; // HEBREW LIGATURE YIDDISH DOUBLE VAV (U+05F0) +const string sw8 = "\U00010000"; // LINEAR B SYLLABLE B008 A (U+10000) +const string sw9 
= "\U0001F34C"; // BANANA (U+1F34C) +const string sw10 = "\U00000DA7"; // Sinhala Letter Alpapraana Ttayanna + +/** +\' single quote byte 0x27 in ASCII encoding +\" double quote byte 0x22 in ASCII encoding +\? question mark byte 0x3f in ASCII encoding +\\ backslash byte 0x5c in ASCII encoding +\a audible bell byte 0x07 in ASCII encoding +\b backspace byte 0x08 in ASCII encoding +\f form feed - new page byte 0x0c in ASCII encoding +\n line feed - new line byte 0x0a in ASCII encoding +\r carriage return byte 0x0d in ASCII encoding +\t horizontal tab byte 0x09 in ASCII encoding +\v vertical tab byte 0x0b in ASCII encoding +**/ + +const string ss0 = "\'\"\?\\\a\b\f\n\r\t\v"; +const string ss1 = "\u0027\u0022\u003f\u005c\u0007\u0008\u000c\u000a\u000d\u0009\u000b"; +const string ss2 = "\U00000027\U00000022\U0000003f\U0000005c\U00000007\U00000008\U0000000c\U0000000a\U0000000d\U00000009\U0000000b"; + +const string ss3 = "\\\\U\\u\\"; /* \\U\u\ */ +const string ss4 = "\\\u0041\\"; /* \A\ */ +const string ss5 = "\\u0041\\"; /* \u0041\ */ + +// +// ÿ - Unicode Character 'LATIN SMALL LETTER Y WITH DIAERESIS' (U+00FF) +// Ā - Unicode Character 'LATIN CAPITAL LETTER A WITH MACRON' (U+0100) +// ἀ - Unicode Character 'GREEK SMALL LETTER ALPHA WITH PSILI' (U+1F00) +// 𐆔 - Unicode Character 'ROMAN DIMIDIA SEXTULA SIGN' (U+10194) +// 𐅪 - Unicode Character 'GREEK ACROPHONIC THESPIAN ONE HUNDRED' (U+1016A) +// 𐆘 - Unicode Character 'ROMAN SESTERTIUS SIGN' (U+10198) +// 🍀 - Unicode Character 'FOUR LEAF CLOVER' (U+1F340) +// 🍁 - Unicode Character 'MAPLE LEAF' (U+1F341) +// 🍂 - Unicode Character 'FALLEN LEAF' (U+1F342) +// 🍃 - Unicode Character 'LEAF FLUTTERING IN WIND' (U+1F343) +const string su0 = "ÿĀἀ𐆔𐅪𐆘🍀🍁🍂🍃"; +const string su1 = "\u00FF\u0100\u1F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; +const string su2 = "\U000000FF\U00000100\U00001F00\U00010194\U0001016A\U00010198\U0001F340\U0001F341\U0001F342\U0001F343"; + }; diff --git 
a/ruby/test/Ice/operations/Twoways.rb b/ruby/test/Ice/operations/Twoways.rb index 1bfb26892b7..536ec9e1e90 100644 --- a/ruby/test/Ice/operations/Twoways.rb +++ b/ruby/test/Ice/operations/Twoways.rb @@ -8,6 +8,86 @@ # ********************************************************************** def twoways(communicator, p) + + literals = p.opStringLiterals(); + + test(Test::S0 == "\\" && + Test::S0 == Test::Sw0 && + Test::S0 == literals[0] && + Test::S0 == literals[11]); + + test(Test::S1 == "A" && + Test::S1 == Test::Sw1 && + Test::S1 == literals[1] && + Test::S1 == literals[12]); + + test(Test::S2 == "Ice" && + Test::S2 == Test::Sw2 && + Test::S2 == literals[2] && + Test::S2 == literals[13]); + + test(Test::S3 == "A21" && + Test::S3 == Test::Sw3 && + Test::S3 == literals[3] && + Test::S3 == literals[14]); + + test(Test::S4 == "\\u0041 \\U00000041" && + Test::S4 == Test::Sw4 && + Test::S4 == literals[4] && + Test::S4 == literals[15]); + + test(Test::S5 == "\xC3\xBF"); + test(Test::S5 == Test::Sw5); + test(Test::S5 == literals[5]); + test(Test::S5 == literals[16]); + + test(Test::S6 == "\xcf\xbf" && + Test::S6 == Test::Sw6 && + Test::S6 == literals[6] && + Test::S6 == literals[17]); + + test(Test::S7 == "\xd7\xb0"); + test(Test::S7 == Test::Sw7); + test(Test::S7 == literals[7]); + test(Test::S7 == literals[18]); + + test(Test::S8 == "\xf0\x90\x80\x80" && + Test::S8 == Test::Sw8 && + Test::S8 == literals[8] && + Test::S8 == literals[19]); + + test(Test::S9 == "\xf0\x9f\x8d\x8c" && + Test::S9 == Test::Sw9 && + Test::S9 == literals[9] && + Test::S9 == literals[20]); + + test(Test::S10 == "\xe0\xb6\xa7" && + Test::S10 == Test::Sw10 && + Test::S10 == literals[10] && + Test::S10 == literals[21]); + + test(Test::Ss0 == "\'\"\x3f\\\a\b\f\n\r\t\v" && + Test::Ss0 == Test::Ss1 && + Test::Ss0 == Test::Ss2 && + Test::Ss0 == literals[22] && + Test::Ss0 == literals[23] && + Test::Ss0 == literals[24]); + + test(Test::Ss3 == "\\\\U\\u\\" && + Test::Ss3 == literals[25]); + + test(Test::Ss4 
== "\\A\\" && + Test::Ss4 == literals[26]); + + test(Test::Ss5 == "\\u0041\\" && + Test::Ss5 == literals[27]); + + test(Test::Su0 == Test::Su1 && + Test::Su0 == Test::Su2 && + Test::Su0 == literals[28] && + Test::Su0 == literals[29] && + Test::Su0 == literals[30]); + # # ice_ping # |