diff options
author | Jose <jose@zeroc.com> | 2016-03-08 13:46:55 +0100 |
---|---|---|
committer | Jose <jose@zeroc.com> | 2016-03-08 13:46:55 +0100 |
commit | 2bd402833bfdb54c1940dd0038be8af05d6f5e6f (patch) | |
tree | eb7be3853dc45452397b730e586434f6e859efb3 /cpp/src/slice2cs | |
parent | Windows fixes for icegriddb/icestormdb (diff) | |
download | ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.bz2 ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.tar.xz ice-2bd402833bfdb54c1940dd0038be8af05d6f5e6f.zip |
ICE-6991 - Add support for unicode escape sequences
Diffstat (limited to 'cpp/src/slice2cs')
-rw-r--r-- | cpp/src/slice2cs/Gen.cpp | 152 | ||||
-rw-r--r-- | cpp/src/slice2cs/Makefile | 2 | ||||
-rw-r--r-- | cpp/src/slice2cs/Makefile.mak | 2 |
3 files changed, 138 insertions, 18 deletions
diff --git a/cpp/src/slice2cs/Gen.cpp b/cpp/src/slice2cs/Gen.cpp index cf09aa9df1b..e537f90f3ea 100644 --- a/cpp/src/slice2cs/Gen.cpp +++ b/cpp/src/slice2cs/Gen.cpp @@ -10,6 +10,7 @@ #include <IceUtil/DisableWarnings.h> #include <IceUtil/Functional.h> #include <IceUtil/StringUtil.h> +#include <IceUtil/InputUtil.h> #include <Gen.h> #include <limits> #include <sys/stat.h> @@ -20,6 +21,7 @@ #endif #include <IceUtil/Iterator.h> #include <IceUtil/UUID.h> +#include <IceUtil/Unicode.h> #include <Slice/Checksum.h> #include <Slice/DotNetNames.h> #include <Slice/FileTracker.h> @@ -35,6 +37,45 @@ namespace { string +u16CodePoint(unsigned short value) +{ + ostringstream s; + s << "\\u"; + s << hex; + s.width(4); + s.fill('0'); + s << value; + return s.str(); +} + + +void +writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out) +{ + vector<unsigned short> u16buffer; + IceUtilInternal::ConversionResult result = convertUTF8ToUTF16(u8buffer, u16buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c) + { + out << u16CodePoint(*c); + } +} + +string sliceModeToIceMode(Operation::Mode opMode) { string mode; @@ -1983,41 +2024,120 @@ Slice::CsVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePt // here because they are sensitive to the current locale. // static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "0123456789" - "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' "; + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789" + "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' "; + static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end()); _out << "\""; // Opening " - for(string::const_iterator c = value.begin(); c != value.end(); ++c) + vector<unsigned char> u8buffer; // Buffer to convert multibyte characters + + for(size_t i = 0; i < value.size();) { - if(charSet.find(*c) == charSet.end()) + if(charSet.find(value[i]) == charSet.end()) { - unsigned char uc = *c; // char may be signed, so make it positive - ostringstream s; - s << "\\u"; // Print as unicode if not in basic source character set - s << hex; - s.width(4); - s.fill('0'); - s << static_cast<unsigned>(uc); - _out << s.str(); + if(static_cast<unsigned char>(value[i]) < 128) // Single byte character + { + // + // Print as unicode if not in basic source character set + // + _out << u16CodePoint(static_cast<unsigned int>(value[i])); + } + else + { + u8buffer.push_back(value[i]); + } } else { - switch(*c) + // + // Write any pedding characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, _out); + u8buffer.clear(); + } + switch(value[i]) { case '\\': + { + string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) + { + if(value[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of slash \ will escape the backslash and + // the codepoint will be interpreted as its charaters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && value[j] == 'U') + { + _out << s.substr(0, s.size() - 1); + i = j + 1; + + string codepoint = value.substr(j + 1, 8); + assert(codepoint.size() == 8); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + + // + // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal + // and is represented using a Unicode surrogate pair. + // + if(v > 0xFFFF) + { + unsigned int high = ((v - 0x10000) / 0x400) + 0xD800; + unsigned int low = ((v - 0x10000) % 0x400) + 0xDC00; + _out << u16CodePoint(high); + _out << u16CodePoint(low); + } + else + { + _out << "\\U" << codepoint; + } + + i = j + 1 + 8; + } + else + { + _out << s; + i = j; + } + continue; + } case '"': { _out << "\\"; break; } } - _out << *c; // Print normally if in basic source character set + _out << value[i]; // Print normally if in basic source character set } + i++; } - + + // + // Write any pedding characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, _out); + u8buffer.clear(); + } + _out << "\""; // Closing " } else if(bp && bp->kind() == Builtin::KindLong) diff --git a/cpp/src/slice2cs/Makefile b/cpp/src/slice2cs/Makefile index e51e24c0445..e46c1005dd6 100644 --- a/cpp/src/slice2cs/Makefile +++ b/cpp/src/slice2cs/Makefile @@ -20,7 +20,7 @@ RPATH_DIR = $(LOADER_PATH)/../$(libsubdir) include $(top_srcdir)/config/Make.rules -CPPFLAGS := -I. $(CPPFLAGS) +CPPFLAGS := -I. -I.. $(CPPFLAGS) $(NAME): $(OBJS) rm -f $@ diff --git a/cpp/src/slice2cs/Makefile.mak b/cpp/src/slice2cs/Makefile.mak index 0a422c08b34..ac748e4fabd 100644 --- a/cpp/src/slice2cs/Makefile.mak +++ b/cpp/src/slice2cs/Makefile.mak @@ -18,7 +18,7 @@ OBJS = .\Gen.obj \ !include $(top_srcdir)/config/Make.rules.mak -CPPFLAGS = -I. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN +CPPFLAGS = -I. -I.. $(CPPFLAGS) -DWIN32_LEAN_AND_MEAN !if "$(GENERATE_PDB)" == "yes" PDBFLAGS = /pdb:$(NAME:.exe=.pdb) |