diff options
author | Jose <jose@zeroc.com> | 2016-03-10 09:33:14 +0100 |
---|---|---|
committer | Jose <jose@zeroc.com> | 2016-03-10 09:33:14 +0100 |
commit | 709f8a26f7ac03bed5a4d1a0b9de6c4e9d14f806 (patch) | |
tree | 0665aa227fce4c4c369619fb2ea1d9f718979655 /cpp/src | |
parent | Windows PHP build fixes (diff) | |
download | ice-709f8a26f7ac03bed5a4d1a0b9de6c4e9d14f806.tar.bz2 ice-709f8a26f7ac03bed5a4d1a0b9de6c4e9d14f806.tar.xz ice-709f8a26f7ac03bed5a4d1a0b9de6c4e9d14f806.zip |
String literals fixes
- Fixed escape sequences in C++ wide strings
- Fixed Objective-C escape sequences
- Updated Ruby to use a magic comment to set the file encoding
Diffstat (limited to 'cpp/src')
-rw-r--r-- | cpp/src/IceUtil/Unicode.cpp | 18 | ||||
-rw-r--r-- | cpp/src/IceUtil/Unicode.h | 4 | ||||
-rw-r--r-- | cpp/src/Slice/Ruby.cpp | 5 | ||||
-rw-r--r-- | cpp/src/Slice/RubyUtil.cpp | 2 | ||||
-rw-r--r-- | cpp/src/slice2cpp/Gen.cpp | 255 | ||||
-rw-r--r-- | cpp/src/slice2objc/Gen.cpp | 100 |
6 files changed, 296 insertions, 88 deletions
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp index 7bad1d67c17..ca36a912b47 100644 --- a/cpp/src/IceUtil/Unicode.cpp +++ b/cpp/src/IceUtil/Unicode.cpp @@ -147,6 +147,24 @@ IceUtilInternal::convertUTF8ToUTF16(const vector<unsigned char>& source, vector< } ConversionResult +IceUtilInternal::convertUTF8ToUTF32(const vector<unsigned char>& source, vector<unsigned int>& target, ConversionFlags flags) +{ + target.resize(source.size()); + const unsigned char* sourceStart = &source[0]; + const unsigned char* sourceEnd = &source[0] + source.size(); + + unsigned int* targetStart = &target[0]; + unsigned int* targetEnd = &target[0] + target.size(); + ConversionResult result = ConvertUTF8toUTF32(&sourceStart, sourceEnd, &targetStart, targetEnd, flags); + + if(result == conversionOK) + { + target.resize(targetStart - &target[0]); + } + return result; +} + +ConversionResult IceUtilInternal::convertUTF32ToUTF8(const vector<unsigned int>& source, vector<unsigned char>& target, ConversionFlags flags) { target.resize(source.size() * 4); diff --git a/cpp/src/IceUtil/Unicode.h b/cpp/src/IceUtil/Unicode.h index 2c96d6c6448..d5c3b235ddb 100644 --- a/cpp/src/IceUtil/Unicode.h +++ b/cpp/src/IceUtil/Unicode.h @@ -50,6 +50,10 @@ convertUTF8ToUTF16(const std::vector<unsigned char>&, std::vector<unsigned short IceUtil::ConversionFlags); ICE_UTIL_API ConversionResult +convertUTF8ToUTF32(const std::vector<unsigned char>&, std::vector<unsigned int>&, + IceUtil::ConversionFlags); + +ICE_UTIL_API ConversionResult convertUTF32ToUTF8(const std::vector<unsigned int>&, std::vector<unsigned char>&, IceUtil::ConversionFlags); diff --git a/cpp/src/Slice/Ruby.cpp b/cpp/src/Slice/Ruby.cpp index 62daa345cbf..209711a679d 100644 --- a/cpp/src/Slice/Ruby.cpp +++ b/cpp/src/Slice/Ruby.cpp @@ -301,7 +301,10 @@ Slice::Ruby::compile(int argc, char* argv[]) throw FileException(__FILE__, __LINE__, os.str()); } FileTracker::instance()->addFile(file); - + // + // Ruby magic comment to set the 
file encoding, it must be first or second line + // + out << "# encoding: utf-8\n"; printHeader(out); printGeneratedHeader(out, base + ".ice", "#"); diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp index 38bbeb5da19..3639a53b185 100644 --- a/cpp/src/Slice/RubyUtil.cpp +++ b/cpp/src/Slice/RubyUtil.cpp @@ -1601,7 +1601,7 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr ++i; } - _out << "\".force_encoding(\"utf-8\")"; // Closing " + _out << "\""; // Closing " break; } diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp index d93c457bf9e..5df28c48df2 100644 --- a/cpp/src/slice2cpp/Gen.cpp +++ b/cpp/src/slice2cpp/Gen.cpp @@ -32,6 +32,45 @@ namespace { string +u32CodePoint(unsigned int value) +{ + ostringstream s; + s << "\\U"; + s << hex; + s.width(8); + s.fill('0'); + s << value; + return s.str(); +} + + +void +writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out) +{ + vector<unsigned int> u32buffer; + IceUtilInternal::ConversionResult result = convertUTF8ToUTF32(u8buffer, u32buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c) + { + out << u32CodePoint(*c); + } +} + +string getDeprecateSymbol(const ContainedPtr& p1, const ContainedPtr& p2) { string deprecateMetadata, deprecateSymbol; @@ -74,115 +113,173 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt if((useWstring & TypeContextUseWstring) || findMetaData(metaData) == "wstring") { - out << 'L'; - } - out << "\""; // Opening " + // + 
// Wide strings + // + vector<unsigned char> u8buffer; // Buffer to convert multibyte characters - for(size_t i = 0; i < value.size();) - { - if(charSet.find(value[i]) == charSet.end()) + out << "L\""; + for(size_t i = 0; i < value.size();) + { + if(charSet.find(value[i]) == charSet.end()) + { + if(static_cast<unsigned char>(value[i]) < 128) // Single byte character + { + // + // Print as unicode if not in basic source character set + // + out << u32CodePoint(static_cast<unsigned int>(value[i])); + } + else + { + u8buffer.push_back(value[i]); + } + } + else + { + // + // Write any pedding characters in the utf8 buffer + // + if(!u8buffer.empty()) + { + writeU8Buffer(u8buffer, out); + u8buffer.clear(); + } + + switch(value[i]) + { + case '"': + { + out << "\\"; + break; + } + } + + out << value[i]; // Print normally if in basic source character set + } + i++; + + } + + // + // Write any pedding characters in the utf8 buffer + // + if(!u8buffer.empty()) { - unsigned char uc = value[i]; // char may be signed, so make it positive - ostringstream s; - s << "\\"; // Print as octal if not in basic source character set - s.width(3); - s.fill('0'); - s << oct; - s << static_cast<unsigned>(uc); - out << s.str(); + writeU8Buffer(u8buffer, out); + u8buffer.clear(); } - else + out << "\""; + } + else // narrow strings + { + out << "\""; // Opening " + + for(size_t i = 0; i < value.size();) { - switch(value[i]) + if(charSet.find(value[i]) == charSet.end()) + { + unsigned char uc = value[i]; // char may be signed, so make it positive + ostringstream s; + s << "\\"; // Print as octal if not in basic source character set + s.width(3); + s.fill('0'); + s << oct; + s << static_cast<unsigned>(uc); + out << s.str(); + } + else { - case '\\': + switch(value[i]) { - string s = "\\"; - size_t j = i + 1; - for(; j < value.size(); ++j) + case '\\': { - if(value[j] != '\\') + string s = "\\"; + size_t j = i + 1; + for(; j < value.size(); ++j) { - break; + if(value[j] != '\\') + { + break; + 
} + s += "\\"; } - s += "\\"; - } - // - // An even number of slash \ will escape the backslash and - // the codepoint will be interpreted as its charaters - // - // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] - // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') - // - if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) - { // - // Convert codepoint to UTF8 bytes and write the escaped bytes + // An even number of slash \ will escape the backslash and + // the codepoint will be interpreted as its charaters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') // - out << s.substr(0, s.size() - 1); + if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) + { + // + // Convert codepoint to UTF8 bytes and write the escaped bytes + // + out << s.substr(0, s.size() - 1); - size_t sz = value[j] == 'U' ? 8 : 4; - string codepoint = value.substr(j + 1, sz); - assert(codepoint.size() == sz); + size_t sz = value[j] == 'U' ? 
8 : 4; + string codepoint = value.substr(j + 1, sz); + assert(codepoint.size() == sz); - IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); - vector<unsigned int> u32buffer; - u32buffer.push_back(static_cast<unsigned int>(v)); + vector<unsigned int> u32buffer; + u32buffer.push_back(static_cast<unsigned int>(v)); - vector<unsigned char> u8buffer; + vector<unsigned char> u8buffer; - IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion); - switch(result) - { - case conversionOK: - break; - case sourceExhausted: - throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); - case sourceIllegal: - throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); - default: + IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion); + switch(result) { - assert(0); - throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } } - } - ostringstream s; - for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) + ostringstream s; + for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) + { + s << "\\"; + s.fill('0'); + s.width(3); + s << oct; + s << static_cast<unsigned int>(*q); + } + out << s.str(); + + i = j + 1 + sz; + } + else { - s << "\\"; - s.fill('0'); - s.width(3); - s << oct; - s << static_cast<unsigned int>(*q); + out << s; + i = j; } - out << s.str(); - - i = j + 1 + sz; + continue; } - 
else + case '"': { - out << s; - i = j; + out << "\\"; + break; } - continue; - } - case '"': - { - out << "\\"; - break; } + + out << value[i]; // Print normally if in basic source character set } - - out << value[i]; // Print normally if in basic source character set + ++i; } - ++i; + out << "\""; // Closing " } - - out << "\""; // Closing " } else if(bp && bp->kind() == Builtin::KindLong) { diff --git a/cpp/src/slice2objc/Gen.cpp b/cpp/src/slice2objc/Gen.cpp index 283efc935e3..1363779f8e9 100644 --- a/cpp/src/slice2objc/Gen.cpp +++ b/cpp/src/slice2objc/Gen.cpp @@ -17,6 +17,8 @@ #include <direct.h> #endif #include <IceUtil/Iterator.h> +#include <IceUtil/Unicode.h> +#include <IceUtil/InputUtil.h> #include <IceUtil/UUID.h> #include <Slice/Checksum.h> #include <Slice/FileTracker.h> @@ -1492,13 +1494,13 @@ Slice::Gen::TypesVisitor::writeConstantValue(IceUtilInternal::Output& out, const out << "@\""; // Opening @" - for(string::const_iterator c = val.begin(); c != val.end(); ++c) + for(size_t i = 0; i < val.size();) { - if(charSet.find(*c) == charSet.end()) + if(charSet.find(val[i]) == charSet.end()) { - unsigned char uc = *c; // char may be signed, so make it positive + unsigned char uc = val[i]; // char may be signed, so make it positive ostringstream s; - s << "\\"; // Print as octal if not in basic source character set + s << "\\"; // Print as octal if not in basic source character set s.width(3); s.fill('0'); s << oct; @@ -1507,11 +1509,95 @@ Slice::Gen::TypesVisitor::writeConstantValue(IceUtilInternal::Output& out, const } else { - out << *c; // Print normally if in basic source character set + switch(val[i]) + { + case '\\': + { + string s = "\\"; + size_t j = i + 1; + for(; j < val.size(); ++j) + { + if(val[j] != '\\') + { + break; + } + s += "\\"; + } + + // + // An even number of slash \ will escape the backslash and + // the codepoint will be interpreted as its charaters + // + // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] + // 
\\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') + // + if(s.size() % 2 != 0 && (val[j] == 'U' || val[j] == 'u')) + { + // + // Convert codepoint to UTF8 bytes and write the escaped bytes + // + out << s.substr(0, s.size() - 1); + + size_t sz = val[j] == 'U' ? 8 : 4; + string codepoint = val.substr(j + 1, sz); + assert(codepoint.size() == sz); + + IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); + + + vector<unsigned int> u32buffer; + u32buffer.push_back(static_cast<unsigned int>(v)); + + vector<unsigned char> u8buffer; + + IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion); + switch(result) + { + case conversionOK: + break; + case sourceExhausted: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted"); + case sourceIllegal: + throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal"); + default: + { + assert(0); + throw IceUtil::IllegalConversionException(__FILE__, __LINE__); + } + } + + ostringstream s; + for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) + { + s << "\\"; + s.fill('0'); + s.width(3); + s << oct; + s << static_cast<unsigned int>(*q); + } + out << s.str(); + + i = j + 1 + sz; + } + else + { + out << s; + i = j; + } + continue; + } + case '"': + { + out << "\\"; + break; + } + } + + out << val[i]; // Print normally if in basic source character set } + ++i; } - - out << "\""; // Closing " + out << "\""; // Closing " } else { |