summary | refs | log | tree | commit | diff
path: root/cpp/src
diff options
context:
space:
mode:
Diffstat (limited to 'cpp/src')
-rw-r--r-- cpp/src/IceUtil/Unicode.cpp 18
-rw-r--r-- cpp/src/IceUtil/Unicode.h 4
-rw-r--r-- cpp/src/Slice/Ruby.cpp 5
-rw-r--r-- cpp/src/Slice/RubyUtil.cpp 2
-rw-r--r-- cpp/src/slice2cpp/Gen.cpp 255
-rw-r--r-- cpp/src/slice2objc/Gen.cpp 100
6 files changed, 296 insertions, 88 deletions
diff --git a/cpp/src/IceUtil/Unicode.cpp b/cpp/src/IceUtil/Unicode.cpp
index 7bad1d67c17..ca36a912b47 100644
--- a/cpp/src/IceUtil/Unicode.cpp
+++ b/cpp/src/IceUtil/Unicode.cpp
@@ -147,6 +147,24 @@ IceUtilInternal::convertUTF8ToUTF16(const vector<unsigned char>& source, vector<
}
ConversionResult
+IceUtilInternal::convertUTF8ToUTF32(const vector<unsigned char>& source, vector<unsigned int>& target, ConversionFlags flags)
+{
+ target.resize(source.size());
+ const unsigned char* sourceStart = &source[0];
+ const unsigned char* sourceEnd = &source[0] + source.size();
+
+ unsigned int* targetStart = &target[0];
+ unsigned int* targetEnd = &target[0] + target.size();
+ ConversionResult result = ConvertUTF8toUTF32(&sourceStart, sourceEnd, &targetStart, targetEnd, flags);
+
+ if(result == conversionOK)
+ {
+ target.resize(targetStart - &target[0]);
+ }
+ return result;
+}
+
+ConversionResult
IceUtilInternal::convertUTF32ToUTF8(const vector<unsigned int>& source, vector<unsigned char>& target, ConversionFlags flags)
{
target.resize(source.size() * 4);
diff --git a/cpp/src/IceUtil/Unicode.h b/cpp/src/IceUtil/Unicode.h
index 2c96d6c6448..d5c3b235ddb 100644
--- a/cpp/src/IceUtil/Unicode.h
+++ b/cpp/src/IceUtil/Unicode.h
@@ -50,6 +50,10 @@ convertUTF8ToUTF16(const std::vector<unsigned char>&, std::vector<unsigned short
IceUtil::ConversionFlags);
ICE_UTIL_API ConversionResult
+convertUTF8ToUTF32(const std::vector<unsigned char>&, std::vector<unsigned int>&,
+ IceUtil::ConversionFlags);
+
+ICE_UTIL_API ConversionResult
convertUTF32ToUTF8(const std::vector<unsigned int>&, std::vector<unsigned char>&,
IceUtil::ConversionFlags);
diff --git a/cpp/src/Slice/Ruby.cpp b/cpp/src/Slice/Ruby.cpp
index 62daa345cbf..209711a679d 100644
--- a/cpp/src/Slice/Ruby.cpp
+++ b/cpp/src/Slice/Ruby.cpp
@@ -301,7 +301,10 @@ Slice::Ruby::compile(int argc, char* argv[])
throw FileException(__FILE__, __LINE__, os.str());
}
FileTracker::instance()->addFile(file);
-
+ //
+ // Ruby magic comment to set the file encoding; it must be on the first or second line
+ //
+ out << "# encoding: utf-8\n";
printHeader(out);
printGeneratedHeader(out, base + ".ice", "#");
diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp
index 38bbeb5da19..3639a53b185 100644
--- a/cpp/src/Slice/RubyUtil.cpp
+++ b/cpp/src/Slice/RubyUtil.cpp
@@ -1601,7 +1601,7 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr
++i;
}
- _out << "\".force_encoding(\"utf-8\")"; // Closing "
+ _out << "\""; // Closing "
break;
}
diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp
index d93c457bf9e..5df28c48df2 100644
--- a/cpp/src/slice2cpp/Gen.cpp
+++ b/cpp/src/slice2cpp/Gen.cpp
@@ -32,6 +32,45 @@ namespace
{
string
+u32CodePoint(unsigned int value)
+{
+ ostringstream s;
+ s << "\\U";
+ s << hex;
+ s.width(8);
+ s.fill('0');
+ s << value;
+ return s.str();
+}
+
+
+void
+writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
+{
+ vector<unsigned int> u32buffer;
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF32(u8buffer, u32buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c)
+ {
+ out << u32CodePoint(*c);
+ }
+}
+
+string
getDeprecateSymbol(const ContainedPtr& p1, const ContainedPtr& p2)
{
string deprecateMetadata, deprecateSymbol;
@@ -74,115 +113,173 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
if((useWstring & TypeContextUseWstring) || findMetaData(metaData) == "wstring")
{
- out << 'L';
- }
- out << "\""; // Opening "
+ //
+ // Wide strings
+ //
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
+ out << "L\"";
+ for(size_t i = 0; i < value.size();)
+ {
+ if(charSet.find(value[i]) == charSet.end())
+ {
+ if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
+ {
+ //
+ // Print as unicode if not in basic source character set
+ //
+ out << u32CodePoint(static_cast<unsigned int>(value[i]));
+ }
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
+ }
+ else
+ {
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
+ }
+
+ switch(value[i])
+ {
+ case '"':
+ {
+ out << "\\";
+ break;
+ }
+ }
+
+ out << value[i]; // Print normally if in basic source character set
+ }
+ i++;
+
+ }
+
+ //
+ // Write any pending characters in the UTF-8 buffer
+ //
+ if(!u8buffer.empty())
{
- unsigned char uc = value[i]; // char may be signed, so make it positive
- ostringstream s;
- s << "\\"; // Print as octal if not in basic source character set
- s.width(3);
- s.fill('0');
- s << oct;
- s << static_cast<unsigned>(uc);
- out << s.str();
+ writeU8Buffer(u8buffer, out);
+ u8buffer.clear();
}
- else
+ out << "\"";
+ }
+ else // narrow strings
+ {
+ out << "\""; // Opening "
+
+ for(size_t i = 0; i < value.size();)
{
- switch(value[i])
+ if(charSet.find(value[i]) == charSet.end())
+ {
+ unsigned char uc = value[i]; // char may be signed, so make it positive
+ ostringstream s;
+ s << "\\"; // Print as octal if not in basic source character set
+ s.width(3);
+ s.fill('0');
+ s << oct;
+ s << static_cast<unsigned>(uc);
+ out << s.str();
+ }
+ else
{
- case '\\':
+ switch(value[i])
{
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
+ case '\\':
{
- if(value[j] != '\\')
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
{
- break;
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
}
- s += "\\";
- }
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
//
- // Convert codepoint to UTF8 bytes and write the escaped bytes
+ // An even number of backslashes (\) pair up as escaped backslashes, so
+ // the codepoint text is interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
//
- out << s.substr(0, s.size() - 1);
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ out << s.substr(0, s.size() - 1);
- size_t sz = value[j] == 'U' ? 8 : 4;
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
- vector<unsigned int> u32buffer;
- u32buffer.push_back(static_cast<unsigned int>(v));
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(static_cast<unsigned int>(v));
- vector<unsigned char> u8buffer;
+ vector<unsigned char> u8buffer;
- IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
- switch(result)
- {
- case conversionOK:
- break;
- case sourceExhausted:
- throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
- case sourceIllegal:
- throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
- default:
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
{
- assert(0);
- throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
}
- }
- ostringstream s;
- for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
{
- s << "\\";
- s.fill('0');
- s.width(3);
- s << oct;
- s << static_cast<unsigned int>(*q);
+ out << s;
+ i = j;
}
- out << s.str();
-
- i = j + 1 + sz;
+ continue;
}
- else
+ case '"':
{
- out << s;
- i = j;
+ out << "\\";
+ break;
}
- continue;
- }
- case '"':
- {
- out << "\\";
- break;
}
+
+ out << value[i]; // Print normally if in basic source character set
}
-
- out << value[i]; // Print normally if in basic source character set
+ ++i;
}
- ++i;
+ out << "\""; // Closing "
}
-
- out << "\""; // Closing "
}
else if(bp && bp->kind() == Builtin::KindLong)
{
diff --git a/cpp/src/slice2objc/Gen.cpp b/cpp/src/slice2objc/Gen.cpp
index 283efc935e3..1363779f8e9 100644
--- a/cpp/src/slice2objc/Gen.cpp
+++ b/cpp/src/slice2objc/Gen.cpp
@@ -17,6 +17,8 @@
#include <direct.h>
#endif
#include <IceUtil/Iterator.h>
+#include <IceUtil/Unicode.h>
+#include <IceUtil/InputUtil.h>
#include <IceUtil/UUID.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -1492,13 +1494,13 @@ Slice::Gen::TypesVisitor::writeConstantValue(IceUtilInternal::Output& out, const
out << "@\""; // Opening @"
- for(string::const_iterator c = val.begin(); c != val.end(); ++c)
+ for(size_t i = 0; i < val.size();)
{
- if(charSet.find(*c) == charSet.end())
+ if(charSet.find(val[i]) == charSet.end())
{
- unsigned char uc = *c; // char may be signed, so make it positive
+ unsigned char uc = val[i]; // char may be signed, so make it positive
ostringstream s;
- s << "\\"; // Print as octal if not in basic source character set
+ s << "\\"; // Print as octal if not in basic source character set
s.width(3);
s.fill('0');
s << oct;
@@ -1507,11 +1509,95 @@ Slice::Gen::TypesVisitor::writeConstantValue(IceUtilInternal::Output& out, const
}
else
{
- out << *c; // Print normally if in basic source character set
+ switch(val[i])
+ {
+ case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < val.size(); ++j)
+ {
+ if(val[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes (\) pair up as escaped backslashes, so
+ // the codepoint text is interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (val[j] == 'U' || val[j] == 'u'))
+ {
+ //
+ // Convert codepoint to UTF8 bytes and write the escaped bytes
+ //
+ out << s.substr(0, s.size() - 1);
+
+ size_t sz = val[j] == 'U' ? 8 : 4;
+ string codepoint = val.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+
+
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(static_cast<unsigned int>(v));
+
+ vector<unsigned char> u8buffer;
+
+ IceUtilInternal::ConversionResult result = convertUTF32ToUTF8(u32buffer, u8buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal:
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default:
+ {
+ assert(0);
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ ostringstream s;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ s << "\\";
+ s.fill('0');
+ s.width(3);
+ s << oct;
+ s << static_cast<unsigned int>(*q);
+ }
+ out << s.str();
+
+ i = j + 1 + sz;
+ }
+ else
+ {
+ out << s;
+ i = j;
+ }
+ continue;
+ }
+ case '"':
+ {
+ out << "\\";
+ break;
+ }
+ }
+
+ out << val[i]; // Print normally if in basic source character set
}
+ ++i;
}
-
- out << "\""; // Closing "
+ out << "\""; // Closing "
}
else
{