summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cpp/src/Slice/PythonUtil.cpp 189
-rw-r--r-- python/test/Ice/operations/Twoways.py 147
2 files changed, 249 insertions, 87 deletions
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp
index 0caebc4ef4d..997942012b2 100644
--- a/cpp/src/Slice/PythonUtil.cpp
+++ b/cpp/src/Slice/PythonUtil.cpp
@@ -196,6 +196,44 @@ private:
}
}
+string // Returns value formatted as a backslash, 'U', then exactly 8 zero-padded lowercase hex digits.
+u32CodePoint(unsigned int value)
+{
+ ostringstream s;
+ s << "\\U"; // the C++ literal yields one backslash followed by 'U' in the output
+ s << hex; // switch integer formatting to hexadecimal
+ s.width(8); // width applies only to the next insertion, i.e. value below
+ s.fill('0'); // pad on the left with zeros up to the width
+ s << value;
+ return s.str();
+}
+
+void // Converts the UTF-8 bytes in u8buffer to UTF-32 and appends one u32CodePoint() escape per code point to out.
+writeU8Buffer(const vector<unsigned char>& u8buffer, ostringstream& out)
+{
+ vector<unsigned int> u32buffer; // receives the decoded code points
+ IceUtilInternal::ConversionResult result = convertUTF8ToUTF32(u8buffer, u32buffer, IceUtil::lenientConversion);
+ switch(result)
+ {
+ case conversionOK:
+ break;
+ case sourceExhausted: // NOTE(review): presumably the input ended mid-sequence; confirm against convertUTF8ToUTF32
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source exhausted");
+ case sourceIllegal: // NOTE(review): presumably malformed UTF-8 input; confirm against convertUTF8ToUTF32
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__, "string source illegal");
+ default: // any other result code is unexpected
+ {
+ assert(0); // trips in debug builds; compiled out under NDEBUG, so the throw below still reports the failure
+ throw IceUtil::IllegalConversionException(__FILE__, __LINE__);
+ }
+ }
+
+ for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c)
+ {
+ out << u32CodePoint(*c); // one escape per decoded code point, in input order
+ }
+}
+
static string
lookupKwd(const string& name)
{
@@ -1837,6 +1875,9 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
}
case Slice::Builtin::KindString:
{
+ ostringstream sv2;
+ ostringstream sv3;
+
//
// Expand strings into the basic source character set. We can't use isalpha() and the like
// here because they are sensitive to the current locale.
@@ -1847,8 +1888,6 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
"_{}[]#()<>%:;.?*+-/^&|~!=, '";
static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
- _out << "\""; // Opening "
-
for(size_t i = 0; i < value.size();)
{
char c = value[i];
@@ -1856,7 +1895,7 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
{
case '"':
{
- _out << "\\\"";
+ sv2 << "\\\"";
break;
}
case '\\':
@@ -1884,7 +1923,7 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
//
// Convert codepoint to UTF8 bytes and write the escaped bytes
//
- _out << s.substr(0, s.size() - 1);
+ sv2 << s.substr(0, s.size() - 1);
size_t sz = value[j] == 'U' ? 8 : 4;
string codepoint = value.substr(j + 1, sz);
@@ -1921,40 +1960,40 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
s << oct;
s << static_cast<unsigned int>(*q);
}
- _out << s.str();
+ sv2 << s.str();
i = j + 1 + sz;
}
else
{
- _out << s;
+ sv2 << s;
i = j;
}
continue;
}
case '\r':
{
- _out << "\\r";
+ sv2 << "\\r";
break;
}
case '\n':
{
- _out << "\\n";
+ sv2 << "\\n";
break;
}
case '\t':
{
- _out << "\\t";
+ sv2 << "\\t";
break;
}
case '\b':
{
- _out << "\\b";
+ sv2 << "\\b";
break;
}
case '\f':
{
- _out << "\\f";
+ sv2 << "\\f";
break;
}
default:
@@ -1968,19 +2007,140 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
s.width(3);
s.fill('0');
s << static_cast<unsigned>(uc);
- _out << s.str();
+ sv2 << s.str();
}
else
{
- _out << c; // Print normally if in basic source character set.
+ sv2 << c; // Print normally if in basic source character set.
}
break;
}
}
++i;
}
+
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size();)
+ {
+ if(charSet.find(value[i]) == charSet.end())
+ {
+ char c = value[i];
+ if(static_cast<unsigned char>(c) < 128) // Single byte character
+ {
+ //
+ // Print as unicode if not in basic source character set
+ //
+ switch(c)
+ {
+ //
+ // Don't encode these special characters as universal characters
+ //
+ case '\r':
+ {
+ sv3 << "\\r";
+ break;
+ }
+ case '\n':
+ {
+ sv3 << "\\n";
+ break;
+ }
+ case '\\':
+ {
+ sv3 << "\\";
+ break;
+ }
+ default:
+ {
+ sv3 << u32CodePoint(c);
+ break;
+ }
+ }
+ }
+ else
+ {
+ u8buffer.push_back(value[i]);
+ }
+ }
+ else
+ {
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, sv3);
+ u8buffer.clear();
+ }
+ switch(value[i])
+ {
+ case '\\':
+ {
+ string s = "\\";
+ size_t j = i + 1;
+ for(; j < value.size(); ++j)
+ {
+ if(value[j] != '\\')
+ {
+ break;
+ }
+ s += "\\";
+ }
+
+ //
+ // An even number of backslashes escapes the backslash, so
+ // the codepoint is interpreted as its literal characters
+ //
+ // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
+ // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
+ //
+ if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
+ {
+ size_t sz = value[j] == 'U' ? 8 : 4;
+ sv3 << s.substr(0, s.size() - 1);
+ i = j + 1;
+
+ string codepoint = value.substr(j + 1, sz);
+ assert(codepoint.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
+ sv3 << u32CodePoint(static_cast<unsigned int>(v));
+ i = j + 1 + sz;
+ }
+ else
+ {
+ sv3 << s;
+ i = j;
+ }
+ continue;
+ }
+ case '"':
+ {
+ sv3 << "\\";
+ break;
+ }
+ }
+ sv3 << value[i]; // Print normally if in basic source character set
+ }
+ i++;
+ }
- _out << "\""; // Closing "
+ //
+ // Write any pending characters in the utf8 buffer
+ //
+ if(!u8buffer.empty())
+ {
+ writeU8Buffer(u8buffer, sv3);
+ u8buffer.clear();
+ }
+
+
+ _out << "\"" << sv2.str() << "\"";
+ if(sv2.str() == sv3.str())
+ {
+ _out << " if _version_info_[0] < 3 else \"" << sv3.str() << "\"";
+ }
break;
}
case Slice::Builtin::KindObject:
@@ -2781,6 +2941,7 @@ Slice::Python::generate(const UnitPtr& un, bool all, bool checksum, const vector
Slice::Python::MetaDataVisitor visitor;
un->visit(&visitor, false);
+ out << nl << "from sys import version_info as _version_info_";
out << nl << "import Ice, IcePy";
if(!all)
diff --git a/python/test/Ice/operations/Twoways.py b/python/test/Ice/operations/Twoways.py
index 04dcee69f1a..8944ed4d94a 100644
--- a/python/test/Ice/operations/Twoways.py
+++ b/python/test/Ice/operations/Twoways.py
@@ -8,6 +8,7 @@
# **********************************************************************
import Ice, math, Test, array, sys
+from sys import version_info
def test(b):
if not b:
@@ -17,82 +18,82 @@ def twoways(communicator, p):
literals = p.opStringLiterals();
- test(Test.s0 == "\\" and
- Test.s0 == Test.sw0 and
- Test.s0 == literals[0] and
- Test.s0 == literals[11]);
-
- test(Test.s1 == "A" and
- Test.s1 == Test.sw1 and
- Test.s1 == literals[1] and
- Test.s1 == literals[12]);
-
- test(Test.s2 == "Ice" and
- Test.s2 == Test.sw2 and
- Test.s2 == literals[2] and
- Test.s2 == literals[13]);
+ test(Test.s0 == "\\")
+ test(Test.s0 == Test.sw0)
+ test(Test.s0 == literals[0])
+ test(Test.s0 == literals[11])
+
+ test(Test.s1 == "A")
+ test(Test.s1 == Test.sw1)
+ test(Test.s1 == literals[1])
+ test(Test.s1 == literals[12])
+
+ test(Test.s2 == "Ice")
+ test(Test.s2 == Test.sw2)
+ test(Test.s2 == literals[2])
+ test(Test.s2 == literals[13])
- test(Test.s3 == "A21" and
- Test.s3 == Test.sw3 and
- Test.s3 == literals[3] and
- Test.s3 == literals[14]);
-
- test(Test.s4 == "\\u0041 \\U00000041" and
- Test.s4 == Test.sw4 and
- Test.s4 == literals[4] and
- Test.s4 == literals[15]);
-
- test(Test.s5 == "\xc3\xbf" and
- Test.s5 == Test.sw5 and
- Test.s5 == literals[5] and
- Test.s5 == literals[16]);
-
- test(Test.s6 == "\xcf\xbf" and
- Test.s6 == Test.sw6 and
- Test.s6 == literals[6] and
- Test.s6 == literals[17]);
-
- test(Test.s7 == "\xd7\xb0" and
- Test.s7 == Test.sw7 and
- Test.s7 == literals[7] and
- Test.s7 == literals[18]);
-
- test(Test.s8 == "\xf0\x90\x80\x80" and
- Test.s8 == Test.sw8 and
- Test.s8 == literals[8] and
- Test.s8 == literals[19]);
+ test(Test.s3 == "A21")
+ test(Test.s3 == Test.sw3)
+ test(Test.s3 == literals[3])
+ test(Test.s3 == literals[14])
+
+ test(Test.s4 == "\\u0041 \\U00000041")
+ test(Test.s4 == Test.sw4)
+ test(Test.s4 == literals[4])
+ test(Test.s4 == literals[15])
+
+ test(Test.s5 == "\xc3\xbf" if version_info[0] < 3 else b"\xc3\xbf".decode("utf-8"))
+ test(Test.s5 == Test.sw5)
+ test(Test.s5 == literals[5])
+ test(Test.s5 == literals[16])
+
+ test(Test.s6 == "\xcf\xbf" if version_info[0] < 3 else b"\xcf\xbf".decode("utf-8"))
+ test(Test.s6 == Test.sw6)
+ test(Test.s6 == literals[6])
+ test(Test.s6 == literals[17])
+
+ test(Test.s7 == "\xd7\xb0" if version_info[0] < 3 else b"\xd7\xb0".decode("utf-8"))
+ test(Test.s7 == Test.sw7)
+ test(Test.s7 == literals[7])
+ test(Test.s7 == literals[18])
+
+ test(Test.s8 == "\xf0\x90\x80\x80" if version_info[0] < 3 else b"\xf0\x90\x80\x80".decode("utf-8"))
+ test(Test.s8 == Test.sw8)
+ test(Test.s8 == literals[8])
+ test(Test.s8 == literals[19])
- test(Test.s9 == "\xf0\x9f\x8d\x8c" and
- Test.s9 == Test.sw9 and
- Test.s9 == literals[9] and
- Test.s9 == literals[20]);
-
- test(Test.s10 == "\xe0\xb6\xa7" and
- Test.s10 == Test.sw10 and
- Test.s10 == literals[10] and
- Test.s10 == literals[21]);
-
- test(Test.ss0 == "\'\"\x3f\\\a\b\f\n\r\t\v" and
- Test.ss0 == Test.ss1 and
- Test.ss0 == Test.ss2 and
- Test.ss0 == literals[22] and
- Test.ss0 == literals[23] and
- Test.ss0 == literals[24]);
+ test(Test.s9 == "\xf0\x9f\x8d\x8c" if version_info[0] < 3 else b"\xf0\x9f\x8d\x8c".decode("utf-8"))
+ test(Test.s9 == Test.sw9)
+ test(Test.s9 == literals[9])
+ test(Test.s9 == literals[20])
+
+ test(Test.s10 == "\xe0\xb6\xa7" if version_info[0] < 3 else b"\xe0\xb6\xa7".decode("utf-8"))
+ test(Test.s10 == Test.sw10)
+ test(Test.s10 == literals[10])
+ test(Test.s10 == literals[21])
+
+ test(Test.ss0 == "\'\"\x3f\\\a\b\f\n\r\t\v")
+ test(Test.ss0 == Test.ss1)
+ test(Test.ss0 == Test.ss2)
+ test(Test.ss0 == literals[22])
+ test(Test.ss0 == literals[23])
+ test(Test.ss0 == literals[24])
- test(Test.ss3 == "\\\\U\\u\\" and
- Test.ss3 == literals[25]);
-
- test(Test.ss4 == "\\A\\" and
- Test.ss4 == literals[26]);
-
- test(Test.ss5 == "\\u0041\\" and
- Test.ss5 == literals[27]);
-
- test(Test.su0 == Test.su1 and
- Test.su0 == Test.su2 and
- Test.su0 == literals[28] and
- Test.su0 == literals[29] and
- Test.su0 == literals[30]);
+ test(Test.ss3 == "\\\\U\\u\\")
+ test(Test.ss3 == literals[25])
+
+ test(Test.ss4 == "\\A\\")
+ test(Test.ss4 == literals[26])
+
+ test(Test.ss5 == "\\u0041\\")
+ test(Test.ss5 == literals[27])
+
+ test(Test.su0 == Test.su1)
+ test(Test.su0 == Test.su2)
+ test(Test.su0 == literals[28])
+ test(Test.su0 == literals[29])
+ test(Test.su0 == literals[30])
#
# ice_ping