diff options
Diffstat (limited to 'cpp/src/Slice/PythonUtil.cpp')
-rw-r--r-- | cpp/src/Slice/PythonUtil.cpp | 278 |
1 files changed, 5 insertions, 273 deletions
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp index 88f3aff1772..b07e605a1ed 100644 --- a/cpp/src/Slice/PythonUtil.cpp +++ b/cpp/src/Slice/PythonUtil.cpp @@ -12,8 +12,6 @@ #include <Slice/Util.h> #include <IceUtil/IceUtil.h> #include <IceUtil/StringUtil.h> -#include <IceUtil/InputUtil.h> -#include <IceUtil/StringConverter.h> #include <climits> #include <iterator> @@ -196,29 +194,6 @@ private: } } -string -u32CodePoint(unsigned int value) -{ - ostringstream s; - s << "\\U"; - s << hex; - s.width(8); - s.fill('0'); - s << value; - return s.str(); -} - -void -writeU8Buffer(const vector<unsigned char>& u8buffer, ostringstream& out) -{ - vector<unsigned int> u32buffer = toUTF32(u8buffer); - - for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c) - { - out << u32CodePoint(*c); - } -} - static string lookupKwd(const string& name) { @@ -1862,256 +1837,13 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax } case Slice::Builtin::KindString: { - ostringstream sv2; - ostringstream sv3; - - // - // Expand strings into the basic source character set. We can't use isalpha() and the like - // here because they are sensitive to the current locale. - // - static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "0123456789" - "_{}[]#()<>%:;.?*+-/^&|~!=, '"; - static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end()); - - for(size_t i = 0; i < value.size();) - { - char c = value[i]; - switch(c) - { - case '"': - { - sv2 << "\\\""; - break; - } - case '\\': - { - string s = "\\"; - size_t j = i + 1; - for(; j < value.size(); ++j) - { - if(value[j] != '\\') - { - break; - } - s += "\\"; - } - - // - // An even number of slash \ will escape the backslash and - // the codepoint will be interpreted as its charaters - // - // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1'] - // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A') - // - if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) - { - // - // Convert codepoint to UTF8 bytes and write the escaped bytes - // - sv2 << s.substr(0, s.size() - 1); - - size_t sz = value[j] == 'U' ? 8 : 4; - string codepoint = value.substr(j + 1, sz); - assert(codepoint.size() == sz); - - IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); - - vector<unsigned int> u32buffer; - u32buffer.push_back(static_cast<unsigned int>(v)); - - vector<unsigned char> u8buffer = fromUTF32(u32buffer); - - ostringstream s; - for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q) - { - s << "\\"; - s.fill('0'); - s.width(3); - s << oct; - s << static_cast<unsigned int>(*q); - } - sv2 << s.str(); - - i = j + 1 + sz; - } - else - { - sv2 << s; - i = j; - } - continue; - } - case '\r': - { - sv2 << "\\r"; - break; - } - case '\n': - { - sv2 << "\\n"; - break; - } - case '\t': - { - sv2 << "\\t"; - break; - } - case '\b': - { - sv2 << "\\b"; - break; - } - case '\f': - { - sv2 << "\\f"; - break; - } - default: - { - if(charSet.find(c) == charSet.end()) - { - unsigned char uc = c; // Char may be signed, so make it positive. - stringstream s; - s << "\\"; // Print as octal if not in basic source character set. - s.flags(ios_base::oct); - s.width(3); - s.fill('0'); - s << static_cast<unsigned>(uc); - sv2 << s.str(); - } - else - { - sv2 << c; // Print normally if in basic source character set. - } - break; - } - } - ++i; - } + string sv2 = toStringLiteral(value, "\a\b\f\n\r\t\v", "", Octal, 0); + string sv3 = toStringLiteral(value, "\a\b\f\n\r\t\v", "", UCN, 0); - vector<unsigned char> u8buffer; // Buffer to convert multibyte characters - - for(size_t i = 0; i < value.size();) - { - if(charSet.find(value[i]) == charSet.end()) - { - char c = value[i]; - if(static_cast<unsigned char>(c) < 128) // Single byte character - { - // - // Print as unicode if not in basic source character set - // - switch(c) - { - // - // Don't encode this special characters as universal characters - // - case '\r': - { - sv3 << "\\r"; - break; - } - case '\n': - { - sv3 << "\\n"; - break; - } - case '\\': - { - sv3 << "\\"; - break; - } - default: - { - sv3 << u32CodePoint(c); - break; - } - } - } - else - { - u8buffer.push_back(value[i]); - } - } - else - { - // - // Write any pedding characters in the utf8 buffer - // - if(!u8buffer.empty()) - { - writeU8Buffer(u8buffer, sv3); - u8buffer.clear(); - } - switch(value[i]) - { - case '\\': - { - string s = "\\"; - size_t j = i + 1; - for(; j < value.size(); ++j) - { - if(value[j] != '\\') - { - break; - } - s += "\\"; - } - - // - // An even number of slash \ will escape the backslash and - // the codepoint will be interpreted as its charaters - // - // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] - // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') - // - if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u')) - { - size_t sz = value[j] == 'U' ? 8 : 4; - sv3 << s.substr(0, s.size() - 1); - i = j + 1; - - string codepoint = value.substr(j + 1, sz); - assert(codepoint.size() == sz); - - IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); - sv3 << u32CodePoint(static_cast<unsigned int>(v)); - i = j + 1 + sz; - } - else - { - sv3 << s; - i = j; - } - continue; - } - case '"': - { - sv3 << "\\"; - break; - } - } - sv3 << value[i]; // Print normally if in basic source character set - } - i++; - } - - // - // Write any pedding characters in the utf8 buffer - // - if(!u8buffer.empty()) - { - writeU8Buffer(u8buffer, sv3); - u8buffer.clear(); - } - - - _out << "\"" << sv2.str() << "\""; - if(sv2.str() != sv3.str()) + _out << "\"" << sv2<< "\""; + if(sv2 != sv3) { - _out << " if _version_info_[0] < 3 else \"" << sv3.str() << "\""; + _out << " if _version_info_[0] < 3 else \"" << sv3 << "\""; } break; } |