summary | refs | log | tree | commit | diff
path: root/cpp/src/Slice
diff options
context:
space:
mode:
author: Mark Spruiell <mes@zeroc.com> 2017-09-04 14:40:47 -0700
committer: Mark Spruiell <mes@zeroc.com> 2017-09-04 14:40:47 -0700
commit: cdfd2cbb48cccc460541d21f604834975fe05720 (patch)
tree: 6e1277a7ecbe091098ae4e2c8e3c87c44799f7ad /cpp/src/Slice
parent: Fix PHP build warnings (diff)
download: ice-cdfd2cbb48cccc460541d21f604834975fe05720.tar.bz2
ice-cdfd2cbb48cccc460541d21f604834975fe05720.tar.xz
ice-cdfd2cbb48cccc460541d21f604834975fe05720.zip
Initial commit of MATLAB prototype
Diffstat (limited to 'cpp/src/Slice')
-rw-r--r-- cpp/src/Slice/StringLiteralUtil.cpp | 114
-rw-r--r-- cpp/src/Slice/Util.h | 7
2 files changed, 102 insertions, 19 deletions
diff --git a/cpp/src/Slice/StringLiteralUtil.cpp b/cpp/src/Slice/StringLiteralUtil.cpp
index ce3351f7de1..80c322b4d7f 100644
--- a/cpp/src/Slice/StringLiteralUtil.cpp
+++ b/cpp/src/Slice/StringLiteralUtil.cpp
@@ -29,9 +29,12 @@ public:
StringLiteralGenerator(const string&, const string&, EscapeMode, unsigned char);
- string escapeASCIIChar(char) const;
- string escapeCodePoint(unsigned int) const;
- string flushU8Buffer(vector<unsigned char>&) const;
+ string escapeASCIIChar(char);
+ string escapeCodePoint(unsigned int);
+ string flushU8Buffer(vector<unsigned char>&);
+
+ enum Format { NoFormat, OctalFormat, HexFormat };
+ void format(Format);
private:
@@ -39,6 +42,11 @@ private:
const string _printableEscaped;
const EscapeMode _escapeMode;
const unsigned char _cutOff;
+ const string _shortUCNPrefix;
+ const string _octalChars;
+ const string _hexChars;
+
+ Format _format; // The last format used for an escape.
};
StringLiteralGenerator::StringLiteralGenerator(const string& nonPrintableEscaped,
@@ -46,19 +54,33 @@ StringLiteralGenerator::StringLiteralGenerator(const string& nonPrintableEscaped
EscapeMode escapeMode,
unsigned char cutOff) :
_nonPrintableEscaped(nonPrintableEscaped),
- _printableEscaped(printableEscaped + "\\\""),
+ _printableEscaped(printableEscaped + "\\"),
_escapeMode(escapeMode),
- _cutOff(cutOff)
+ _cutOff(cutOff),
+ _shortUCNPrefix(escapeMode == Matlab ? "\\x" : "\\u"),
+ _octalChars("01234567"),
+ _hexChars("01234567890ABCDEFabcdef"),
+ _format(NoFormat)
{
+ //
+ // Double quotes don't need to be escaped in Matlab because the string delimiter is a single quote.
+ //
+ if(_escapeMode != Matlab)
+ {
+ const_cast<string&>(_printableEscaped) += '"';
+ }
}
string
-StringLiteralGenerator::escapeASCIIChar(char c) const
+StringLiteralGenerator::escapeASCIIChar(char c)
{
assert(static_cast<unsigned char>(c) < 128);
string result;
+ Format lastFormat = _format;
+ _format = NoFormat;
+
if(_nonPrintableEscaped.find(c) != string::npos)
{
switch(c)
@@ -125,10 +147,51 @@ StringLiteralGenerator::escapeASCIIChar(char c) const
result = '\\';
result += c;
}
+ else if(_escapeMode == Matlab && c == '\'')
+ {
+ //
+ // Matlab strings are converted by sprintf(), and sprintf() requires a single quote to be escaped
+ // with another single quote.
+ //
+ result = "''";
+ }
+ else if(_escapeMode == Matlab && c == '%')
+ {
+ //
+ // Matlab strings are converted by sprintf(), and sprintf() requires a percent to be escaped
+ // with another percent.
+ //
+ result = "%%";
+ }
else if(c >= 32 && c <= 126)
{
- // Other printable ASCII
- result = c;
+ //
+ // Other printable ASCII.
+ //
+ if(_escapeMode == Matlab)
+ {
+ //
+ // While interpreting an octal or hex escape, the Matlab parser will continue to consume adjacent
+ // legal characters. If the trailing character after an escaped value could be consumed, we escape it
+ // as well to terminate the original escape.
+ //
+ if((lastFormat == OctalFormat && _octalChars.find(c) != string::npos) ||
+ (lastFormat == HexFormat && _hexChars.find(c) != string::npos))
+ {
+ ostringstream os;
+ os << "\\" << oct << setfill('0') << setw(3) << static_cast<unsigned int>(c & 0xFF);
+ result = os.str();
+ _format = OctalFormat;
+ }
+ else
+ {
+ result = c;
+ }
+ }
+ else
+ {
+ result = c;
+ }
}
else
{
@@ -137,10 +200,12 @@ StringLiteralGenerator::escapeASCIIChar(char c) const
if((static_cast<unsigned char>(c) < _cutOff) || (_escapeMode == Octal))
{
os << "\\" << oct << setfill('0') << setw(3) << static_cast<unsigned int>(c & 0xFF);
+ _format = OctalFormat;
}
else
{
- os << "\\u" << hex << setfill('0') << setw(4) << static_cast<unsigned int>(c & 0xFF);
+ os << _shortUCNPrefix << hex << setfill('0') << setw(4) << static_cast<unsigned int>(c & 0xFF);
+ _format = HexFormat;
}
result = os.str();
}
@@ -148,7 +213,7 @@ StringLiteralGenerator::escapeASCIIChar(char c) const
}
string
-StringLiteralGenerator::escapeCodePoint(unsigned int codePoint) const
+StringLiteralGenerator::escapeCodePoint(unsigned int codePoint)
{
if(codePoint < 128)
{
@@ -165,6 +230,7 @@ StringLiteralGenerator::escapeCodePoint(unsigned int codePoint) const
{
os << "\\" << setfill('0') << setw(3) << oct << static_cast<unsigned int>(*q);
}
+ _format = OctalFormat;
return os.str();
}
else
@@ -176,20 +242,23 @@ StringLiteralGenerator::escapeCodePoint(unsigned int codePoint) const
// Output octal escape
//
os << "\\" << setfill('0') << setw(3) << oct << codePoint;
+ _format = OctalFormat;
}
else if(codePoint <= 0xFFFF)
{
- os << "\\u" << setfill('0') << setw(4) << hex << codePoint;
+ os << _shortUCNPrefix << setfill('0') << setw(4) << hex << codePoint;
+ _format = HexFormat;
}
- else if(_escapeMode == ShortUCN)
+ else if(_escapeMode == ShortUCN || _escapeMode == Matlab)
{
//
// Convert to surrogate pair
//
unsigned int highSurrogate = ((codePoint - 0x10000) / 0x400) + 0xD800;
unsigned int lowSurrogate = ((codePoint - 0x10000) % 0x400) + 0xDC00;
- os << "\\u" << setfill('0') << setw(4) << hex << highSurrogate;
- os << "\\u" << setfill('0') << setw(4) << hex << lowSurrogate;
+ os << _shortUCNPrefix << setfill('0') << setw(4) << hex << highSurrogate;
+ os << _shortUCNPrefix << setfill('0') << setw(4) << hex << lowSurrogate;
+ _format = HexFormat;
}
else if(_escapeMode == EC6UCN)
{
@@ -204,7 +273,7 @@ StringLiteralGenerator::escapeCodePoint(unsigned int codePoint) const
}
string
-StringLiteralGenerator::flushU8Buffer(vector<unsigned char>& u8buffer) const
+StringLiteralGenerator::flushU8Buffer(vector<unsigned char>& u8buffer)
{
if(u8buffer.empty())
{
@@ -224,6 +293,12 @@ StringLiteralGenerator::flushU8Buffer(vector<unsigned char>& u8buffer) const
}
}
+void
+StringLiteralGenerator::format(Format f)
+{
+ _format = f;
+}
+
}
string
@@ -271,6 +346,7 @@ Slice::toStringLiteral(const string& value,
if(c == '\\')
{
os << "\\\\";
+ generator.format(StringLiteralGenerator::NoFormat);
}
else if(c == 'u' || c == 'U')
{
@@ -284,11 +360,16 @@ Slice::toStringLiteral(const string& value,
// ASCII character that may need to escaped in languages such as Java
os << generator.escapeASCIIChar(static_cast<char>(v));
}
- else if(escapeMode == UCN || c == 'u')
+ else if(escapeMode == UCN)
{
// keep this escape as is
os << "\\" << c << codePointStr;
}
+ else if(c == 'u')
+ {
+ os << (escapeMode == Matlab ? "\\x" : "\\u") << codePointStr;
+ generator.format(StringLiteralGenerator::HexFormat);
+ }
else
{
os << generator.escapeCodePoint(static_cast<unsigned int>(v));
@@ -298,6 +379,7 @@ Slice::toStringLiteral(const string& value,
else
{
// unescaped backslash: escape it!
+ generator.format(StringLiteralGenerator::NoFormat);
os << "\\\\";
os << generator.escapeASCIIChar(c);
}
diff --git a/cpp/src/Slice/Util.h b/cpp/src/Slice/Util.h
index a31536a6ed8..33c52fe5035 100644
--- a/cpp/src/Slice/Util.h
+++ b/cpp/src/Slice/Util.h
@@ -31,16 +31,17 @@ std::vector<std::string> argvToArgs(int argc, wchar_t* argv[]);
std::vector<std::string> argvToArgs(int argc, char* argv[]);
#endif
-enum EscapeMode { UCN, Octal, ShortUCN, EC6UCN };
+enum EscapeMode { UCN, Octal, ShortUCN, Matlab, EC6UCN };
// Parameters:
// const string& value: input string provided by Slice Parser
// const string& nonPrintableEscaped: which of \a, \b, \f, \n, \r, \t, \v, \0 (null), \x20 (\s), \x1b (\e) are
// escaped in the target language
// Warning: don't include \0 if the target language recognizes octal escapes
-// const string& printableEscaped: additional printable ASCII characters other than \ and " that need to be escaped
+// const string& printableEscaped: additional printable ASCII characters other than \ and the string delimiter
+// that need to be escaped
// EscapeMode escapeMode: whether we generate both UCNs, octal escape sequences, only short UCNs (\unnnn),
-// or ECMAScript 6-style UCNs with \u{...} for astral characters
+// Matlab syntax, or ECMAScript 6-style UCNs with \u{...} for astral characters.
// unsigned char cutOff: characters < cutOff other than the nonPrintableEscaped are generated as
// octal escape sequences, regardless of escapeMode.
std::string