summary | refs | log | tree | commit | diff
path: root/cpp/src
diff options
context:
space:
mode:
author Bernard Normier <bernard@zeroc.com> 2016-10-25 18:35:15 -0400
committer Bernard Normier <bernard@zeroc.com> 2016-10-25 18:35:15 -0400
commit 95624751e9d98d1265e0a007e7d7a1186036750b (patch)
tree d13da90f17629496d13d5badb32b04803239d662 /cpp/src
parent Nuget package updates (diff)
download ice-95624751e9d98d1265e0a007e7d7a1186036750b.tar.bz2
ice-95624751e9d98d1265e0a007e7d7a1186036750b.tar.xz
ice-95624751e9d98d1265e0a007e7d7a1186036750b.zip
Refactored string literal generation and updated identityToString format
Diffstat (limited to 'cpp/src')
-rw-r--r-- cpp/src/Ice/Initialize.cpp 6
-rw-r--r-- cpp/src/Ice/ReferenceFactory.cpp 4
-rw-r--r-- cpp/src/IcePatch2Lib/Util.cpp 2
-rw-r--r-- cpp/src/IceUtil/StringUtil.cpp 152
-rw-r--r-- cpp/src/Slice/PythonUtil.cpp 278
-rw-r--r-- cpp/src/Slice/RubyUtil.cpp 131
-rw-r--r-- cpp/src/Slice/Scanner.cpp 1867
-rw-r--r-- cpp/src/Slice/Scanner.l 171
-rw-r--r-- cpp/src/Slice/StringLiteralUtil.cpp 374
-rw-r--r-- cpp/src/Slice/Util.h 16
-rw-r--r-- cpp/src/Slice/msbuild/slice.vcxproj 36
-rw-r--r-- cpp/src/slice2cpp/Gen.cpp 255
-rw-r--r-- cpp/src/slice2cs/Gen.cpp 147
-rw-r--r-- cpp/src/slice2java/Gen.cpp 187
-rw-r--r-- cpp/src/slice2java/GenCompat.cpp 187
-rw-r--r-- cpp/src/slice2js/Gen.cpp 159
-rw-r--r-- cpp/src/slice2objc/Gen.cpp 103
-rw-r--r-- cpp/src/slice2php/Main.cpp 134
18 files changed, 1479 insertions, 2730 deletions
diff --git a/cpp/src/Ice/Initialize.cpp b/cpp/src/Ice/Initialize.cpp
index f1f92c3826d..50b2e10def4 100644
--- a/cpp/src/Ice/Initialize.cpp
+++ b/cpp/src/Ice/Initialize.cpp
@@ -398,7 +398,7 @@ Ice::stringToIdentity(const string& s)
{
try
{
- ident.name = unescapeString(s, 0, s.size());
+ ident.name = unescapeString(s, 0, s.size(), "/");
}
catch(const IceUtil::IllegalArgumentException& e)
{
@@ -411,7 +411,7 @@ Ice::stringToIdentity(const string& s)
{
try
{
- ident.category = unescapeString(s, 0, slash);
+ ident.category = unescapeString(s, 0, slash, "/");
}
catch(const IceUtil::IllegalArgumentException& e)
{
@@ -424,7 +424,7 @@ Ice::stringToIdentity(const string& s)
{
try
{
- ident.name = unescapeString(s, slash + 1, s.size());
+ ident.name = unescapeString(s, slash + 1, s.size(), "/");
}
catch(const IceUtil::IllegalArgumentException& e)
{
diff --git a/cpp/src/Ice/ReferenceFactory.cpp b/cpp/src/Ice/ReferenceFactory.cpp
index 63b8df7effe..099cb147b4f 100644
--- a/cpp/src/Ice/ReferenceFactory.cpp
+++ b/cpp/src/Ice/ReferenceFactory.cpp
@@ -280,7 +280,7 @@ IceInternal::ReferenceFactory::create(const string& str, const string& propertyP
try
{
- facet = unescapeString(argument, 0, argument.size());
+ facet = unescapeString(argument, 0, argument.size(), "");
}
catch(const IceUtil::IllegalArgumentException& e)
{
@@ -554,7 +554,7 @@ IceInternal::ReferenceFactory::create(const string& str, const string& propertyP
try
{
- adapter = unescapeString(adapterstr, 0, adapterstr.size());
+ adapter = unescapeString(adapterstr, 0, adapterstr.size(), "");
}
catch(const IceUtil::IllegalArgumentException& e)
{
diff --git a/cpp/src/IcePatch2Lib/Util.cpp b/cpp/src/IcePatch2Lib/Util.cpp
index 63fc5087eeb..451acfc81fb 100644
--- a/cpp/src/IcePatch2Lib/Util.cpp
+++ b/cpp/src/IcePatch2Lib/Util.cpp
@@ -114,7 +114,7 @@ IcePatch2Internal::readFileInfo(FILE* fp, LargeFileInfo& info)
getline(is, s, '\t');
try
{
- info.path = IceUtilInternal::unescapeString(s, 0, s.size());
+ info.path = IceUtilInternal::unescapeString(s, 0, s.size(), "");
}
catch(const IceUtil::IllegalArgumentException& ex)
{
diff --git a/cpp/src/IceUtil/StringUtil.cpp b/cpp/src/IceUtil/StringUtil.cpp
index 0e7d9162d6c..84c24da3e03 100644
--- a/cpp/src/IceUtil/StringUtil.cpp
+++ b/cpp/src/IceUtil/StringUtil.cpp
@@ -11,30 +11,14 @@
#include <IceUtil/StringConverter.h>
#include <cstring>
+#include <iomanip>
+
using namespace std;
using namespace IceUtil;
namespace
{
-string
-toOctalString(unsigned int n)
-{
- string s;
- s.resize(32);
- string::size_type charPos = 32;
- const int radix = 1 << 3;
- int mask = radix - 1;
- do
- {
- s[--charPos] = '0' + (n & mask);
- n >>= 3;
- }
- while(n != 0);
-
- return string(s, charPos, (32 - charPos));
-}
-
char
toHexDigit(Byte b)
{
@@ -158,49 +142,67 @@ IceUtilInternal::escapeString(const string& s, const string& special, ToStringMo
result.append("\\\\");
break;
}
-
case '\'':
{
result.append("\\'");
break;
}
-
case '"':
{
result.append("\\\"");
break;
}
-
+ case '\a':
+ {
+ if(toStringMode == ICE_ENUM(ToStringMode, Compat))
+ {
+ // Octal escape for compatibility with 3.6 and earlier
+ result.append("\\007");
+ }
+ else
+ {
+ result.append("\\a");
+ }
+ break;
+ }
case '\b':
{
result.append("\\b");
break;
}
-
case '\f':
{
result.append("\\f");
break;
}
-
case '\n':
{
result.append("\\n");
break;
}
-
case '\r':
{
result.append("\\r");
break;
}
-
case '\t':
{
result.append("\\t");
break;
}
-
+ case '\v':
+ {
+ if(toStringMode == ICE_ENUM(ToStringMode, Compat))
+ {
+ // Octal escape for compatibility with 3.6 and earlier
+ result.append("\\013");
+ }
+ else
+ {
+ result.append("\\v");
+ }
+ break;
+ }
default:
{
if(special.find(c) != string::npos)
@@ -218,9 +220,6 @@ IceUtilInternal::escapeString(const string& s, const string& special, ToStringMo
{
// append octal string
- result.push_back('\\');
- string octal = toOctalString(i);
- //
// Add leading zeroes so that we avoid problems during
// decoding. For example, consider the escaped string
// \0013 (i.e., a character with value 1 followed by the
@@ -228,11 +227,9 @@ IceUtilInternal::escapeString(const string& s, const string& special, ToStringMo
// result would be incorrectly interpreted as a single
// character with value 11.
//
- for(string::size_type j = octal.size(); j < 3; j++)
- {
- result.push_back('0');
- }
- result.append(octal);
+ ostringstream os;
+ os << '\\' << oct << setfill('0') << setw(3) << static_cast<unsigned int>(i);
+ result.append(os.str());
}
else if(i < 32 || i == 127)
{
@@ -305,10 +302,10 @@ checkChar(const string& s, string::size_type pos)
void
appendUTF8(unsigned int codePoint, bool inBMP, string& result)
{
- if(inBMP && codePoint >= 0xD800 && codePoint <= 0xDFFF)
+ if(codePoint >= 0xD800 && codePoint <= 0xDFFF)
{
throw IllegalArgumentException(__FILE__, __LINE__,
- "A non-BMP character cannot be encoded with \\unnnn, use \\Unnnnnnnn instead");
+ "A universal character name cannot designate a surrogate");
}
if(codePoint <= 0x7F)
@@ -351,7 +348,7 @@ appendUTF8(unsigned int codePoint, bool inBMP, string& result)
//
bool
decodeChar(const string& s, string::size_type start, string::size_type end, string::size_type& nextStart,
- string& result)
+ const string& special, string& result)
{
assert(start < end);
assert(end <= s.size());
@@ -362,13 +359,14 @@ decodeChar(const string& s, string::size_type start, string::size_type end, stri
{
result.push_back(checkChar(s, start++));
}
+ else if(start + 1 == end)
+ {
+ // Keep trailing backslash
+ ++start;
+ result.push_back('\\');
+ }
else
{
- if(start + 1 == end)
- {
- throw IllegalArgumentException(__FILE__, __LINE__, "trailing backslash");
- }
-
char c = s[++start];
switch(c)
@@ -376,11 +374,18 @@ decodeChar(const string& s, string::size_type start, string::size_type end, stri
case '\\':
case '\'':
case '"':
+ case '?':
{
++start;
result.push_back(c);
break;
}
+ case 'a':
+ {
+ ++start;
+ result.push_back('\a');
+ break;
+ }
case 'b':
{
++start;
@@ -411,6 +416,12 @@ decodeChar(const string& s, string::size_type start, string::size_type end, stri
result.push_back('\t');
break;
}
+ case 'v':
+ {
+ ++start;
+ result.push_back('\v');
+ break;
+ }
case 'u':
case 'U':
{
@@ -487,12 +498,57 @@ decodeChar(const string& s, string::size_type start, string::size_type end, stri
}
break;
}
+ case 'x':
+ {
+ int val = 0;
+ int size = 2;
+ ++start;
+ while(size > 0 && start < end)
+ {
+ c = s[start++];
+ int charVal = 0;
+ if(c >= '0' && c <= '9')
+ {
+ charVal = c - '0';
+ }
+ else if(c >= 'a' && c <= 'f')
+ {
+ charVal = 10 + (c - 'a');
+ }
+ else if(c >= 'A' && c <= 'F')
+ {
+ charVal = 10 + (c - 'A');
+ }
+ else
+ {
+ --start; // move back
+ break; // while
+ }
+ val = val * 16 + charVal;
+ --size;
+ }
+ if(size == 2)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__,
+ "Invalid \\x escape sequence: no hex digit");
+ }
+ result.push_back(static_cast<char>(val));
+ if(val > 127)
+ {
+ pureASCII = false;
+ }
+ break;
+ }
default:
{
if(static_cast<unsigned char>(c) > 127)
{
pureASCII = false;
}
+ if(special.empty() || special.find(c) == string::npos)
+ {
+ result.push_back('\\'); // not in special, so we keep the backslash
+ }
result.push_back(checkChar(s, start++));
break;
}
@@ -508,10 +564,18 @@ decodeChar(const string& s, string::size_type start, string::size_type end, stri
// Remove escape sequences added by escapeString.
//
string
-IceUtilInternal::unescapeString(const string& s, string::size_type start, string::size_type end)
+IceUtilInternal::unescapeString(const string& s, string::size_type start, string::size_type end, const string& special)
{
assert(start <= end && end <= s.size());
+ for(string::size_type i = 0; i < special.size(); ++i)
+ {
+ if(static_cast<unsigned char>(special[i]) < 32 || static_cast<unsigned char>(special[i]) > 126)
+ {
+ throw IllegalArgumentException(__FILE__, __LINE__, "Special characters must be in ASCII range 32-126");
+ }
+ }
+
// Optimization for strings without escapes
string::size_type p = s.find('\\', start);
if(p == string::npos || p >= end)
@@ -553,7 +617,7 @@ IceUtilInternal::unescapeString(const string& s, string::size_type start, string
result.reserve(end - start);
while(start < end)
{
- if(decodeChar(*inputStringPtr, start, end, start, result))
+ if(decodeChar(*inputStringPtr, start, end, start, special, result))
{
resultIsPureASCII = false;
}
diff --git a/cpp/src/Slice/PythonUtil.cpp b/cpp/src/Slice/PythonUtil.cpp
index 88f3aff1772..b07e605a1ed 100644
--- a/cpp/src/Slice/PythonUtil.cpp
+++ b/cpp/src/Slice/PythonUtil.cpp
@@ -12,8 +12,6 @@
#include <Slice/Util.h>
#include <IceUtil/IceUtil.h>
#include <IceUtil/StringUtil.h>
-#include <IceUtil/InputUtil.h>
-#include <IceUtil/StringConverter.h>
#include <climits>
#include <iterator>
@@ -196,29 +194,6 @@ private:
}
}
-string
-u32CodePoint(unsigned int value)
-{
- ostringstream s;
- s << "\\U";
- s << hex;
- s.width(8);
- s.fill('0');
- s << value;
- return s.str();
-}
-
-void
-writeU8Buffer(const vector<unsigned char>& u8buffer, ostringstream& out)
-{
- vector<unsigned int> u32buffer = toUTF32(u8buffer);
-
- for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c)
- {
- out << u32CodePoint(*c);
- }
-}
-
static string
lookupKwd(const string& name)
{
@@ -1862,256 +1837,13 @@ Slice::Python::CodeVisitor::writeConstantValue(const TypePtr& type, const Syntax
}
case Slice::Builtin::KindString:
{
- ostringstream sv2;
- ostringstream sv3;
-
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=, '";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
-
- for(size_t i = 0; i < value.size();)
- {
- char c = value[i];
- switch(c)
- {
- case '"':
- {
- sv2 << "\\\"";
- break;
- }
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- //
- // Convert codepoint to UTF8 bytes and write the escaped bytes
- //
- sv2 << s.substr(0, s.size() - 1);
-
- size_t sz = value[j] == 'U' ? 8 : 4;
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
- vector<unsigned int> u32buffer;
- u32buffer.push_back(static_cast<unsigned int>(v));
-
- vector<unsigned char> u8buffer = fromUTF32(u32buffer);
-
- ostringstream s;
- for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
- {
- s << "\\";
- s.fill('0');
- s.width(3);
- s << oct;
- s << static_cast<unsigned int>(*q);
- }
- sv2 << s.str();
-
- i = j + 1 + sz;
- }
- else
- {
- sv2 << s;
- i = j;
- }
- continue;
- }
- case '\r':
- {
- sv2 << "\\r";
- break;
- }
- case '\n':
- {
- sv2 << "\\n";
- break;
- }
- case '\t':
- {
- sv2 << "\\t";
- break;
- }
- case '\b':
- {
- sv2 << "\\b";
- break;
- }
- case '\f':
- {
- sv2 << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(c) == charSet.end())
- {
- unsigned char uc = c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- sv2 << s.str();
- }
- else
- {
- sv2 << c; // Print normally if in basic source character set.
- }
- break;
- }
- }
- ++i;
- }
+ string sv2 = toStringLiteral(value, "\a\b\f\n\r\t\v", "", Octal, 0);
+ string sv3 = toStringLiteral(value, "\a\b\f\n\r\t\v", "", UCN, 0);
- vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
-
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- char c = value[i];
- if(static_cast<unsigned char>(c) < 128) // Single byte character
- {
- //
- // Print as unicode if not in basic source character set
- //
- switch(c)
- {
- //
- // Don't encode this special characters as universal characters
- //
- case '\r':
- {
- sv3 << "\\r";
- break;
- }
- case '\n':
- {
- sv3 << "\\n";
- break;
- }
- case '\\':
- {
- sv3 << "\\";
- break;
- }
- default:
- {
- sv3 << u32CodePoint(c);
- break;
- }
- }
- }
- else
- {
- u8buffer.push_back(value[i]);
- }
- }
- else
- {
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, sv3);
- u8buffer.clear();
- }
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- size_t sz = value[j] == 'U' ? 8 : 4;
- sv3 << s.substr(0, s.size() - 1);
- i = j + 1;
-
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
- sv3 << u32CodePoint(static_cast<unsigned int>(v));
- i = j + 1 + sz;
- }
- else
- {
- sv3 << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- sv3 << "\\";
- break;
- }
- }
- sv3 << value[i]; // Print normally if in basic source character set
- }
- i++;
- }
-
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, sv3);
- u8buffer.clear();
- }
-
-
- _out << "\"" << sv2.str() << "\"";
- if(sv2.str() != sv3.str())
+ _out << "\"" << sv2<< "\"";
+ if(sv2 != sv3)
{
- _out << " if _version_info_[0] < 3 else \"" << sv3.str() << "\"";
+ _out << " if _version_info_[0] < 3 else \"" << sv3 << "\"";
}
break;
}
diff --git a/cpp/src/Slice/RubyUtil.cpp b/cpp/src/Slice/RubyUtil.cpp
index d561c8db755..b5d62fc0aee 100644
--- a/cpp/src/Slice/RubyUtil.cpp
+++ b/cpp/src/Slice/RubyUtil.cpp
@@ -12,7 +12,6 @@
#include <Slice/Util.h>
#include <IceUtil/Functional.h>
#include <IceUtil/InputUtil.h>
-#include <IceUtil/StringConverter.h>
#include <iterator>
using namespace std;
@@ -1451,134 +1450,8 @@ Slice::Ruby::CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTr
}
case Slice::Builtin::KindString:
{
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=, '";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
-
- _out << "\""; // Opening "
-
- for(size_t i = 0; i < value.size();)
- {
- char c = value[i];
- switch(c)
- {
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- //
- // Convert codepoint to UTF8 bytes and write the escaped bytes
- //
- _out << s.substr(0, s.size() - 1);
-
- size_t sz = value[j] == 'U' ? 8 : 4;
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
- vector<unsigned int> u32buffer;
- u32buffer.push_back(static_cast<unsigned int>(v));
-
- vector<unsigned char> u8buffer = fromUTF32(u32buffer);
-
- ostringstream s;
- for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
- {
- s << "\\";
- s.fill('0');
- s.width(3);
- s << oct;
- s << static_cast<unsigned int>(*q);
- }
- _out << s.str();
-
- i = j + 1 + sz;
- }
- else
- {
- _out << s;
- i = j;
- }
- continue;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\b':
- {
- _out << "\\b";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(c) == charSet.end())
- {
- unsigned char uc = c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
- }
- else
- {
- _out << c; // Print normally if in basic source character set.
- }
- break;
- }
- }
- ++i;
- }
-
- _out << "\""; // Closing "
+ // RubyUCN available in Ruby 1.9 or greater
+ _out << "\"" << toStringLiteral(value, "\a\b\f\n\r\t\v\x20\x1b", "", EC6UCN, 0) << "\"";
break;
}
diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp
index 62d1a7a6d85..ab4bc66e038 100644
--- a/cpp/src/Slice/Scanner.cpp
+++ b/cpp/src/Slice/Scanner.cpp
@@ -54,7 +54,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -71,7 +71,7 @@ typedef uint32_t flex_uint32_t;
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
@@ -197,86 +197,86 @@ extern FILE *slice_in, *slice_out;
#define YY_LESS_LINENO(n)
#define YY_LINENO_REWIND_TO(ptr)
-
+
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up slice_text. */ \
+ do \
+ { \
+ /* Undo effects of setting up slice_text. */ \
int yyless_macro_arg = (n); \
YY_LESS_LINENO(yyless_macro_arg);\
- *yy_cp = (yy_hold_char); \
- YY_RESTORE_YY_MORE_OFFSET \
- (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
- YY_DO_BEFORE_ACTION; /* set up slice_text again */ \
- } \
- while ( 0 )
+ *yy_cp = (yy_hold_char); \
+ YY_RESTORE_YY_MORE_OFFSET \
+ (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up slice_text again */ \
+ } \
+ while ( 0 )
#define unput(c) yyunput( c, (yytext_ptr) )
#ifndef YY_STRUCT_YY_BUFFER_STATE
#define YY_STRUCT_YY_BUFFER_STATE
struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- yy_size_t yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ yy_size_t yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
- int yy_buffer_status;
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
#define YY_BUFFER_NEW 0
#define YY_BUFFER_NORMAL 1
- /* When an EOF's been seen but there's still some text to process
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
- * shouldn't try reading from the input source any more. We might
- * still have a bunch of tokens to match, though, because of
- * possible backing-up.
- *
- * When we actually see the EOF, we change the status to "new"
- * (via slice_restart()), so that the user can continue scanning by
- * just pointing slice_in at a new input file.
- */
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via slice_restart()), so that the user can continue scanning by
+ * just pointing slice_in at a new input file.
+ */
#define YY_BUFFER_EOF_PENDING 2
- };
+ };
#endif /* !YY_STRUCT_YY_BUFFER_STATE */
/* Stack of input buffers. */
@@ -339,24 +339,24 @@ void slice_free (void * );
#define yy_new_buffer slice__create_buffer
#define yy_set_interactive(is_interactive) \
- { \
- if ( ! YY_CURRENT_BUFFER ){ \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
slice_ensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
+ YY_CURRENT_BUFFER_LVALUE = \
slice__create_buffer(slice_in,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
- }
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
#define yy_set_bol(at_bol) \
- { \
- if ( ! YY_CURRENT_BUFFER ){\
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
slice_ensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
+ YY_CURRENT_BUFFER_LVALUE = \
slice__create_buffer(slice_in,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
- }
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
@@ -387,21 +387,21 @@ static void yy_fatal_error (yyconst char msg[] );
* corresponding action - sets up slice_text.
*/
#define YY_DO_BEFORE_ACTION \
- (yytext_ptr) = yy_bp; \
- slice_leng = (size_t) (yy_cp - yy_bp); \
- (yy_hold_char) = *yy_cp; \
- *yy_cp = '\0'; \
- (yy_c_buf_p) = yy_cp;
+ (yytext_ptr) = yy_bp; \
+ slice_leng = (size_t) (yy_cp - yy_bp); \
+ (yy_hold_char) = *yy_cp; \
+ *yy_cp = '\0'; \
+ (yy_c_buf_p) = yy_cp;
#define YY_NUM_RULES 20
#define YY_END_OF_BUFFER 21
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
- {
- flex_int32_t yy_verify;
- flex_int32_t yy_nxt;
- };
+ {
+ flex_int32_t yy_verify;
+ flex_int32_t yy_nxt;
+ };
static yyconst flex_int16_t yy_accept[73] =
{ 0,
0, 0, 0, 0, 0, 0, 21, 19, 17, 17,
@@ -694,7 +694,7 @@ extern int slice_wrap (void );
#endif
static void yyunput (int c,char *buf_ptr );
-
+
#ifndef yytext_ptr
static void yy_flex_strncpy (char *,yyconst char *,int );
#endif
@@ -736,33 +736,33 @@ static int input (void );
*/
#ifndef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
- { \
- int c = '*'; \
- size_t n; \
- for ( n = 0; n < max_size && \
- (c = getc( slice_in )) != EOF && c != '\n'; ++n ) \
- buf[n] = (char) c; \
- if ( c == '\n' ) \
- buf[n++] = (char) c; \
- if ( c == EOF && ferror( slice_in ) ) \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- result = n; \
- } \
- else \
- { \
- errno=0; \
- while ( (result = fread(buf, 1, max_size, slice_in))==0 && ferror(slice_in)) \
- { \
- if( errno != EINTR) \
- { \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- break; \
- } \
- errno=0; \
- clearerr(slice_in); \
- } \
- }\
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ size_t n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( slice_in )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( slice_in ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = fread(buf, 1, max_size, slice_in))==0 && ferror(slice_in)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(slice_in); \
+ } \
+ }\
\
#endif
@@ -811,102 +811,102 @@ extern int slice_lex (void);
#endif
#define YY_RULE_SETUP \
- if ( slice_leng > 0 ) \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = \
- (slice_text[slice_leng - 1] == '\n'); \
- YY_USER_ACTION
+ if ( slice_leng > 0 ) \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = \
+ (slice_text[slice_leng - 1] == '\n'); \
+ YY_USER_ACTION
/** The main scanner function which does all the work.
*/
YY_DECL
{
- register yy_state_type yy_current_state;
- register char *yy_cp, *yy_bp;
- register int yy_act;
-
- if ( !(yy_init) )
- {
- (yy_init) = 1;
+ register yy_state_type yy_current_state;
+ register char *yy_cp, *yy_bp;
+ register int yy_act;
+
+ if ( !(yy_init) )
+ {
+ (yy_init) = 1;
#ifdef YY_USER_INIT
- YY_USER_INIT;
+ YY_USER_INIT;
#endif
- if ( ! (yy_start) )
- (yy_start) = 1; /* first start state */
+ if ( ! (yy_start) )
+ (yy_start) = 1; /* first start state */
- if ( ! slice_in )
- slice_in = stdin;
+ if ( ! slice_in )
+ slice_in = stdin;
- if ( ! slice_out )
- slice_out = stdout;
+ if ( ! slice_out )
+ slice_out = stdout;
- if ( ! YY_CURRENT_BUFFER ) {
- slice_ensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- slice__create_buffer(slice_in,YY_BUF_SIZE );
- }
+ if ( ! YY_CURRENT_BUFFER ) {
+ slice_ensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ slice__create_buffer(slice_in,YY_BUF_SIZE );
+ }
- slice__load_buffer_state( );
- }
+ slice__load_buffer_state( );
+ }
- {
+ {
#line 94 "Scanner.l"
#line 857 "lex.yy.c"
- while ( 1 ) /* loops until end-of-file is reached */
- {
- yy_cp = (yy_c_buf_p);
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = (yy_c_buf_p);
- /* Support of slice_text. */
- *yy_cp = (yy_hold_char);
+ /* Support of slice_text. */
+ *yy_cp = (yy_hold_char);
- /* yy_bp points to the position in yy_ch_buf of the start of
- * the current run.
- */
- yy_bp = yy_cp;
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
- yy_current_state = (yy_start);
- yy_current_state += YY_AT_BOL();
+ yy_current_state = (yy_start);
+ yy_current_state += YY_AT_BOL();
yy_match:
- do
- {
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 73 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- ++yy_cp;
- }
- while ( yy_current_state != 72 );
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
+ do
+ {
+ register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
+ if ( yy_accept[yy_current_state] )
+ {
+ (yy_last_accepting_state) = yy_current_state;
+ (yy_last_accepting_cpos) = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 73 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ ++yy_cp;
+ }
+ while ( yy_current_state != 72 );
+ yy_cp = (yy_last_accepting_cpos);
+ yy_current_state = (yy_last_accepting_state);
yy_find_action:
- yy_act = yy_accept[yy_current_state];
+ yy_act = yy_accept[yy_current_state];
- YY_DO_BEFORE_ACTION;
+ YY_DO_BEFORE_ACTION;
do_action: /* This label is used only to access EOF actions. */
- switch ( yy_act )
- { /* beginning of action switch */
- case 0: /* must back up */
- /* undo the effects of YY_DO_BEFORE_ACTION */
- *yy_cp = (yy_hold_char);
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = (yy_hold_char);
+ yy_cp = (yy_last_accepting_cpos);
+ yy_current_state = (yy_last_accepting_state);
+ goto yy_find_action;
case 1:
*yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
@@ -920,7 +920,7 @@ YY_RULE_SETUP
BEGIN(BOMSCAN);
}
}
- YY_BREAK
+ YY_BREAK
case 2:
/* rule 2 can match eol */
*yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
@@ -935,7 +935,7 @@ YY_RULE_SETUP
BEGIN(BOMSCAN);
}
}
- YY_BREAK
+ YY_BREAK
case 3:
*yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
(yy_c_buf_p) = yy_cp -= 1;
@@ -948,7 +948,7 @@ YY_RULE_SETUP
BEGIN(BOMSCAN);
}
}
- YY_BREAK
+ YY_BREAK
case 4:
/* rule 4 can match eol */
*yy_cp = (yy_hold_char); /* undo effects of setting up slice_text */
@@ -963,7 +963,7 @@ YY_RULE_SETUP
BEGIN(BOMSCAN);
}
}
- YY_BREAK
+ YY_BREAK
case 5:
YY_RULE_SETUP
#line 124 "Scanner.l"
@@ -981,7 +981,7 @@ YY_RULE_SETUP
}
while(c != '\n' && c != EOF);
}
- YY_BREAK
+ YY_BREAK
case 6:
YY_RULE_SETUP
#line 139 "Scanner.l"
@@ -1025,7 +1025,7 @@ YY_RULE_SETUP
unit->setComment(comment);
}
}
- YY_BREAK
+ YY_BREAK
case 7:
YY_RULE_SETUP
#line 180 "Scanner.l"
@@ -1033,7 +1033,7 @@ YY_RULE_SETUP
BEGIN(MAINSCAN);
return ICE_SCOPE_DELIMITER;
}
- YY_BREAK
+ YY_BREAK
case 8:
YY_RULE_SETUP
#line 185 "Scanner.l"
@@ -1041,7 +1041,7 @@ YY_RULE_SETUP
BEGIN(MAINSCAN);
return ICE_METADATA_OPEN;
}
- YY_BREAK
+ YY_BREAK
case 9:
YY_RULE_SETUP
#line 190 "Scanner.l"
@@ -1049,7 +1049,7 @@ YY_RULE_SETUP
BEGIN(MAINSCAN);
return ICE_METADATA_CLOSE;
}
- YY_BREAK
+ YY_BREAK
case 10:
YY_RULE_SETUP
#line 195 "Scanner.l"
@@ -1057,7 +1057,7 @@ YY_RULE_SETUP
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_OPEN;
}
- YY_BREAK
+ YY_BREAK
case 11:
YY_RULE_SETUP
#line 200 "Scanner.l"
@@ -1065,7 +1065,7 @@ YY_RULE_SETUP
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_CLOSE;
}
- YY_BREAK
+ YY_BREAK
case 12:
/* rule 12 can match eol */
YY_RULE_SETUP
@@ -1094,7 +1094,7 @@ YY_RULE_SETUP
return ICE_KEYWORD_OP;
}
}
- YY_BREAK
+ YY_BREAK
case 13:
YY_RULE_SETUP
#line 230 "Scanner.l"
@@ -1105,7 +1105,7 @@ YY_RULE_SETUP
*yylvalp = ident;
return *slice_text == '\\' ? ICE_IDENTIFIER : checkKeyword(ident->v);
}
- YY_BREAK
+ YY_BREAK
case 14:
YY_RULE_SETUP
#line 238 "Scanner.l"
@@ -1126,9 +1126,10 @@ YY_RULE_SETUP
unit->error("EOF in string");
break;
}
- else if(c == '\n')
+ else if(static_cast<unsigned char>(c) < 32 || c == 127)
{
- unit->error("newline in string");
+ unit->error("a string literal can only contain printable ASCII characters and non-ASCII characters");
+ break;
}
else if(c == '\\')
{
@@ -1147,61 +1148,49 @@ YY_RULE_SETUP
}
case '"':
case '\'':
+ case '?':
{
str->v += next;
break;
}
-
- case 'n':
- {
- str->v += '\n';
- break;
- }
-
- case 'r':
+ case 'a':
{
- str->v += '\r';
+ str->v += '\a';
break;
}
-
- case 't':
+ case 'b':
{
- str->v += '\t';
+ str->v += '\b';
break;
}
-
- case 'v':
+ case 'f':
{
- str->v += '\v';
+ str->v += '\f';
break;
}
-
- case 'f':
+ case 'n':
{
- str->v += '\f';
+ str->v += '\n';
break;
}
-
- case 'a':
+ case 'r':
{
- str->v += '\a';
+ str->v += '\r';
break;
}
-
- case 'b':
+ case 't':
{
- str->v += '\b';
+ str->v += '\t';
break;
}
-
- case '?':
+ case 'v':
{
- str->v += '\?';
+ str->v += '\v';
break;
}
//
- // Octal value \nnn limited to three octal digits but terminate at the first character
+ // Octal value \nnn limited to three octal digits but terminate at the first character
// that is not a valid octal digit if encountered sooner.
//
case '0':
@@ -1210,10 +1199,10 @@ YY_RULE_SETUP
case '3':
case '4':
case '5':
+ case '6':
case '7':
{
static string octalDigits = "01234567";
- IceUtil::Int64 value = 0;
string escape;
escape += next;
for(int i = 0; i < 2; ++i)
@@ -1224,150 +1213,87 @@ YY_RULE_SETUP
unput(next);
break;
}
- escape += next;
+ escape += next;
}
str->literal += escape;
- value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
-
- if(value == 0)
+ IceUtil::Int64 value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+ if(value > 255)
{
- unit->error("illegal NUL character in string constant");
- }
- else if(value > 255)
- {
- ostringstream os;
- os << "octal escape sequence out of range: '\\" << oct << value << "'";
- unit->warning(os.str());
+ unit->error(string("octal escape sequence out of range: `\\") + escape + "'");
}
str->v += static_cast<char>(value);
break;
}
+
case 'x':
{
- IceUtil::Int64 value = 0;
string escape = "";
+ next = static_cast<char>(yyinput());
//
// Unlike C++, we limit hex escape sequences to 2 hex digits
//
- while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))) && escape.length() < 2)
+ while(isxdigit(next) && escape.length() < 2)
{
escape += next;
+ next = static_cast<char>(yyinput());
}
unput(next);
-
- str->literal += escape;
- value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
- if(value == 0)
+ if(escape.length() == 0)
{
- unit->error("illegal NUL character in string constant");
+ unit->error("no hex digit in hex escape sequence");
}
+
+ str->literal += escape;
+ IceUtil::Int64 value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
assert(value >= 0 && value <= 255);
str->v += static_cast<char>(value);
break;
}
//
- // Universal character name \unnnn code point U+nnnn
+ // Universal character name
//
case 'u':
+ case 'U':
{
- IceUtil::Int64 value = 0;
string escape = "";
-
- for(int i = 0; i < 4; ++i)
+ char c = next;
+ int size = (c == 'u') ? 4 : 8;
+ while(size > 0)
{
next = static_cast<char>(yyinput());
- str->literal += next;
if(!isxdigit(static_cast<unsigned char>(next)))
{
- unit->error("unknown escape sequence in string constant: " + str->literal);
+ unit->error(string("unknown escape sequence in string literal: `\\") + c + escape + next + "'");
+ unput(next);
break;
}
escape += next;
+ --size;
}
-
- value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
- ostringstream os;
- os << '\\' << 'u';
- os.fill('0');
- os.width(4);
- os << hex << value;
-
- if(value == 0)
+ if(size == 0)
{
- unit->error("illegal NUL character in string constant");
- }
-
-
- //
- // Determine if a character is a surrogate:
- //
- // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
- // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
- //
- else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
- {
- unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
- }
-
- str->v += os.str();
-
- break;
- }
-
- case 'U':
- {
- IceUtil::Int64 value = 0;
- string escape = "";
-
- for(int i = 0; i < 8; ++i)
- {
- next = static_cast<char>(yyinput());
- str->literal += next;
- if(!isxdigit(static_cast<unsigned char>(next)))
+ // All digits read, check value
+ IceUtil::Int64 codePoint = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+ if(codePoint >= 0xd800 && codePoint <= 0xdfff)
{
-
- unit->error("unknown escape sequence in string constant: " + str->literal);
- break;
+ unit->error(string("a universal character name cannot designate a surrogate: `\\") + c + escape + "'");
}
- escape += next;
- }
-
- value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
-
- ostringstream os;
- os << '\\' << 'U';
- os.fill('0');
- os.width(8);
- os << hex << value;
-
- if(value == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
-
- //
- // Determine if a character is a surrogate:
- //
- // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
- // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
- //
- else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
- {
- unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
}
- str->v += os.str();
+ str->literal += escape;
+ str->v += string("\\") + c + escape;
break;
}
default:
{
ostringstream os;
- os << "unknown escape sequence '\\" << next << "'";
+ os << "unknown escape sequence `\\" << next << "'";
unit->warning(os.str());
// Escape the \ in this unknown escape sequence
@@ -1385,7 +1311,7 @@ YY_RULE_SETUP
*yylvalp = str;
return ICE_STRING_LITERAL;
}
- YY_BREAK
+ YY_BREAK
case 15:
YY_RULE_SETUP
#line 515 "Scanner.l"
@@ -1404,7 +1330,7 @@ YY_RULE_SETUP
}
return ICE_INTEGER_LITERAL;
}
- YY_BREAK
+ YY_BREAK
case 16:
YY_RULE_SETUP
#line 531 "Scanner.l"
@@ -1433,11 +1359,11 @@ YY_RULE_SETUP
string msg = "floating-point constant `";
msg += slice_text;
msg += "' too small (underflow)";
- unit->error(msg);
+ unit->error(msg);
}
return ICE_FLOATING_POINT_LITERAL;
}
- YY_BREAK
+ YY_BREAK
case 17:
/* rule 17 can match eol */
YY_RULE_SETUP
@@ -1454,7 +1380,7 @@ YY_RULE_SETUP
unit->nextLine();
}
}
- YY_BREAK
+ YY_BREAK
case 18:
YY_RULE_SETUP
#line 574 "Scanner.l"
@@ -1463,7 +1389,7 @@ YY_RULE_SETUP
BEGIN(MAINSCAN);
}
- YY_BREAK
+ YY_BREAK
case 19:
YY_RULE_SETUP
#line 580 "Scanner.l"
@@ -1482,147 +1408,147 @@ YY_RULE_SETUP
}
return slice_text[0];
}
- YY_BREAK
+ YY_BREAK
case 20:
YY_RULE_SETUP
#line 596 "Scanner.l"
ECHO;
- YY_BREAK
+ YY_BREAK
#line 1491 "lex.yy.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(BOMSCAN):
case YY_STATE_EOF(MAINSCAN):
- yyterminate();
-
- case YY_END_OF_BUFFER:
- {
- /* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
-
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
- *yy_cp = (yy_hold_char);
- YY_RESTORE_YY_MORE_OFFSET
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
- {
- /* We're scanning a new file or input source. It's
- * possible that this happened because the user
- * just pointed slice_in at a new source and called
- * slice_lex(). If so, then we have to assure
- * consistency between YY_CURRENT_BUFFER and our
- * globals. Here is the right place to do so, because
- * this is the first action (other than possibly a
- * back-up) that will match for the new input source.
- */
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = slice_in;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
- }
-
- /* Note that here we test for yy_c_buf_p "<=" to the position
- * of the first EOB in the buffer, since yy_c_buf_p will
- * already have been incremented past the NUL character
- * (since all states make transitions on EOB to the
- * end-of-buffer state). Contrast this with the test
- * in input().
- */
- if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- { /* This was really a NUL. */
- yy_state_type yy_next_state;
-
- (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- /* Okay, we're now positioned to make the NUL
- * transition. We couldn't have
- * yy_get_previous_state() go ahead and do it
- * for us because it doesn't know how to deal
- * with the possibility of jamming (and we don't
- * want to build jamming into it because then it
- * will run more slowly).
- */
-
- yy_next_state = yy_try_NUL_trans( yy_current_state );
-
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
-
- if ( yy_next_state )
- {
- /* Consume the NUL. */
- yy_cp = ++(yy_c_buf_p);
- yy_current_state = yy_next_state;
- goto yy_match;
- }
-
- else
- {
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
- }
- }
-
- else switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_END_OF_FILE:
- {
- (yy_did_buffer_switch_on_eof) = 0;
-
- if ( slice_wrap( ) )
- {
- /* Note: because we've taken care in
- * yy_get_next_buffer() to have set up
- * slice_text, we can now set up
- * yy_c_buf_p so that if some total
- * hoser (like flex itself) wants to
- * call the scanner after we return the
- * YY_NULL, it'll still work - another
- * YY_NULL will get returned.
- */
- (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
-
- yy_act = YY_STATE_EOF(YY_START);
- goto do_action;
- }
-
- else
- {
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
- }
- break;
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) =
- (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_match;
-
- case EOB_ACT_LAST_MATCH:
- (yy_c_buf_p) =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_find_action;
- }
- break;
- }
-
- default:
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--no action found" );
- } /* end of action switch */
- } /* end of scanning one token */
- } /* end of user's declarations */
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = (yy_hold_char);
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed slice_in at a new source and called
+ * slice_lex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = slice_in;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++(yy_c_buf_p);
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = (yy_last_accepting_cpos);
+ yy_current_state = (yy_last_accepting_state);
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ (yy_did_buffer_switch_on_eof) = 0;
+
+ if ( slice_wrap( ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * slice_text, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) =
+ (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ (yy_c_buf_p) =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of user's declarations */
} /* end of slice_lex */
/* yy_get_next_buffer - try to read in a new buffer
@@ -1634,164 +1560,164 @@ case YY_STATE_EOF(MAINSCAN):
*/
static int yy_get_next_buffer (void)
{
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
- register char *source = (yytext_ptr);
- register int number_to_move, i;
- int ret_val;
-
- if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--end of buffer missed" );
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
- { /* Don't try to fill the buffer, so this is an EOF. */
- if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
- {
- /* We matched a single character, the EOB, so
- * treat this as a final EOF.
- */
- return EOB_ACT_END_OF_FILE;
- }
-
- else
- {
- /* We matched some text prior to the EOB, first
- * process it.
- */
- return EOB_ACT_LAST_MATCH;
- }
- }
-
- /* Try to read more data. */
-
- /* First move last chars to start of buffer. */
- number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
-
- for ( i = 0; i < number_to_move; ++i )
- *(dest++) = *(source++);
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
- /* don't do the read, it's not guaranteed to return an EOF,
- * just force an EOF
- */
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
-
- else
- {
- yy_size_t num_to_read =
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
-
- while ( num_to_read <= 0 )
- { /* Not enough room in the buffer - grow it. */
-
- /* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
-
- int yy_c_buf_p_offset =
- (int) ((yy_c_buf_p) - b->yy_ch_buf);
-
- if ( b->yy_is_our_buffer )
- {
- yy_size_t new_size = b->yy_buf_size * 2;
-
- if ( new_size <= 0 )
- b->yy_buf_size += b->yy_buf_size / 8;
- else
- b->yy_buf_size *= 2;
-
- b->yy_ch_buf = (char *)
- /* Include room in for 2 EOB chars. */
- slice_realloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
- }
- else
- /* Can't grow it, we don't own it. */
- b->yy_ch_buf = 0;
-
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR(
- "fatal error - scanner input buffer overflow" );
-
- (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
-
- num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
- number_to_move - 1;
-
- }
-
- if ( num_to_read > YY_READ_BUF_SIZE )
- num_to_read = YY_READ_BUF_SIZE;
-
- /* Read in more data. */
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), num_to_read );
-
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- if ( (yy_n_chars) == 0 )
- {
- if ( number_to_move == YY_MORE_ADJ )
- {
- ret_val = EOB_ACT_END_OF_FILE;
- slice_restart(slice_in );
- }
-
- else
- {
- ret_val = EOB_ACT_LAST_MATCH;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
- YY_BUFFER_EOF_PENDING;
- }
- }
-
- else
- ret_val = EOB_ACT_CONTINUE_SCAN;
-
- if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
- /* Extend the array by 50%, plus the number we really need. */
- yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) slice_realloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
- if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
- }
-
- (yy_n_chars) += number_to_move;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
-
- (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+ register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+ register char *source = (yytext_ptr);
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+ else
+ {
+ yy_size_t num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
+
+ int yy_c_buf_p_offset =
+ (int) ((yy_c_buf_p) - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ yy_size_t new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ slice_realloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = 0;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
- return ret_val;
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ (yy_n_chars), num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ if ( (yy_n_chars) == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ slice_restart(slice_in );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) slice_realloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ }
+
+ (yy_n_chars) += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+ (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
}
/* yy_get_previous_state - get the state just before the EOB char was reached */
static yy_state_type yy_get_previous_state (void)
{
- register yy_state_type yy_current_state;
- register char *yy_cp;
-
- yy_current_state = (yy_start);
- yy_current_state += YY_AT_BOL();
-
- for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
- {
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 73 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- }
-
- return yy_current_state;
+ register yy_state_type yy_current_state;
+ register char *yy_cp;
+
+ yy_current_state = (yy_start);
+ yy_current_state += YY_AT_BOL();
+
+ for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
+ {
+ register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
+ if ( yy_accept[yy_current_state] )
+ {
+ (yy_last_accepting_state) = yy_current_state;
+ (yy_last_accepting_cpos) = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 73 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ }
+
+ return yy_current_state;
}
/* yy_try_NUL_trans - try to make a transition on the NUL character
@@ -1801,62 +1727,62 @@ static int yy_get_next_buffer (void)
*/
static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
{
- register int yy_is_jam;
- register char *yy_cp = (yy_c_buf_p);
-
- register YY_CHAR yy_c = 1;
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 73 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 72);
-
- return yy_is_jam ? 0 : yy_current_state;
+ register int yy_is_jam;
+ register char *yy_cp = (yy_c_buf_p);
+
+ register YY_CHAR yy_c = 1;
+ if ( yy_accept[yy_current_state] )
+ {
+ (yy_last_accepting_state) = yy_current_state;
+ (yy_last_accepting_cpos) = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 73 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ yy_is_jam = (yy_current_state == 72);
+
+ return yy_is_jam ? 0 : yy_current_state;
}
static void yyunput (int c, register char * yy_bp )
{
- register char *yy_cp;
-
+ register char *yy_cp;
+
yy_cp = (yy_c_buf_p);
- /* undo effects of setting up slice_text */
- *yy_cp = (yy_hold_char);
+ /* undo effects of setting up slice_text */
+ *yy_cp = (yy_hold_char);
- if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
- { /* need to shift things up to make room */
- /* +2 for EOB chars. */
- register yy_size_t number_to_move = (yy_n_chars) + 2;
- register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
- register char *source =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
+ if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+ { /* need to shift things up to make room */
+ /* +2 for EOB chars. */
+ register yy_size_t number_to_move = (yy_n_chars) + 2;
+ register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
+ register char *source =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
- while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
- *--dest = *--source;
+ while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ *--dest = *--source;
- yy_cp += (int) (dest - source);
- yy_bp += (int) (dest - source);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
+ yy_cp += (int) (dest - source);
+ yy_bp += (int) (dest - source);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
- if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
- YY_FATAL_ERROR( "flex scanner push-back overflow" );
- }
+ if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+ YY_FATAL_ERROR( "flex scanner push-back overflow" );
+ }
- *--yy_cp = (char) c;
+ *--yy_cp = (char) c;
- (yytext_ptr) = yy_bp;
- (yy_hold_char) = *yy_cp;
- (yy_c_buf_p) = yy_cp;
+ (yytext_ptr) = yy_bp;
+ (yy_hold_char) = *yy_cp;
+ (yy_c_buf_p) = yy_cp;
}
#ifndef YY_NO_INPUT
@@ -1867,182 +1793,182 @@ static int yy_get_next_buffer (void)
#endif
{
- int c;
-
- *(yy_c_buf_p) = (yy_hold_char);
-
- if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
- {
- /* yy_c_buf_p now points to the character we want to return.
- * If this occurs *before* the EOB characters, then it's a
- * valid NUL; if not, then we've hit the end of the buffer.
- */
- if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- /* This was really a NUL. */
- *(yy_c_buf_p) = '\0';
-
- else
- { /* need more input */
- yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
- ++(yy_c_buf_p);
-
- switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_LAST_MATCH:
- /* This happens because yy_g_n_b()
- * sees that we've accumulated a
- * token and flags that we need to
- * try matching the token before
- * proceeding. But for input(),
- * there's no matching to consider.
- * So convert the EOB_ACT_LAST_MATCH
- * to EOB_ACT_END_OF_FILE.
- */
-
- /* Reset buffer status. */
- slice_restart(slice_in );
-
- /*FALLTHROUGH*/
-
- case EOB_ACT_END_OF_FILE:
- {
- if ( slice_wrap( ) )
- return EOF;
-
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
+ int c;
+
+ *(yy_c_buf_p) = (yy_hold_char);
+
+ if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ /* This was really a NUL. */
+ *(yy_c_buf_p) = '\0';
+
+ else
+ { /* need more input */
+ yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
+ ++(yy_c_buf_p);
+
+ switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ slice_restart(slice_in );
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( slice_wrap( ) )
+ return EOF;
+
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
#ifdef __cplusplus
- return yyinput();
+ return yyinput();
#else
- return input();
+ return input();
#endif
- }
+ }
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) = (yytext_ptr) + offset;
- break;
- }
- }
- }
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) = (yytext_ptr) + offset;
+ break;
+ }
+ }
+ }
- c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
- *(yy_c_buf_p) = '\0'; /* preserve slice_text */
- (yy_hold_char) = *++(yy_c_buf_p);
+ c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
+ *(yy_c_buf_p) = '\0'; /* preserve slice_text */
+ (yy_hold_char) = *++(yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = (c == '\n');
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = (c == '\n');
- return c;
+ return c;
}
#endif /* ifndef YY_NO_INPUT */
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
- *
+ *
* @note This function does not reset the start condition to @c INITIAL .
*/
void slice_restart (FILE * input_file )
{
-
- if ( ! YY_CURRENT_BUFFER ){
+
+ if ( ! YY_CURRENT_BUFFER ){
slice_ensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
+ YY_CURRENT_BUFFER_LVALUE =
slice__create_buffer(slice_in,YY_BUF_SIZE );
- }
+ }
- slice__init_buffer(YY_CURRENT_BUFFER,input_file );
- slice__load_buffer_state( );
+ slice__init_buffer(YY_CURRENT_BUFFER,input_file );
+ slice__load_buffer_state( );
}
/** Switch to a different input buffer.
* @param new_buffer The new input buffer.
- *
+ *
*/
void slice__switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
-
- /* TODO. We should be able to replace this entire function body
- * with
- * slice_pop_buffer_state();
- * slice_push_buffer_state(new_buffer);
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * slice_pop_buffer_state();
+ * slice_push_buffer_state(new_buffer);
*/
- slice_ensure_buffer_stack ();
- if ( YY_CURRENT_BUFFER == new_buffer )
- return;
-
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
- slice__load_buffer_state( );
-
- /* We don't actually know whether we did this switch during
- * EOF (slice_wrap()) processing, but the only time this flag
- * is looked at is after slice_wrap() is called, so it's safe
- * to go ahead and always set it.
- */
- (yy_did_buffer_switch_on_eof) = 1;
+ slice_ensure_buffer_stack ();
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ slice__load_buffer_state( );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (slice_wrap()) processing, but the only time this flag
+ * is looked at is after slice_wrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ (yy_did_buffer_switch_on_eof) = 1;
}
static void slice__load_buffer_state (void)
{
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
- slice_in = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
- (yy_hold_char) = *(yy_c_buf_p);
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ slice_in = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ (yy_hold_char) = *(yy_c_buf_p);
}
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
+ *
* @return the allocated buffer state.
*/
YY_BUFFER_STATE slice__create_buffer (FILE * file, int size )
{
- YY_BUFFER_STATE b;
-
- b = (YY_BUFFER_STATE) slice_alloc(sizeof( struct yy_buffer_state ) );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in slice__create_buffer()" );
+ YY_BUFFER_STATE b;
- b->yy_buf_size = size;
+ b = (YY_BUFFER_STATE) slice_alloc(sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in slice__create_buffer()" );
- /* yy_ch_buf has to be 2 characters longer than the size given because
- * we need to put in 2 end-of-buffer characters.
- */
- b->yy_ch_buf = (char *) slice_alloc(b->yy_buf_size + 2 );
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in slice__create_buffer()" );
+ b->yy_buf_size = size;
- b->yy_is_our_buffer = 1;
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) slice_alloc(b->yy_buf_size + 2 );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in slice__create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
- slice__init_buffer(b,file );
+ slice__init_buffer(b,file );
- return b;
+ return b;
}
/** Destroy the buffer.
* @param b a buffer created with slice__create_buffer()
- *
+ *
*/
void slice__delete_buffer (YY_BUFFER_STATE b )
{
-
- if ( ! b )
- return;
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+ if ( ! b )
+ return;
+
+ if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
- if ( b->yy_is_our_buffer )
- slice_free((void *) b->yy_ch_buf );
+ if ( b->yy_is_our_buffer )
+ slice_free((void *) b->yy_ch_buf );
- slice_free((void *) b );
+ slice_free((void *) b );
}
/* Initializes or reinitializes a buffer.
@@ -2052,12 +1978,12 @@ static void slice__load_buffer_state (void)
static void slice__init_buffer (YY_BUFFER_STATE b, FILE * file )
{
- int oerrno = errno;
-
- slice__flush_buffer(b );
+ int oerrno = errno;
- b->yy_input_file = file;
- b->yy_fill_buffer = 1;
+ slice__flush_buffer(b );
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
/* If b is the current buffer, then slice__init_buffer was _probably_
* called from slice_restart() or through yy_get_next_buffer.
@@ -2069,87 +1995,87 @@ static void slice__load_buffer_state (void)
}
b->yy_is_interactive = 0;
-
- errno = oerrno;
+
+ errno = oerrno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
+ *
*/
void slice__flush_buffer (YY_BUFFER_STATE b )
{
- if ( ! b )
- return;
+ if ( ! b )
+ return;
- b->yy_n_chars = 0;
+ b->yy_n_chars = 0;
- /* We always need two end-of-buffer characters. The first causes
- * a transition to the end-of-buffer state. The second causes
- * a jam in that state.
- */
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
- b->yy_buf_pos = &b->yy_ch_buf[0];
+ b->yy_buf_pos = &b->yy_ch_buf[0];
- b->yy_at_bol = 1;
- b->yy_buffer_status = YY_BUFFER_NEW;
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
- if ( b == YY_CURRENT_BUFFER )
- slice__load_buffer_state( );
+ if ( b == YY_CURRENT_BUFFER )
+ slice__load_buffer_state( );
}
/** Pushes the new state onto the stack. The new state becomes
* the current state. This function will allocate the stack
* if necessary.
* @param new_buffer The new state.
- *
+ *
*/
void slice_push_buffer_state (YY_BUFFER_STATE new_buffer )
{
- if (new_buffer == NULL)
- return;
-
- slice_ensure_buffer_stack();
-
- /* This block is copied from slice__switch_to_buffer. */
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- /* Only push if top exists. Otherwise, replace top. */
- if (YY_CURRENT_BUFFER)
- (yy_buffer_stack_top)++;
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
-
- /* copied from slice__switch_to_buffer. */
- slice__load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
+ if (new_buffer == NULL)
+ return;
+
+ slice_ensure_buffer_stack();
+
+ /* This block is copied from slice__switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ (yy_buffer_stack_top)++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from slice__switch_to_buffer. */
+ slice__load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
}
/** Removes and deletes the top of the stack, if present.
* The next element becomes the new top.
- *
+ *
*/
void slice_pop_buffer_state (void)
{
- if (!YY_CURRENT_BUFFER)
- return;
-
- slice__delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- if ((yy_buffer_stack_top) > 0)
- --(yy_buffer_stack_top);
-
- if (YY_CURRENT_BUFFER) {
- slice__load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
- }
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ slice__delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ if ((yy_buffer_stack_top) > 0)
+ --(yy_buffer_stack_top);
+
+ if (YY_CURRENT_BUFFER) {
+ slice__load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+ }
}
/* Allocates the stack if it does not exist.
@@ -2157,131 +2083,131 @@ void slice_pop_buffer_state (void)
*/
static void slice_ensure_buffer_stack (void)
{
- yy_size_t num_to_alloc;
-
- if (!(yy_buffer_stack)) {
+ yy_size_t num_to_alloc;
+
+ if (!(yy_buffer_stack)) {
- /* First allocation is just for 2 elements, since we don't know if this
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
- * immediate realloc on the next call.
+ /* First allocation is just for 2 elements, since we don't know if this
+ * scanner will even need a stack. We use 2 instead of 1 to avoid an
+ * immediate realloc on the next call.
*/
- num_to_alloc = 1;
- (yy_buffer_stack) = (struct yy_buffer_state**)slice_alloc
- (num_to_alloc * sizeof(struct yy_buffer_state*)
- );
- if ( ! (yy_buffer_stack) )
- YY_FATAL_ERROR( "out of dynamic memory in slice_ensure_buffer_stack()" );
-
- memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
- (yy_buffer_stack_max) = num_to_alloc;
- (yy_buffer_stack_top) = 0;
- return;
- }
-
- if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
-
- /* Increase the buffer to prepare for a possible push. */
- int grow_size = 8 /* arbitrary grow size */;
-
- num_to_alloc = (yy_buffer_stack_max) + grow_size;
- (yy_buffer_stack) = (struct yy_buffer_state**)slice_realloc
- ((yy_buffer_stack),
- num_to_alloc * sizeof(struct yy_buffer_state*)
- );
- if ( ! (yy_buffer_stack) )
- YY_FATAL_ERROR( "out of dynamic memory in slice_ensure_buffer_stack()" );
-
- /* zero only the new slots.*/
- memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
- (yy_buffer_stack_max) = num_to_alloc;
- }
+ num_to_alloc = 1;
+ (yy_buffer_stack) = (struct yy_buffer_state**)slice_alloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in slice_ensure_buffer_stack()" );
+
+ memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ (yy_buffer_stack_max) = num_to_alloc;
+ (yy_buffer_stack_top) = 0;
+ return;
+ }
+
+ if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ int grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = (yy_buffer_stack_max) + grow_size;
+ (yy_buffer_stack) = (struct yy_buffer_state**)slice_realloc
+ ((yy_buffer_stack),
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in slice_ensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
+ (yy_buffer_stack_max) = num_to_alloc;
+ }
}
/** Setup the input buffer state to scan directly from a user-specified character buffer.
* @param base the character buffer
* @param size the size in bytes of the character buffer
- *
- * @return the newly allocated buffer state object.
+ *
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE slice__scan_buffer (char * base, yy_size_t size )
{
- YY_BUFFER_STATE b;
-
- if ( size < 2 ||
- base[size-2] != YY_END_OF_BUFFER_CHAR ||
- base[size-1] != YY_END_OF_BUFFER_CHAR )
- /* They forgot to leave room for the EOB's. */
- return 0;
-
- b = (YY_BUFFER_STATE) slice_alloc(sizeof( struct yy_buffer_state ) );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in slice__scan_buffer()" );
-
- b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
- b->yy_buf_pos = b->yy_ch_buf = base;
- b->yy_is_our_buffer = 0;
- b->yy_input_file = 0;
- b->yy_n_chars = b->yy_buf_size;
- b->yy_is_interactive = 0;
- b->yy_at_bol = 1;
- b->yy_fill_buffer = 0;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- slice__switch_to_buffer(b );
-
- return b;
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return 0;
+
+ b = (YY_BUFFER_STATE) slice_alloc(sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in slice__scan_buffer()" );
+
+ b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = 0;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ slice__switch_to_buffer(b );
+
+ return b;
}
/** Setup the input buffer state to scan a string. The next call to slice_lex() will
* scan from a @e copy of @a str.
* @param yystr a NUL-terminated string to scan
- *
+ *
* @return the newly allocated buffer state object.
* @note If you want to scan bytes that may contain NUL values, then use
* slice__scan_bytes() instead.
*/
YY_BUFFER_STATE slice__scan_string (yyconst char * yystr )
{
-
- return slice__scan_bytes(yystr,strlen(yystr) );
+
+ return slice__scan_bytes(yystr,strlen(yystr) );
}
/** Setup the input buffer state to scan the given bytes. The next call to slice_lex() will
* scan from a @e copy of @a bytes.
* @param yybytes the byte buffer to scan
* @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
- *
+ *
* @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE slice__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len )
{
- YY_BUFFER_STATE b;
- char *buf;
- yy_size_t n;
- yy_size_t i;
-
- /* Get memory for full buffer, including space for trailing EOB's. */
- n = _yybytes_len + 2;
- buf = (char *) slice_alloc(n );
- if ( ! buf )
- YY_FATAL_ERROR( "out of dynamic memory in slice__scan_bytes()" );
-
- for ( i = 0; i < _yybytes_len; ++i )
- buf[i] = yybytes[i];
-
- buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
-
- b = slice__scan_buffer(buf,n );
- if ( ! b )
- YY_FATAL_ERROR( "bad buffer in slice__scan_bytes()" );
-
- /* It's okay to grow etc. this buffer, and we should throw it
- * away when we're done.
- */
- b->yy_is_our_buffer = 1;
-
- return b;
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ yy_size_t i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = _yybytes_len + 2;
+ buf = (char *) slice_alloc(n );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in slice__scan_bytes()" );
+
+ for ( i = 0; i < _yybytes_len; ++i )
+ buf[i] = yybytes[i];
+
+ buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = slice__scan_buffer(buf,n );
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in slice__scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
}
#ifndef YY_EXIT_FAILURE
@@ -2290,40 +2216,40 @@ YY_BUFFER_STATE slice__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_
static void yy_fatal_error (yyconst char* msg )
{
- (void) fprintf( stderr, "%s\n", msg );
- exit( YY_EXIT_FAILURE );
+ (void) fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
}
/* Redefine yyless() so it works in section 3 code. */
#undef yyless
#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up slice_text. */ \
+ do \
+ { \
+ /* Undo effects of setting up slice_text. */ \
int yyless_macro_arg = (n); \
YY_LESS_LINENO(yyless_macro_arg);\
- slice_text[slice_leng] = (yy_hold_char); \
- (yy_c_buf_p) = slice_text + yyless_macro_arg; \
- (yy_hold_char) = *(yy_c_buf_p); \
- *(yy_c_buf_p) = '\0'; \
- slice_leng = yyless_macro_arg; \
- } \
- while ( 0 )
+ slice_text[slice_leng] = (yy_hold_char); \
+ (yy_c_buf_p) = slice_text + yyless_macro_arg; \
+ (yy_hold_char) = *(yy_c_buf_p); \
+ *(yy_c_buf_p) = '\0'; \
+ slice_leng = yyless_macro_arg; \
+ } \
+ while ( 0 )
/* Accessor methods (get/set functions) to struct members. */
/** Get the current line number.
- *
+ *
*/
int slice_get_lineno (void)
{
-
+
return slice_lineno;
}
/** Get the input stream.
- *
+ *
*/
FILE *slice_get_in (void)
{
@@ -2331,7 +2257,7 @@ FILE *slice_get_in (void)
}
/** Get the output stream.
- *
+ *
*/
FILE *slice_get_out (void)
{
@@ -2339,7 +2265,7 @@ FILE *slice_get_out (void)
}
/** Get the length of the current token.
- *
+ *
*/
yy_size_t slice_get_leng (void)
{
@@ -2347,7 +2273,7 @@ yy_size_t slice_get_leng (void)
}
/** Get the current token.
- *
+ *
*/
char *slice_get_text (void)
@@ -2357,18 +2283,18 @@ char *slice_get_text (void)
/** Set the current line number.
* @param line_number
- *
+ *
*/
void slice_set_lineno (int line_number )
{
-
+
slice_lineno = line_number;
}
/** Set the input stream. This does not discard the current
* input buffer.
* @param in_str A readable stream.
- *
+ *
* @see slice__switch_to_buffer
*/
void slice_set_in (FILE * in_str )
@@ -2422,17 +2348,17 @@ static int yy_init_globals (void)
/* slice_lex_destroy is for both reentrant and non-reentrant scanners. */
int slice_lex_destroy (void)
{
-
+
/* Pop the buffer stack, destroying each element. */
- while(YY_CURRENT_BUFFER){
- slice__delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- slice_pop_buffer_state();
- }
+ while(YY_CURRENT_BUFFER){
+ slice__delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ slice_pop_buffer_state();
+ }
- /* Destroy the stack itself. */
- slice_free((yy_buffer_stack) );
- (yy_buffer_stack) = NULL;
+ /* Destroy the stack itself. */
+ slice_free((yy_buffer_stack) );
+ (yy_buffer_stack) = NULL;
/* Reset the globals. This is important in a non-reentrant scanner so the next time
* slice_lex() is called, initialization will occur. */
@@ -2448,43 +2374,43 @@ int slice_lex_destroy (void)
#ifndef yytext_ptr
static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
{
- register int i;
- for ( i = 0; i < n; ++i )
- s1[i] = s2[i];
+ register int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
}
#endif
#ifdef YY_NEED_STRLEN
static int yy_flex_strlen (yyconst char * s )
{
- register int n;
- for ( n = 0; s[n]; ++n )
- ;
+ register int n;
+ for ( n = 0; s[n]; ++n )
+ ;
- return n;
+ return n;
}
#endif
void *slice_alloc (yy_size_t size )
{
- return (void *) malloc( size );
+ return (void *) malloc( size );
}
void *slice_realloc (void * ptr, yy_size_t size )
{
- /* The cast to (char *) in the following accommodates both
- * implementations that use char* generic pointers, and those
- * that use void* generic pointers. It works with the latter
- * because both ANSI C and C++ allow castless assignment from
- * any pointer type to void*, and deal with argument conversions
- * as though doing an assignment.
- */
- return (void *) realloc( (char *) ptr, size );
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return (void *) realloc( (char *) ptr, size );
}
void slice_free (void * ptr )
{
- free( (char *) ptr ); /* see slice_realloc() for (char *) cast */
+ free( (char *) ptr ); /* see slice_realloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
@@ -2561,4 +2487,3 @@ checkKeyword(string& id)
}
}
-
diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l
index aadffc22bc1..fd72b46e228 100644
--- a/cpp/src/Slice/Scanner.l
+++ b/cpp/src/Slice/Scanner.l
@@ -239,7 +239,7 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
BEGIN(MAINSCAN);
StringTokPtr str = new StringTok;
str->literal = "\"";
- while(true)
+ while(true)
{
char c = static_cast<char>(yyinput());
str->literal += c;
@@ -252,9 +252,10 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
unit->error("EOF in string");
break;
}
- else if(c == '\n')
+ else if(static_cast<unsigned char>(c) < 32 || c == 127)
{
- unit->error("newline in string");
+ unit->error("a string literal can only contain printable ASCII characters and non-ASCII characters");
+ break;
}
else if(c == '\\')
{
@@ -273,56 +274,44 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
}
case '"':
case '\'':
+ case '?':
{
str->v += next;
break;
}
-
- case 'n':
- {
- str->v += '\n';
- break;
- }
-
- case 'r':
+ case 'a':
{
- str->v += '\r';
+ str->v += '\a';
break;
}
-
- case 't':
+ case 'b':
{
- str->v += '\t';
+ str->v += '\b';
break;
}
-
- case 'v':
+ case 'f':
{
- str->v += '\v';
+ str->v += '\f';
break;
}
-
- case 'f':
+ case 'n':
{
- str->v += '\f';
+ str->v += '\n';
break;
}
-
- case 'a':
+ case 'r':
{
- str->v += '\a';
+ str->v += '\r';
break;
}
-
- case 'b':
+ case 't':
{
- str->v += '\b';
+ str->v += '\t';
break;
}
-
- case '?':
+ case 'v':
{
- str->v += '\?';
+ str->v += '\v';
break;
}
@@ -336,10 +325,10 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
case '3':
case '4':
case '5':
+ case '6':
case '7':
{
static string octalDigits = "01234567";
- IceUtil::Int64 value = 0;
string escape;
escape += next;
for(int i = 0; i < 2; ++i)
@@ -353,17 +342,10 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
escape += next;
}
str->literal += escape;
- value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
-
- if(value == 0)
+ IceUtil::Int64 value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+ if(value > 255)
{
- unit->error("illegal NUL character in string constant");
- }
- else if(value > 255)
- {
- ostringstream os;
- os << "octal escape sequence out of range: '\\" << oct << value << "'";
- unit->warning(os.str());
+ unit->error(string("octal escape sequence out of range: `\\") + escape + "'");
}
str->v += static_cast<char>(value);
break;
@@ -371,130 +353,73 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
case 'x':
{
- IceUtil::Int64 value = 0;
string escape = "";
+ next = static_cast<char>(yyinput());
//
// Unlike C++, we limit hex escape sequences to 2 hex digits
//
- while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))) && escape.length() < 2)
+ while(isxdigit(next) && escape.length() < 2)
{
escape += next;
+ next = static_cast<char>(yyinput());
}
unput(next);
- str->literal += escape;
- value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
-
- if(value == 0)
+ if(escape.length() == 0)
{
- unit->error("illegal NUL character in string constant");
+ unit->error("no hex digit in hex escape sequence");
}
+
+ str->literal += escape;
+ IceUtil::Int64 value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
assert(value >= 0 && value <= 255);
str->v += static_cast<char>(value);
break;
}
//
- // Universal character name \unnnn code point U+nnnn
+ // Universal character name
//
case 'u':
+ case 'U':
{
- IceUtil::Int64 value = 0;
string escape = "";
-
- for(int i = 0; i < 4; ++i)
+ char c = next;
+ int size = (c == 'u') ? 4 : 8;
+ while(size > 0)
{
next = static_cast<char>(yyinput());
- str->literal += next;
if(!isxdigit(static_cast<unsigned char>(next)))
{
- unit->error("unknown escape sequence in string constant: " + str->literal);
+ unit->error(string("unknown escape sequence in string literal: `\\") + c + escape + next + "'");
+ unput(next);
break;
}
escape += next;
+ --size;
}
- value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
-
- ostringstream os;
- os << '\\' << 'u';
- os.fill('0');
- os.width(4);
- os << hex << value;
-
- if(value == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
-
-
- //
- // Determine if a character is a surrogate:
- //
- // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
- // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
- //
- else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ if(size == 0)
{
- unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
- }
-
- str->v += os.str();
-
- break;
- }
-
- case 'U':
- {
- IceUtil::Int64 value = 0;
- string escape = "";
-
- for(int i = 0; i < 8; ++i)
- {
- next = static_cast<char>(yyinput());
- str->literal += next;
- if(!isxdigit(static_cast<unsigned char>(next)))
+ // All digits read, check value
+ IceUtil::Int64 codePoint = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+ if(codePoint >= 0xd800 && codePoint <= 0xdfff)
{
-
- unit->error("unknown escape sequence in string constant: " + str->literal);
- break;
+ unit->error(string("a universal character name cannot designate a surrogate: `\\") + c + escape + "'");
}
- escape += next;
}
- value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
-
- ostringstream os;
- os << '\\' << 'U';
- os.fill('0');
- os.width(8);
- os << hex << value;
-
- if(value == 0)
- {
- unit->error("illegal NUL character in string constant");
- }
-
- //
- // Determine if a character is a surrogate:
- //
- // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
- // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
- //
- else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
- {
- unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
- }
-
- str->v += os.str();
+ str->literal += escape;
+ str->v += string("\\") + c + escape;
break;
}
default:
{
ostringstream os;
- os << "unknown escape sequence '\\" << next << "'";
+ os << "unknown escape sequence `\\" << next << "'";
unit->warning(os.str());
// Escape the \ in this unknown escape sequence
diff --git a/cpp/src/Slice/StringLiteralUtil.cpp b/cpp/src/Slice/StringLiteralUtil.cpp
new file mode 100644
index 00000000000..9527d30e13f
--- /dev/null
+++ b/cpp/src/Slice/StringLiteralUtil.cpp
@@ -0,0 +1,374 @@
+// **********************************************************************
+//
+// Copyright (c) 2003-2016 ZeroC, Inc. All rights reserved.
+//
+// This copy of Ice is licensed to you under the terms described in the
+// ICE_LICENSE file included in this distribution.
+//
+// **********************************************************************
+
+#include <Slice/Util.h>
+
+#include <IceUtil/InputUtil.h>
+#include <IceUtil/StringConverter.h>
+
+#include <ostream>
+#include <iomanip>
+
+using namespace std;
+using namespace Slice;
+using namespace IceUtil;
+using namespace IceUtilInternal;
+
+namespace
+{
+
+class StringLiteralGenerator
+{
+public:
+
+ StringLiteralGenerator(const string&, const string&, EscapeMode, unsigned char);
+
+ string escapeASCIIChar(char) const;
+ string escapeCodePoint(unsigned int) const;
+ string flushU8Buffer(vector<unsigned char>&) const;
+
+private:
+
+ const string _nonPrintableEscaped;
+ const string _printableEscaped;
+ const EscapeMode _escapeMode;
+ const unsigned char _cutOff;
+};
+
+StringLiteralGenerator::StringLiteralGenerator(const string& nonPrintableEscaped,
+ const string& printableEscaped,
+ EscapeMode escapeMode,
+ unsigned char cutOff) :
+ _nonPrintableEscaped(nonPrintableEscaped),
+ _printableEscaped(printableEscaped + "\\\""),
+ _escapeMode(escapeMode),
+ _cutOff(cutOff)
+{
+}
+
+string
+StringLiteralGenerator::escapeASCIIChar(char c) const
+{
+ assert(static_cast<unsigned char>(c) < 128);
+
+ string result;
+
+ if(_nonPrintableEscaped.find(c) != string::npos)
+ {
+ switch(c)
+ {
+ case '\a':
+ {
+ result = "\\a";
+ break;
+ }
+ case '\b':
+ {
+ result = "\\b";
+ break;
+ }
+ case '\f':
+ {
+ result = "\\f";
+ break;
+ }
+ case '\n':
+ {
+ result = "\\n";
+ break;
+ }
+ case '\r':
+ {
+ result = "\\r";
+ break;
+ }
+ case '\t':
+ {
+ result = "\\t";
+ break;
+ }
+ case '\v':
+ {
+ result = "\\v";
+ break;
+ }
+ case '\0':
+ {
+ result = "\\0";
+ break;
+ }
+ case '\x20':
+ {
+ result = "\\s";
+ break;
+ }
+ case '\x1b':
+ {
+ result = "\\e";
+ break;
+ }
+ default:
+ {
+ // The caller cannot add additional non-printable ASCII characters!
+ assert(0);
+ }
+ }
+ }
+ else if(_printableEscaped.find(c) != string::npos)
+ {
+ result = '\\';
+ result += c;
+ }
+ else if(c >= 32 && c <= 126)
+ {
+ // Other printable ASCII
+ result = c;
+ }
+ else
+ {
+ // Other non-printable ASCII character
+ ostringstream os;
+ if((static_cast<unsigned char>(c) < _cutOff) || (_escapeMode == Octal))
+ {
+ os << "\\" << oct << setfill('0') << setw(3) << static_cast<unsigned int>(c & 0xFF);
+ }
+ else
+ {
+ os << "\\u" << hex << setfill('0') << setw(4) << static_cast<unsigned int>(c & 0xFF);
+ }
+ result = os.str();
+ }
+ return result;
+}
+
+string
+StringLiteralGenerator::escapeCodePoint(unsigned int codePoint) const
+{
+ if(codePoint < 128)
+ {
+ return escapeASCIIChar(static_cast<char>(codePoint));
+ }
+ else if(_escapeMode == Octal)
+ {
+ vector<unsigned int> u32buffer;
+ u32buffer.push_back(codePoint);
+ vector<unsigned char> u8buffer = fromUTF32(u32buffer);
+
+ ostringstream os;
+ for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
+ {
+ os << "\\" << setfill('0') << setw(3) << oct << static_cast<unsigned int>(*q);
+ }
+ return os.str();
+ }
+ else
+ {
+ ostringstream os;
+ if(codePoint < _cutOff)
+ {
+ //
+ // Output octal escape
+ //
+ os << "\\" << setfill('0') << setw(3) << oct << codePoint;
+ }
+ else if(codePoint <= 0xFFFF)
+ {
+ os << "\\u" << setfill('0') << setw(4) << hex << codePoint;
+ }
+ else if(_escapeMode == ShortUCN)
+ {
+ //
+ // Convert to surrogate pair
+ //
+ unsigned int highSurrogate = ((codePoint - 0x10000) / 0x400) + 0xD800;
+ unsigned int lowSurrogate = ((codePoint - 0x10000) % 0x400) + 0xDC00;
+ os << "\\u" << setfill('0') << setw(4) << hex << highSurrogate;
+ os << "\\u" << setfill('0') << setw(4) << hex << lowSurrogate;
+ }
+ else if(_escapeMode == EC6UCN)
+ {
+ os << "\\u{" << hex << codePoint << "}";
+ }
+ else
+ {
+ os << "\\U" << setfill('0') << setw(8) << hex << codePoint;
+ }
+ return os.str();
+ }
+}
+
+string
+StringLiteralGenerator::flushU8Buffer(vector<unsigned char>& u8buffer) const
+{
+ if(u8buffer.empty())
+ {
+ return "";
+ }
+ else
+ {
+ ostringstream os;
+ vector<unsigned int> u32buffer = toUTF32(u8buffer);
+
+ for(vector<unsigned int>::const_iterator p = u32buffer.begin(); p != u32buffer.end(); ++p)
+ {
+ os << escapeCodePoint(*p);
+ }
+ u8buffer.clear();
+ return os.str();
+ }
+}
+
+}
+
+string
+Slice::toStringLiteral(const string& value,
+ const string& nonPrintableEscaped,
+ const string& printableEscaped,
+ EscapeMode escapeMode,
+ unsigned char cutOff)
+{
+ StringLiteralGenerator generator(nonPrintableEscaped, printableEscaped, escapeMode, cutOff);
+
+ ostringstream os;
+
+ if(escapeMode != Octal)
+ {
+ vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
+
+ for(size_t i = 0; i < value.size(); ++i)
+ {
+ char c = value[i];
+
+ if(static_cast<unsigned char>(c) >= 128)
+ {
+ // New UTF-8 byte
+ u8buffer.push_back(static_cast<unsigned char>(c));
+ }
+ else
+ {
+ //
+ // First write any outstanding UTF-8-encoded characters
+ //
+ os << generator.flushU8Buffer(u8buffer);
+
+ if(c == '\\')
+ {
+ if(i + 1 == value.size())
+ {
+ // trailing backslash, add a second one
+ os << "\\\\";
+ }
+ else
+ {
+ c = value[++i];
+
+ if(c == '\\')
+ {
+ os << "\\\\";
+ }
+ else if(c == 'u' || c == 'U')
+ {
+ size_t sz = c == 'U' ? 8 : 4;
+ string codePointStr = value.substr(i + 1, sz);
+ assert(codePointStr.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codePointStr.c_str(), 0, 16);
+ if(v < 128)
+ {
+ // ASCII character that may need to be escaped in languages such as Java
+ os << generator.escapeASCIIChar(static_cast<char>(v));
+ }
+ else if(escapeMode == UCN || c == 'u')
+ {
+ // keep this escape as is
+ os << "\\" << c << codePointStr;
+ }
+ else
+ {
+ os << generator.escapeCodePoint(static_cast<unsigned int>(v));
+ }
+ i += sz;
+ }
+ else
+ {
+ // unescaped backslash: escape it!
+ os << "\\\\";
+ os << generator.escapeASCIIChar(c);
+ }
+ }
+ }
+ else
+ {
+ os << generator.escapeASCIIChar(c);
+ }
+ }
+ }
+
+ //
+ // Write any outstanding UTF-8-encoded characters
+ //
+ os << generator.flushU8Buffer(u8buffer);
+ }
+ else
+ {
+ assert(escapeMode == Octal);
+
+ for(size_t i = 0; i < value.size(); ++i)
+ {
+ char c = value[i];
+
+ if(static_cast<unsigned char>(c) >= 128)
+ {
+ // Write octal escape
+ os << "\\" << setfill('0') << setw(3) << oct << static_cast<unsigned int>(c & 0xFF);
+ }
+ else if(c == '\\')
+ {
+ if(i + 1 == value.size())
+ {
+ // trailing backslash, add a second one
+ os << "\\\\";
+ }
+ else
+ {
+ c = value[++i];
+
+ if(c == '\\')
+ {
+ os << "\\\\";
+ }
+ else if(c == 'u' || c == 'U')
+ {
+ //
+ // Convert code point to UTF-8 bytes and write the escaped bytes
+ //
+ size_t sz = c == 'U' ? 8 : 4;
+ string codePointStr = value.substr(i + 1, sz);
+ assert(codePointStr.size() == sz);
+
+ IceUtil::Int64 v = IceUtilInternal::strToInt64(codePointStr.c_str(), 0, 16);
+ os << generator.escapeCodePoint(static_cast<unsigned int>(v));
+ i += sz;
+ }
+ else
+ {
+ // unescaped backslash
+ os << "\\\\";
+ os << generator.escapeASCIIChar(c);
+ }
+ }
+ }
+ else
+ {
+ os << generator.escapeASCIIChar(c);
+ }
+ }
+ }
+ return os.str();
+}
diff --git a/cpp/src/Slice/Util.h b/cpp/src/Slice/Util.h
index fdf84a1290e..5b23acf389c 100644
--- a/cpp/src/Slice/Util.h
+++ b/cpp/src/Slice/Util.h
@@ -37,6 +37,22 @@ std::vector<std::string> argvToArgs(int argc, char* argv[]);
#endif
+enum EscapeMode { UCN, Octal, ShortUCN, EC6UCN };
+
+// Generates the contents of a target-language string literal (callers add the enclosing quotes). Parameters:
+// const string& value: input string provided by the Slice parser
+// const string& nonPrintableEscaped: which of \a, \b, \f, \n, \r, \t, \v, \0 (null), \x20 (\s), \x1b (\e) are
+// escaped in the target language
+// Warning: don't include \0 if the target language recognizes octal escapes
+// const string& printableEscaped: additional printable ASCII characters other than \ and " that need to be escaped
+// EscapeMode escapeMode: whether we generate both short and long UCNs (\unnnn and \Unnnnnnnn), octal escape
+// sequences, only short UCNs (\unnnn), or ECMAScript 6-style UCNs with \u{...} for astral characters
+// unsigned char cutOff: characters < cutOff other than the nonPrintableEscaped are generated as
+// octal escape sequences, regardless of escapeMode.
+std::string
+toStringLiteral(const std::string&, const std::string&, const std::string&, EscapeMode, unsigned char);
+
+
class DependOutputUtil : IceUtil::noncopyable
{
public:
diff --git a/cpp/src/Slice/msbuild/slice.vcxproj b/cpp/src/Slice/msbuild/slice.vcxproj
index 56da4105e54..b5d159f532f 100644
--- a/cpp/src/Slice/msbuild/slice.vcxproj
+++ b/cpp/src/Slice/msbuild/slice.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -84,39 +84,11 @@
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
- <ClCompile Include="..\Checksum.cpp" />
- <ClCompile Include="..\CPlusPlusUtil.cpp" />
- <ClCompile Include="..\FileTracker.cpp" />
- <ClCompile Include="..\Grammar.cpp" />
- <ClCompile Include="..\JavaUtil.cpp" />
- <ClCompile Include="..\MD5.cpp" />
- <ClCompile Include="..\MD5I.cpp" />
- <ClCompile Include="..\Parser.cpp" />
- <ClCompile Include="..\PHPUtil.cpp" />
- <ClCompile Include="..\Preprocessor.cpp" />
- <ClCompile Include="..\Python.cpp" />
- <ClCompile Include="..\PythonUtil.cpp" />
- <ClCompile Include="..\Ruby.cpp" />
- <ClCompile Include="..\RubyUtil.cpp" />
- <ClCompile Include="..\Scanner.cpp" />
- <ClCompile Include="..\SliceUtil.cpp" />
+ <ClCompile Include="..\*.cpp" />
</ItemGroup>
<ItemGroup>
- <ClInclude Include="..\Checksum.h" />
- <ClInclude Include="..\CPlusPlusUtil.h" />
- <ClInclude Include="..\FileTracker.h" />
- <ClInclude Include="..\JavaUtil.h" />
- <ClInclude Include="..\Parser.h" />
- <ClInclude Include="..\PHPUtil.h" />
- <ClInclude Include="..\Preprocessor.h" />
- <ClInclude Include="..\PythonUtil.h" />
- <ClInclude Include="..\RubyUtil.h" />
- <ClInclude Include="..\Util.h" />
- <ClInclude Include="..\Grammar.h" />
- <ClInclude Include="..\GrammarUtil.h" />
- <ClInclude Include="..\MD5.h" />
- <ClInclude Include="..\MD5I.h" />
+ <ClInclude Include="..\*.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
-</Project> \ No newline at end of file
+</Project>
diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp
index 582f6edf732..978ab8114f2 100644
--- a/cpp/src/slice2cpp/Gen.cpp
+++ b/cpp/src/slice2cpp/Gen.cpp
@@ -13,8 +13,6 @@
#include <Slice/CPlusPlusUtil.h>
#include <IceUtil/Functional.h>
#include <IceUtil/Iterator.h>
-#include <IceUtil/InputUtil.h>
-#include <IceUtil/StringConverter.h>
#include <IceUtil/StringUtil.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -81,64 +79,6 @@ isConstexprType(const TypePtr& type)
}
}
-string
-u32CodePoint(unsigned int value, bool cpp11)
-{
- ostringstream s;
- //
- // COMPILERFIX:
- // With VC++ < 140 characters in the range of 0 to 0x9f cannot be represented
- // with a universal character name (UCN).
- //
- if(!cpp11 && value <= 0x9f)
- {
- switch(value)
- {
- case 0x22:
- {
- s << "\\\"";
- break;
- }
- case 0x5c:
- {
- s << "\\\\";
- break;
- }
- default:
- {
- s << "\\";
- s << oct;
- s.width(3);
- s.fill('0');
- s << value;
- break;
- }
- }
- }
- //
- // UCN valid characters
- //
- else
- {
- s << "\\U";
- s << hex;
- s.width(8);
- s.fill('0');
- s << value;
- }
- return s.str();
-}
-
-void
-writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out, bool cpp11)
-{
- vector<unsigned int> u32buffer = toUTF32(u8buffer);
-
- for(vector<unsigned int>::const_iterator c = u32buffer.begin(); c != u32buffer.end(); ++c)
- {
- out << u32CodePoint(*c, cpp11);
- }
-}
string
getDeprecateSymbol(const ContainedPtr& p1, const ContainedPtr& p2)
@@ -171,207 +111,16 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt
BuiltinPtr bp = BuiltinPtr::dynamicCast(type);
if(bp && bp->kind() == Builtin::KindString)
{
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
bool wide = (useWstring & TypeContextUseWstring) || findMetaData(metaData) == "wstring";
if(wide || cpp11)
{
- //
- // Wide strings or C++11 narrow string
- //
- vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
out << (wide ? "L\"" : "u8\"");
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
- {
- out << u32CodePoint(static_cast<unsigned char>(value[i]), cpp11);
- }
- else
- {
- u8buffer.push_back(value[i]);
- }
- }
- else
- {
- //
- // Write any padding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, out, cpp11);
- u8buffer.clear();
- }
-
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its characters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- //
- // Convert codepoint to UTF8 bytes and write the escaped bytes
- //
- out << s.substr(0, s.size() - 1);
-
- size_t sz = value[j] == 'U' ? 8 : 4;
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
- out << u32CodePoint(static_cast<unsigned int>(v), cpp11);
-
- i = j + 1 + sz;
- }
- else
- {
- out << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- out << "\\";
- break;
- }
- }
-
- out << value[i]; // Print normally if in basic source character set
- }
- i++;
-
- }
-
- //
- // Write any padding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, out, cpp11);
- u8buffer.clear();
- }
+ out << toStringLiteral(value, "\a\b\f\n\r\t\v", "?", UCN, cpp11 ? 0 : 0x9F + 1);
out << "\"";
}
else // C++98 narrow strings
{
- out << "\""; // Opening "
-
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- unsigned char uc = value[i]; // char may be signed, so make it positive
- ostringstream s;
- s << "\\"; // Print as octal if not in basic source character set
- s.width(3);
- s.fill('0');
- s << oct;
- s << static_cast<unsigned>(uc);
- out << s.str();
- }
- else
- {
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- //
- // Convert codepoint to UTF8 bytes and write the escaped bytes
- //
- out << s.substr(0, s.size() - 1);
-
- size_t sz = value[j] == 'U' ? 8 : 4;
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- vector<unsigned int> u32buffer;
- u32buffer.push_back(static_cast<unsigned int>(v));
-
- vector<unsigned char> u8buffer = fromUTF32(u32buffer);
-
- ostringstream s;
- for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
- {
- s << "\\";
- s.fill('0');
- s.width(3);
- s << oct;
- s << static_cast<unsigned int>(*q);
- }
- out << s.str();
-
- i = j + 1 + sz;
- }
- else
- {
- out << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- out << "\\";
- break;
- }
- }
-
- out << value[i]; // Print normally if in basic source character set
- }
- ++i;
- }
- out << "\""; // Closing "
+ out << "\"" << toStringLiteral(value, "\a\b\f\n\r\t\v", "?", Octal, 0) << "\"";
}
}
else if(bp && bp->kind() == Builtin::KindLong)
diff --git a/cpp/src/slice2cs/Gen.cpp b/cpp/src/slice2cs/Gen.cpp
index 313c04ea229..1fe547582ab 100644
--- a/cpp/src/slice2cs/Gen.cpp
+++ b/cpp/src/slice2cs/Gen.cpp
@@ -10,7 +10,6 @@
#include <IceUtil/DisableWarnings.h>
#include <IceUtil/Functional.h>
#include <IceUtil/StringUtil.h>
-#include <IceUtil/InputUtil.h>
#include <IceUtil/FileUtil.h>
#include <Gen.h>
@@ -23,7 +22,6 @@
#include <IceUtil/Iterator.h>
#include <IceUtil/UUID.h>
-#include <IceUtil/StringConverter.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
#include <Slice/Util.h>
@@ -39,30 +37,6 @@ namespace
{
string
-u16CodePoint(unsigned short value)
-{
- ostringstream s;
- s << "\\u";
- s << hex;
- s.width(4);
- s.fill('0');
- s << value;
- return s.str();
-}
-
-
-void
-writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
-{
- vector<unsigned short> u16buffer = toUTF16(u8buffer);
-
- for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
- {
- out << u16CodePoint(*c);
- }
-}
-
-string
sliceModeToIceMode(Operation::Mode opMode)
{
string mode;
@@ -1298,126 +1272,7 @@ Slice::CsVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePt
EnumPtr ep;
if(bp && bp->kind() == Builtin::KindString)
{
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
-
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
-
- _out << "\""; // Opening "
-
- vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
-
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
- {
- //
- // Print as unicode if not in basic source character set
- //
- _out << u16CodePoint(static_cast<unsigned int>(value[i]));
- }
- else
- {
- u8buffer.push_back(value[i]);
- }
- }
- else
- {
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, _out);
- u8buffer.clear();
- }
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && value[j] == 'U')
- {
- _out << s.substr(0, s.size() - 1);
- i = j + 1;
-
- string codepoint = value.substr(j + 1, 8);
- assert(codepoint.size() == 8);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- //
- // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character
- // literal and is represented using a Unicode surrogate pair.
- //
- if(v > 0xFFFF)
- {
- unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
- unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
- _out << u16CodePoint(high);
- _out << u16CodePoint(low);
- }
- else
- {
- _out << "\\U" << codepoint;
- }
-
- i = j + 1 + 8;
- }
- else
- {
- _out << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- _out << "\\";
- break;
- }
- }
- _out << value[i]; // Print normally if in basic source character set
- }
- i++;
- }
-
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, _out);
- u8buffer.clear();
- }
-
- _out << "\""; // Closing "
+ _out << "\"" << toStringLiteral(value, string("\a\b\f\n\r\t\v\0", 8), "", UCN, 0) << "\""; // explicit length: a bare literal would be cut at the \0
}
else if(bp && bp->kind() == Builtin::KindLong)
{
diff --git a/cpp/src/slice2java/Gen.cpp b/cpp/src/slice2java/Gen.cpp
index c767febeb3a..8c3769c0fb6 100644
--- a/cpp/src/slice2java/Gen.cpp
+++ b/cpp/src/slice2java/Gen.cpp
@@ -14,7 +14,6 @@
#include <IceUtil/Iterator.h>
#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
-#include <IceUtil/StringConverter.h>
#include <cstring>
#include <limits>
@@ -28,29 +27,6 @@ namespace
{
string
-u16CodePoint(unsigned short value)
-{
- ostringstream s;
- s << "\\u";
- s << hex;
- s.width(4);
- s.fill('0');
- s << value;
- return s.str();
-}
-
-void
-writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
-{
- vector<unsigned short> u16buffer = toUTF16(u8buffer);
-
- for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
- {
- out << u16CodePoint(*c);
- }
-}
-
-string
sliceModeToIceMode(Operation::Mode opMode)
{
string mode = "com.zeroc.Ice.OperationMode.";
@@ -1505,168 +1481,7 @@ Slice::JavaVisitor::writeConstantValue(Output& out, const TypePtr& type, const S
{
case Builtin::KindString:
{
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
- out << "\"";
-
- vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
-
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- char c = value[i];
- if(static_cast<unsigned char>(c) < 128) // Single byte character
- {
- //
- // Print as unicode if not in basic source character set
- //
- switch(c)
- {
- //
- // Java doesn't want '\n' or '\r\n' encoded as universal
- // characters, that gives an error "unclosed string literal"
- //
- case '\r':
- {
- out << "\\r";
- break;
- }
- case '\n':
- {
- out << "\\n";
- break;
- }
- default:
- {
- out << u16CodePoint(c);
- break;
- }
- }
- }
- else
- {
- u8buffer.push_back(value[i]);
- }
- }
- else
- {
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, out);
- u8buffer.clear();
- }
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- size_t sz = value[j] == 'U' ? 8 : 4;
- out << s.substr(0, s.size() - 1);
- i = j + 1;
-
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- //
- // Java doesn't like this special characters encoded as universal characters
- //
- if(v == 0x5c)
- {
- out << "\\\\";
- }
- else if(v == 0xa)
- {
- out << "\\n";
- }
- else if(v == 0xd)
- {
- out << "\\r";
- }
- else if(v == 0x22)
- {
- out << "\\\"";
- }
- //
- // Unicode character in the range U+10000 to U+10FFFF is not permitted in a
- // character literal and is represented using a Unicode surrogate pair.
- //
- else if(v > 0xFFFF)
- {
- unsigned int high =
- ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
- unsigned int low =
- ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
- out << u16CodePoint(high);
- out << u16CodePoint(low);
- }
- else
- {
- out << u16CodePoint(static_cast<unsigned int>(v));
- }
-
- i = j + 1 + sz;
- }
- else
- {
- out << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- out << "\\";
- break;
- }
- }
- out << value[i]; // Print normally if in basic source character set
- }
- i++;
- }
-
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, out);
- u8buffer.clear();
- }
-
- out << "\"";
+ out << "\"" << toStringLiteral(value, "\b\f\n\r\t", "", ShortUCN, 0) << "\"";
break;
}
case Builtin::KindByte:
diff --git a/cpp/src/slice2java/GenCompat.cpp b/cpp/src/slice2java/GenCompat.cpp
index 2fef3bf1d63..205d4093750 100644
--- a/cpp/src/slice2java/GenCompat.cpp
+++ b/cpp/src/slice2java/GenCompat.cpp
@@ -12,9 +12,8 @@
#include <Slice/Util.h>
#include <IceUtil/Functional.h>
#include <IceUtil/Iterator.h>
-#include <IceUtil/StringUtil.h>
#include <IceUtil/InputUtil.h>
-#include <IceUtil/StringConverter.h>
+#include <IceUtil/StringUtil.h>
#include <cstring>
#include <limits>
@@ -28,29 +27,6 @@ namespace
{
string
-u16CodePoint(unsigned short value)
-{
- ostringstream s;
- s << "\\u";
- s << hex;
- s.width(4);
- s.fill('0');
- s << value;
- return s.str();
-}
-
-void
-writeU8Buffer(const vector<unsigned char>& u8buffer, ::IceUtilInternal::Output& out)
-{
- vector<unsigned short> u16buffer = toUTF16(u8buffer);
-
- for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
- {
- out << u16CodePoint(*c);
- }
-}
-
-string
sliceModeToIceMode(Operation::Mode opMode)
{
string mode;
@@ -1716,166 +1692,7 @@ Slice::JavaCompatVisitor::writeConstantValue(Output& out, const TypePtr& type, c
{
case Builtin::KindString:
{
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
- out << "\"";
-
- vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
-
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- char c = value[i];
- if(static_cast<unsigned char>(c) < 128) // Single byte character
- {
- //
- // Print as unicode if not in basic source character set
- //
- switch(c)
- {
- //
- // Java doesn't want '\n' or '\r\n' encoded as universal
- // characters, that gives an error "unclosed string literal"
- //
- case '\r':
- {
- out << "\\r";
- break;
- }
- case '\n':
- {
- out << "\\n";
- break;
- }
- default:
- {
- out << u16CodePoint(c);
- break;
- }
- }
- }
- else
- {
- u8buffer.push_back(value[i]);
- }
- }
- else
- {
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, out);
- u8buffer.clear();
- }
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- size_t sz = value[j] == 'U' ? 8 : 4;
- out << s.substr(0, s.size() - 1);
- i = j + 1;
-
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- //
- // Java doesn't like this special characters encoded as universal characters
- //
- if(v == 0x5c)
- {
- out << "\\\\";
- }
- else if(v == 0xa)
- {
- out << "\\n";
- }
- else if(v == 0xd)
- {
- out << "\\r";
- }
- else if(v == 0x22)
- {
- out << "\\\"";
- }
- //
- // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
- // and is represented using a Unicode surrogate pair.
- //
- else if(v > 0xFFFF)
- {
- unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
- unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
- out << u16CodePoint(high);
- out << u16CodePoint(low);
- }
- else
- {
- out << u16CodePoint(static_cast<unsigned int>(v));
- }
-
- i = j + 1 + sz;
- }
- else
- {
- out << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- out << "\\";
- break;
- }
- }
- out << value[i]; // Print normally if in basic source character set
- }
- i++;
- }
-
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, out);
- u8buffer.clear();
- }
-
- out << "\"";
+ out << "\"" << toStringLiteral(value, "\b\f\n\r\t", "", ShortUCN, 0) << "\"";
break;
}
case Builtin::KindByte:
diff --git a/cpp/src/slice2js/Gen.cpp b/cpp/src/slice2js/Gen.cpp
index e685b622006..734af09cd6f 100644
--- a/cpp/src/slice2js/Gen.cpp
+++ b/cpp/src/slice2js/Gen.cpp
@@ -20,7 +20,6 @@
#include <direct.h>
#endif
#include <IceUtil/Iterator.h>
-#include <IceUtil/StringConverter.h>
#include <IceUtil/UUID.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -36,29 +35,6 @@ namespace
{
string
-u16CodePoint(unsigned short value)
-{
- ostringstream s;
- s << "\\u";
- s << hex;
- s.width(4);
- s.fill('0');
- s << value;
- return s.str();
-}
-
-void
-writeU8Buffer(const vector<unsigned char>& u8buffer, ostringstream& out)
-{
- vector<unsigned short> u16buffer = toUTF16(u8buffer);
-
- for(vector<unsigned short>::const_iterator c = u16buffer.begin(); c != u16buffer.end(); ++c)
- {
- out << u16CodePoint(*c);
- }
-}
-
-string
sliceModeToIceMode(Operation::Mode opMode)
{
switch(opMode)
@@ -459,124 +435,9 @@ Slice::JsVisitor::writeConstantValue(const string& scope, const TypePtr& type, c
if(bp && bp->kind() == Builtin::KindString)
{
//
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
+ // For now, we generate strings in ECMAScript 5 format, with two \unnnn for astral characters
//
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
-
- os << "\""; // Opening "
-
- vector<unsigned char> u8buffer; // Buffer to convert multibyte characters
-
- for(size_t i = 0; i < value.size();)
- {
- if(charSet.find(value[i]) == charSet.end())
- {
- if(static_cast<unsigned char>(value[i]) < 128) // Single byte character
- {
- //
- // Print as unicode if not in basic source character set
- //
- os << u16CodePoint(static_cast<unsigned int>(value[i]));
- }
- else
- {
- u8buffer.push_back(value[i]);
- }
- }
- else
- {
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, os);
- u8buffer.clear();
- }
- switch(value[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && value[j] == 'U')
- {
- os << s.substr(0, s.size() - 1);
- i = j + 1;
-
- string codepoint = value.substr(j + 1, 8);
- assert(codepoint.size() == 8);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- //
- // Unicode character in the range U+10000 to U+10FFFF is not permitted in a character literal
- // and is represented using a Unicode surrogate pair.
- //
- if(v > 0xFFFF)
- {
- unsigned int high = ((static_cast<unsigned int>(v) - 0x10000) / 0x400) + 0xD800;
- unsigned int low = ((static_cast<unsigned int>(v) - 0x10000) % 0x400) + 0xDC00;
- os << u16CodePoint(high);
- os << u16CodePoint(low);
- }
- else
- {
- os << u16CodePoint(static_cast<unsigned int>(v));
- }
-
- i = j + 1 + 8;
- }
- else
- {
- os << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- os << "\\";
- break;
- }
- }
- os << value[i]; // Print normally if in basic source character set
- }
- i++;
- }
-
- //
- // Write any pedding characters in the utf8 buffer
- //
- if(!u8buffer.empty())
- {
- writeU8Buffer(u8buffer, os);
- u8buffer.clear();
- }
-
- os << "\""; // Closing "
+ os << "\"" << toStringLiteral(value, "\b\f\n\r\t\v", "", ShortUCN, 0) << "\"";
}
else if(bp && bp->kind() == Builtin::KindLong)
{
@@ -749,7 +610,7 @@ void
Slice::Gen::generate(const UnitPtr& p)
{
//
- // Check for global "js:ice-build" and "js:es6-module"
+ // Check for global "js:ice-build" and "js:es6-module"
// metadata. If this is set then we are building Ice.
//
DefinitionContextPtr dc = p->findDefinitionContext(p->topLevelFile());
@@ -943,7 +804,7 @@ relativePath(string p1, string p2)
string f1 = tokens1.back();
string f2 = tokens2.back();
-
+
tokens1.pop_back();
tokens2.pop_back();
@@ -963,14 +824,14 @@ relativePath(string p1, string p2)
{
return p1;
}
-
+
string newPath;
if(i2 == tokens2.end())
{
newPath += "./";
for(; i1 != tokens1.end(); ++i1)
{
- newPath += *i1 + "/";
+ newPath += *i1 + "/";
}
}
else
@@ -981,7 +842,7 @@ relativePath(string p1, string p2)
}
}
newPath += f1;
-
+
return newPath;
}
@@ -1050,8 +911,8 @@ Slice::Gen::RequireVisitor::writeRequires(const UnitPtr& p)
_out << nl << "import { Ice } from \"ice\";";
_out << nl << "const __M = Ice.__M;";
seenModules.push_back("Ice");
-
-
+
+
for(StringList::const_iterator i = includes.begin(); i != includes.end(); ++i)
{
set<string> modules = p->getTopLevelModules(*i);
@@ -1091,7 +952,7 @@ Slice::Gen::RequireVisitor::writeRequires(const UnitPtr& p)
}
_out << " } from ";
}
-
+
string result = relativePath(*i, p->topLevelFile());
string::size_type pos;
if((pos = result.rfind('.')) != string::npos)
diff --git a/cpp/src/slice2objc/Gen.cpp b/cpp/src/slice2objc/Gen.cpp
index c5f6fa99959..9d51de398e4 100644
--- a/cpp/src/slice2objc/Gen.cpp
+++ b/cpp/src/slice2objc/Gen.cpp
@@ -17,8 +17,6 @@
#include <direct.h>
#endif
#include <IceUtil/Iterator.h>
-#include <IceUtil/StringConverter.h>
-#include <IceUtil/InputUtil.h>
#include <IceUtil/UUID.h>
#include <Slice/Checksum.h>
#include <Slice/FileTracker.h>
@@ -1493,106 +1491,7 @@ Slice::Gen::TypesVisitor::writeConstantValue(IceUtilInternal::Output& out, const
{
if(isString(type))
{
- //
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=,\\\"' ";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
-
- out << "@\""; // Opening @"
-
- for(size_t i = 0; i < val.size();)
- {
- if(charSet.find(val[i]) == charSet.end())
- {
- unsigned char uc = val[i]; // char may be signed, so make it positive
- ostringstream s;
- s << "\\"; // Print as octal if not in basic source character set
- s.width(3);
- s.fill('0');
- s << oct;
- s << static_cast<unsigned>(uc);
- out << s.str();
- }
- else
- {
- switch(val[i])
- {
- case '\\':
- {
- string s = "\\";
- size_t j = i + 1;
- for(; j < val.size(); ++j)
- {
- if(val[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (val[j] == 'U' || val[j] == 'u'))
- {
- //
- // Convert codepoint to UTF8 bytes and write the escaped bytes
- //
- out << s.substr(0, s.size() - 1);
-
- size_t sz = val[j] == 'U' ? 8 : 4;
- string codepoint = val.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- vector<unsigned int> u32buffer;
- u32buffer.push_back(static_cast<unsigned int>(v));
-
- vector<unsigned char> u8buffer = fromUTF32(u32buffer);
-
- ostringstream s;
- for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
- {
- s << "\\";
- s.fill('0');
- s.width(3);
- s << oct;
- s << static_cast<unsigned int>(*q);
- }
- out << s.str();
-
- i = j + 1 + sz;
- }
- else
- {
- out << s;
- i = j;
- }
- continue;
- }
- case '"':
- {
- out << "\\";
- break;
- }
- }
-
- out << val[i]; // Print normally if in basic source character set
- }
- ++i;
- }
- out << "\""; // Closing "
+ out << "@\"" << toStringLiteral(val, "\a\b\f\n\r\t\v", "?", Octal, 0) << "\"";
}
else
{
diff --git a/cpp/src/slice2php/Main.cpp b/cpp/src/slice2php/Main.cpp
index b0f4b4ec7cb..d7b83782349 100644
--- a/cpp/src/slice2php/Main.cpp
+++ b/cpp/src/slice2php/Main.cpp
@@ -16,7 +16,6 @@
#include <IceUtil/StringUtil.h>
#include <IceUtil/Mutex.h>
#include <IceUtil/MutexPtrLock.h>
-#include <IceUtil/StringConverter.h>
#include <Slice/Checksum.h>
#include <Slice/Preprocessor.h>
#include <Slice/FileTracker.h>
@@ -1262,137 +1261,10 @@ CodeVisitor::writeConstantValue(const TypePtr& type, const SyntaxTreeBasePtr& va
}
case Slice::Builtin::KindString:
{
+ // PHP 7.x also supports an EC6UCN-like notation, see:
+ // https://wiki.php.net/rfc/unicode_escape
//
- // Expand strings into the basic source character set. We can't use isalpha() and the like
- // here because they are sensitive to the current locale.
- //
- static const string basicSourceChars = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "0123456789"
- "_{}[]#()<>%:;.?*+-/^&|~!=, '";
- static const set<char> charSet(basicSourceChars.begin(), basicSourceChars.end());
-
- _out << "\""; // Opening "
-
- for(size_t i = 0; i < value.size();)
- {
- char c = value[i];
- switch(c)
- {
- case '$':
- {
- _out << "\\$";
- break;
- }
- case '"':
- {
- _out << "\\\"";
- break;
- }
- case '\\':
- {
-
- string s = "\\";
- size_t j = i + 1;
- for(; j < value.size(); ++j)
- {
- if(value[j] != '\\')
- {
- break;
- }
- s += "\\";
- }
-
- //
- // An even number of slash \ will escape the backslash and
- // the codepoint will be interpreted as its charaters
- //
- // \\u00000041 - ['\\', 'u', '0', '0', '0', '0', '0', '0', '4', '1']
- // \\\u00000041 - ['\\', 'A'] (41 is the codepoint for 'A')
- //
- if(s.size() % 2 != 0 && (value[j] == 'U' || value[j] == 'u'))
- {
- //
- // Convert codepoint to UTF8 bytes and write the escaped bytes
- //
- _out << s.substr(0, s.size() - 1);
-
- size_t sz = value[j] == 'U' ? 8 : 4;
- string codepoint = value.substr(j + 1, sz);
- assert(codepoint.size() == sz);
-
- IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16);
-
-
- vector<unsigned int> u32buffer;
- u32buffer.push_back(static_cast<unsigned int>(v));
-
- vector<unsigned char> u8buffer = fromUTF32(u32buffer);
-
- ostringstream s;
- for(vector<unsigned char>::const_iterator q = u8buffer.begin(); q != u8buffer.end(); ++q)
- {
- s << "\\";
- s.fill('0');
- s.width(3);
- s << oct;
- s << static_cast<unsigned int>(*q);
- }
- _out << s.str();
-
- i = j + 1 + sz;
- }
- else
- {
- _out << s;
- i = j;
- }
- continue;
- }
- case '\r':
- {
- _out << "\\r";
- break;
- }
- case '\n':
- {
- _out << "\\n";
- break;
- }
- case '\t':
- {
- _out << "\\t";
- break;
- }
- case '\f':
- {
- _out << "\\f";
- break;
- }
- default:
- {
- if(charSet.find(c) == charSet.end())
- {
- unsigned char uc = c; // Char may be signed, so make it positive.
- stringstream s;
- s << "\\"; // Print as octal if not in basic source character set.
- s.flags(ios_base::oct);
- s.width(3);
- s.fill('0');
- s << static_cast<unsigned>(uc);
- _out << s.str();
- }
- else
- {
- _out << c; // Print normally if in basic source character set.
- }
- break;
- }
- }
- ++i;
- }
-
- _out << "\""; // Closing "
+ _out << "\"" << toStringLiteral(value, "\f\n\r\t\v\x1b", "$", Octal, 0) << "\"";
break;
}
case Slice::Builtin::KindObject: