summaryrefslogtreecommitdiff
path: root/cpp/src/Slice/Scanner.l
diff options
context:
space:
mode:
Diffstat (limited to 'cpp/src/Slice/Scanner.l')
-rw-r--r--cpp/src/Slice/Scanner.l181
1 files changed, 153 insertions, 28 deletions
diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l
index 190c00bcf5c..a9c381b7260 100644
--- a/cpp/src/Slice/Scanner.l
+++ b/cpp/src/Slice/Scanner.l
@@ -13,6 +13,8 @@
#include <Slice/Grammar.h>
#include <IceUtil/InputUtil.h>
+#include <iomanip>
+
#include <stdlib.h>
#include <math.h>
@@ -261,32 +263,42 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
switch(next)
{
case '\\':
+ {
+ str->v += '\\';
+ str->v += '\\';
+ break;
+ }
case '"':
case '\'':
{
str->v += next;
break;
}
+
case 'n':
{
str->v += '\n';
break;
}
+
case 'r':
{
str->v += '\r';
break;
}
+
case 't':
{
str->v += '\t';
break;
}
+
case 'v':
{
str->v += '\v';
break;
}
+
case 'f':
{
str->v += '\f';
@@ -311,68 +323,181 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]]
break;
}
+ //
+ // Octal escape \nnn: at most three octal digits, terminating early at the
+ // first character that is not a valid octal digit.
+ //
case '0':
case '1':
case '2':
case '3':
+ case '4':
+ case '5':
+ case '7':
{
static string octalDigits = "01234567";
- unsigned short us = next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
+ IceUtil::Int64 value = 0;
+ string escape;
+ escape += next;
+ for(int i = 0; i < 2; ++i)
{
- str->literal += next;
- us = us * 8 + next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
- us = us * 8 + next - '0';
- }
- else
+ next = static_cast<char>(yyinput());
+ if(octalDigits.find_first_of(next) == string::npos)
{
unput(next);
+ break;
}
+ escape += next;
}
- else
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+ if(value == 0)
{
- unput(next);
+ unit->error("illegal NUL character in string constant");
}
- if(us == 0)
+ else if(value > 255)
{
- unit->error("illegal NUL character in string constant");
+ ostringstream os;
+ os << "octal escape sequence out of range: '\\" << oct << value << "'";
+ unit->warning(os.str());
}
- str->v += static_cast<char>(us);
+ str->v += static_cast<char>(value);
break;
}
case 'x':
{
- IceUtil::Int64 ull = 0;
+ IceUtil::Int64 value = 0;
+ string escape = "";
while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
{
+ escape += next;
+ }
+ unput(next);
+
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+ else if(value > 255)
+ {
+ ostringstream os;
+ os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+ unit->warning(os.str());
+ }
+ str->v += static_cast<char>(value);
+ break;
+ }
+
+ //
+ // Universal character name \unnnn code point U+nnnn
+ //
+ case 'u':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 4; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- ull *= 16;
- if(isdigit(static_cast<unsigned char>(next)))
- {
- ull += next - '0';
- }
- else if(islower(static_cast<unsigned char>(next)))
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - 'a' + 10;
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
- else
+ escape += next;
+ }
+
+ value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'u';
+ os.fill('0');
+ os.width(4);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+
+ break;
+ }
+
+ case 'U':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 8; ++i)
+ {
+ next = static_cast<char>(yyinput());
+ str->literal += next;
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - 'A' + 10;
+
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
+ escape += next;
}
- unput(next);
- if(ull == 0)
+
+ value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'U';
+ os.fill('0');
+ os.width(8);
+ os << hex << value;
+
+ if(value == 0)
{
unit->error("illegal NUL character in string constant");
}
- str->v += static_cast<char>(ull);
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
break;
}
- // TODO: add universal character names
+
default:
{
+ ostringstream os;
+ os << "unknown escape sequence '\\" << next << "'";
+ unit->warning(os.str());
+ //
+ // We escape the backslash in an unknown escape sequence
+ // to keep compatibility with 3.6.
+ //
+ str->v += '\\';
str->v += c;
unput(next);
}