summaryrefslogtreecommitdiff
path: root/cpp/src/Slice/Scanner.cpp
diff options
context:
space:
mode:
authorJose <jose@zeroc.com>2016-03-08 22:09:37 +0100
committerJose <jose@zeroc.com>2016-03-08 22:09:37 +0100
commitcfa587bc0be11012b9ab4e8fd333e96080eb71e2 (patch)
tree31ecc86af1a9a84d09eb3c62cd302830d02a74ce /cpp/src/Slice/Scanner.cpp
parentminor fixes to icegriddb/icestormdb (diff)
parentmore icegriddb/icestormdb fixes (diff)
downloadice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.tar.bz2
ice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.tar.xz
ice-cfa587bc0be11012b9ab4e8fd333e96080eb71e2.zip
Merge remote-tracking branch 'origin/3.6'
Diffstat (limited to 'cpp/src/Slice/Scanner.cpp')
-rw-r--r--cpp/src/Slice/Scanner.cpp231
1 files changed, 178 insertions, 53 deletions
diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp
index a49c6c0433a..27d362e04c9 100644
--- a/cpp/src/Slice/Scanner.cpp
+++ b/cpp/src/Slice/Scanner.cpp
@@ -568,6 +568,8 @@ char *slice_text;
#include <Slice/Grammar.h>
#include <IceUtil/InputUtil.h>
+#include <iomanip>
+
#include <stdlib.h>
#include <math.h>
@@ -630,7 +632,7 @@ int checkKeyword(string&);
-#line 633 "lex.yy.c"
+#line 635 "lex.yy.c"
#define INITIAL 0
#define BOMSCAN 1
@@ -849,10 +851,10 @@ YY_DECL
}
{
-#line 92 "Scanner.l"
+#line 94 "Scanner.l"
-#line 855 "lex.yy.c"
+#line 857 "lex.yy.c"
while ( 1 ) /* loops until end-of-file is reached */
{
@@ -911,7 +913,7 @@ case 1:
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 94 "Scanner.l"
+#line 96 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -926,7 +928,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1);
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 101 "Scanner.l"
+#line 103 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -939,7 +941,7 @@ case 3:
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 108 "Scanner.l"
+#line 110 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -954,7 +956,7 @@ YY_LINENO_REWIND_TO(yy_cp - 1);
(yy_c_buf_p) = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up slice_text again */
YY_RULE_SETUP
-#line 115 "Scanner.l"
+#line 117 "Scanner.l"
{
if(unit->scanPosition(slice_text))
{
@@ -964,7 +966,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 122 "Scanner.l"
+#line 124 "Scanner.l"
{
// C++-style comment
BEGIN(MAINSCAN);
@@ -982,7 +984,7 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
-#line 137 "Scanner.l"
+#line 139 "Scanner.l"
{
// C-style comment
BEGIN(MAINSCAN);
@@ -1026,7 +1028,7 @@ YY_RULE_SETUP
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 178 "Scanner.l"
+#line 180 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_SCOPE_DELIMITER;
@@ -1034,7 +1036,7 @@ YY_RULE_SETUP
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 183 "Scanner.l"
+#line 185 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_METADATA_OPEN;
@@ -1042,7 +1044,7 @@ YY_RULE_SETUP
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 188 "Scanner.l"
+#line 190 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_METADATA_CLOSE;
@@ -1050,7 +1052,7 @@ YY_RULE_SETUP
YY_BREAK
case 10:
YY_RULE_SETUP
-#line 193 "Scanner.l"
+#line 195 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_OPEN;
@@ -1058,7 +1060,7 @@ YY_RULE_SETUP
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 198 "Scanner.l"
+#line 200 "Scanner.l"
{
BEGIN(MAINSCAN);
return ICE_GLOBAL_METADATA_CLOSE;
@@ -1067,7 +1069,7 @@ YY_RULE_SETUP
case 12:
/* rule 12 can match eol */
YY_RULE_SETUP
-#line 203 "Scanner.l"
+#line 205 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr ident = new StringTok;
@@ -1095,7 +1097,7 @@ YY_RULE_SETUP
YY_BREAK
case 13:
YY_RULE_SETUP
-#line 228 "Scanner.l"
+#line 230 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr ident = new StringTok;
@@ -1106,7 +1108,7 @@ YY_RULE_SETUP
YY_BREAK
case 14:
YY_RULE_SETUP
-#line 236 "Scanner.l"
+#line 238 "Scanner.l"
{
BEGIN(MAINSCAN);
StringTokPtr str = new StringTok;
@@ -1135,32 +1137,42 @@ YY_RULE_SETUP
switch(next)
{
case '\\':
+ {
+ str->v += '\\';
+ str->v += '\\';
+ break;
+ }
case '"':
case '\'':
{
str->v += next;
break;
}
+
case 'n':
{
str->v += '\n';
break;
}
+
case 'r':
{
str->v += '\r';
break;
}
+
case 't':
{
str->v += '\t';
break;
}
+
case 'v':
{
str->v += '\v';
break;
}
+
case 'f':
{
str->v += '\f';
@@ -1185,68 +1197,181 @@ YY_RULE_SETUP
break;
}
+ //
+ // Octal value \nnn limited to three octal digits but terminate at the first character
+ // that is not a valid octal digit if encountered sooner.
+ //
+ // Every octal digit '0'..'7' must be listed as a case label here; a digit that is
+ // missing falls through to the default branch and is reported as an unknown escape.
+ //
case '0':
case '1':
case '2':
case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
{
static string octalDigits = "01234567";
- unsigned short us = next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
+ IceUtil::Int64 value = 0;
+ string escape;
+ escape += next;
+ // Read up to two further octal digits; push back the first non-octal character.
+ for(int i = 0; i < 2; ++i)
{
- str->literal += next;
- us = us * 8 + next - '0';
- if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos)
- {
- us = us * 8 + next - '0';
- }
- else
+ next = static_cast<char>(yyinput());
+ if(octalDigits.find_first_of(next) == string::npos)
{
unput(next);
+ break;
}
+ escape += next;
}
- else
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8);
+
+ if(value == 0)
{
- unput(next);
+ unit->error("illegal NUL character in string constant");
}
- if(us == 0)
+ else if(value > 255)
{
- unit->error("illegal NUL character in string constant");
+ ostringstream os;
+ os << "octal escape sequence out of range: '\\" << oct << value << "'";
+ unit->warning(os.str());
}
- str->v += static_cast<char>(us);
+ str->v += static_cast<char>(value);
break;
}
case 'x':
{
- IceUtil::Int64 ull = 0;
+ IceUtil::Int64 value = 0;
+ string escape = "";
while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))))
{
+ escape += next;
+ }
+ unput(next);
+
+ str->literal += escape;
+ value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16);
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+ else if(value > 255)
+ {
+ ostringstream os;
+ os << "hex escape sequence out of range: '\\x" << hex << value << "'";
+ unit->warning(os.str());
+ }
+ str->v += static_cast<char>(value);
+ break;
+ }
+
+ //
+ // Universal character name \unnnn code point U+nnnn
+ //
+ case 'u':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 4; ++i)
+ {
+ next = static_cast<char>(yyinput());
str->literal += next;
- ull *= 16;
- if(isdigit(static_cast<unsigned char>(next)))
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - '0';
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
- else if(islower(static_cast<unsigned char>(next)))
- {
- ull += next - 'a' + 10;
- }
- else
+ escape += next;
+ }
+
+ value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'u';
+ os.fill('0');
+ os.width(4);
+ os << hex << value;
+
+ if(value == 0)
+ {
+ unit->error("illegal NUL character in string constant");
+ }
+
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
+
+ break;
+ }
+
+ case 'U':
+ {
+ IceUtil::Int64 value = 0;
+ string escape = "";
+
+ for(int i = 0; i < 8; ++i)
+ {
+ next = static_cast<char>(yyinput());
+ str->literal += next;
+ if(!isxdigit(static_cast<unsigned char>(next)))
{
- ull += next - 'A' + 10;
+
+ unit->error("unknown escape sequence in string constant: " + str->literal);
+ break;
}
+ escape += next;
}
- unput(next);
- if(ull == 0)
+
+ value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1;
+
+ ostringstream os;
+ os << '\\' << 'U';
+ os.fill('0');
+ os.width(8);
+ os << hex << value;
+
+ if(value == 0)
{
unit->error("illegal NUL character in string constant");
}
- str->v += static_cast<char>(ull);
+
+ //
+ // Determine if a character is a surrogate:
+ //
+ // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive
+ // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive.
+ //
+ else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff))
+ {
+ unit->error("unknown escape sequence in string constant: '" + os.str() + "'");
+ }
+
+ str->v += os.str();
break;
}
- // TODO: add universal character names
+
default:
{
+ ostringstream os;
+ os << "unknown escape sequence '\\" << next << "'";
+ unit->warning(os.str());
+ //
+ // We escape the backslash in an unknown escape sequence
+ // to keep compatibility with 3.6
+ //
+ str->v += '\\';
str->v += c;
unput(next);
}
@@ -1263,7 +1388,7 @@ YY_RULE_SETUP
YY_BREAK
case 15:
YY_RULE_SETUP
-#line 390 "Scanner.l"
+#line 515 "Scanner.l"
{
BEGIN(MAINSCAN);
IntegerTokPtr itp = new IntegerTok;
@@ -1282,7 +1407,7 @@ YY_RULE_SETUP
YY_BREAK
case 16:
YY_RULE_SETUP
-#line 406 "Scanner.l"
+#line 531 "Scanner.l"
{
BEGIN(MAINSCAN);
errno = 0;
@@ -1316,7 +1441,7 @@ YY_RULE_SETUP
case 17:
/* rule 17 can match eol */
YY_RULE_SETUP
-#line 436 "Scanner.l"
+#line 561 "Scanner.l"
{
// Ignore white-space
@@ -1332,7 +1457,7 @@ YY_RULE_SETUP
YY_BREAK
case 18:
YY_RULE_SETUP
-#line 449 "Scanner.l"
+#line 574 "Scanner.l"
{
// Ignore UTF-8 BOM, rule only active when parsing start of file.
@@ -1341,7 +1466,7 @@ YY_RULE_SETUP
YY_BREAK
case 19:
YY_RULE_SETUP
-#line 455 "Scanner.l"
+#line 580 "Scanner.l"
{
BEGIN(MAINSCAN);
if(slice_text[0] < 32 || slice_text[0] > 126)
@@ -1360,10 +1485,10 @@ YY_RULE_SETUP
YY_BREAK
case 20:
YY_RULE_SETUP
-#line 471 "Scanner.l"
+#line 596 "Scanner.l"
ECHO;
YY_BREAK
-#line 1366 "lex.yy.c"
+#line 1491 "lex.yy.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(BOMSCAN):
case YY_STATE_EOF(MAINSCAN):
@@ -2364,7 +2489,7 @@ void slice_free (void * ptr )
#define YYTABLES_NAME "yytables"
-#line 470 "Scanner.l"
+#line 595 "Scanner.l"