diff options
Diffstat (limited to 'cpp/src/Slice/Scanner.l')
-rw-r--r-- | cpp/src/Slice/Scanner.l | 181 |
1 files changed, 153 insertions, 28 deletions
diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l index 190c00bcf5c..a9c381b7260 100644 --- a/cpp/src/Slice/Scanner.l +++ b/cpp/src/Slice/Scanner.l @@ -13,6 +13,8 @@ #include <Slice/Grammar.h> #include <IceUtil/InputUtil.h> +#include <iomanip> + #include <stdlib.h> #include <math.h> @@ -261,32 +263,42 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] switch(next) { case '\\': + { + str->v += '\\'; + str->v += '\\'; + break; + } case '"': case '\'': { str->v += next; break; } + case 'n': { str->v += '\n'; break; } + case 'r': { str->v += '\r'; break; } + case 't': { str->v += '\t'; break; } + case 'v': { str->v += '\v'; break; } + case 'f': { str->v += '\f'; @@ -311,68 +323,181 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] break; } + // + // Octal value \nnn limited to three octal digits but terminate at the first character + // that is not a valid octal digit if encountered sooner. + // case '0': case '1': case '2': case '3': + case '4': + case '5': + case '7': { static string octalDigits = "01234567"; - unsigned short us = next - '0'; - if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos) + IceUtil::Int64 value = 0; + string escape; + escape += next; + for(int i = 0; i < 2; ++i) { - str->literal += next; - us = us * 8 + next - '0'; - if(octalDigits.find_first_of(next = static_cast<char>(yyinput())) != string::npos) - { - us = us * 8 + next - '0'; - } - else + next = static_cast<char>(yyinput()); + if(octalDigits.find_first_of(next) == string::npos) { unput(next); + break; } + escape += next; } - else + str->literal += escape; + value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8); + + if(value == 0) { - unput(next); + unit->error("illegal NUL character in string constant"); } - if(us == 0) + else if(value > 255) { - unit->error("illegal NUL character in string constant"); + ostringstream os; + os << "octal escape sequence out of range: '\\" << oct << 
value << "'"; + unit->warning(os.str()); } - str->v += static_cast<char>(us); + str->v += static_cast<char>(value); break; } case 'x': { - IceUtil::Int64 ull = 0; + IceUtil::Int64 value = 0; + string escape = ""; while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput())))) { + escape += next; + } + unput(next); + + str->literal += escape; + value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16); + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + else if(value > 255) + { + ostringstream os; + os << "hex escape sequence out of range: '\\x" << hex << value << "'"; + unit->warning(os.str()); + } + str->v += static_cast<char>(value); + break; + } + + // + // Universal character name \unnnn code point U+nnnn + // + case 'u': + { + IceUtil::Int64 value = 0; + string escape = ""; + + for(int i = 0; i < 4; ++i) + { + next = static_cast<char>(yyinput()); str->literal += next; - ull *= 16; - if(isdigit(static_cast<unsigned char>(next))) - { - ull += next - '0'; - } - else if(islower(static_cast<unsigned char>(next))) + if(!isxdigit(static_cast<unsigned char>(next))) { - ull += next - 'a' + 10; + unit->error("unknown escape sequence in string constant: " + str->literal); + break; } - else + escape += next; + } + + value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; + + ostringstream os; + os << '\\' << 'u'; + os.fill('0'); + os.width(4); + os << hex << value; + + if(value == 0) + { + unit->error("illegal NUL character in string constant"); + } + + + // + // Determine if a character is a surrogate: + // + // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive + // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. 
+ // + else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff)) + { + unit->error("unknown escape sequence in string constant: '" + os.str() + "'"); + } + + str->v += os.str(); + + break; + } + + case 'U': + { + IceUtil::Int64 value = 0; + string escape = ""; + + for(int i = 0; i < 8; ++i) + { + next = static_cast<char>(yyinput()); + str->literal += next; + if(!isxdigit(static_cast<unsigned char>(next))) { - ull += next - 'A' + 10; + + unit->error("unknown escape sequence in string constant: " + str->literal); + break; } + escape += next; } - unput(next); - if(ull == 0) + + value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; + + ostringstream os; + os << '\\' << 'U'; + os.fill('0'); + os.width(8); + os << hex << value; + + if(value == 0) { unit->error("illegal NUL character in string constant"); } - str->v += static_cast<char>(ull); + + // + // Determine if a character is a surrogate: + // + // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive + // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. + // + else if((value >= 0xd800 && value <= 0xdbff) || (value >= 0xdc00 && value <= 0xdfff)) + { + unit->error("unknown escape sequence in string constant: '" + os.str() + "'"); + } + + str->v += os.str(); break; } - // TODO: add universal character names + default: { + ostringstream os; + os << "unknown escape sequence '\\" << next << "'"; + unit->warning(os.str()); + // + // We escape the backslash in an unknown escape sequence + // to keep compatibility with 3.6 + // + str->v += '\\'; str->v += c; unput(next); } |