diff options
author | Bernard Normier <bernard@zeroc.com> | 2016-10-12 13:34:57 -0400 |
---|---|---|
committer | Bernard Normier <bernard@zeroc.com> | 2016-10-12 13:34:57 -0400 |
commit | 5458f843d41d8335fc6285d95eeb64a6ac0ddf06 (patch) | |
tree | c030221d4c9fe8bfdfffbc7974869e9eb90807b6 /cpp/src | |
parent | Merge pull request #11 from grembo/patch-1 (diff) | |
download | ice-5458f843d41d8335fc6285d95eeb64a6ac0ddf06.tar.bz2 ice-5458f843d41d8335fc6285d95eeb64a6ac0ddf06.tar.xz ice-5458f843d41d8335fc6285d95eeb64a6ac0ddf06.zip |
Changed parsing of hex escape sequences in Slice string literals
Diffstat (limited to 'cpp/src')
-rw-r--r-- | cpp/src/Slice/Scanner.cpp | 26 |
-rw-r--r-- | cpp/src/Slice/Scanner.l | 67 |
-rw-r--r-- | cpp/src/slice2cpp/Gen.cpp | 8 |
3 files changed, 51 insertions, 50 deletions
diff --git a/cpp/src/Slice/Scanner.cpp b/cpp/src/Slice/Scanner.cpp index 27d362e04c9..62d1a7a6d85 100644 --- a/cpp/src/Slice/Scanner.cpp +++ b/cpp/src/Slice/Scanner.cpp @@ -1138,6 +1138,9 @@ YY_RULE_SETUP { case '\\': { + // + // add extra escape to our internal string + // str->v += '\\'; str->v += '\\'; break; @@ -1243,7 +1246,11 @@ YY_RULE_SETUP { IceUtil::Int64 value = 0; string escape = ""; - while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput())))) + + // + // Unlike C++, we limit hex escape sequences to 2 hex digits + // + while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))) && escape.length() < 2) { escape += next; } @@ -1256,16 +1263,11 @@ YY_RULE_SETUP { unit->error("illegal NUL character in string constant"); } - else if(value > 255) - { - ostringstream os; - os << "hex escape sequence out of range: '\\x" << hex << value << "'"; - unit->warning(os.str()); - } + assert(value >= 0 && value <= 255); str->v += static_cast<char>(value); break; } - + // // Universal character name \unnnn code point U+nnnn // @@ -1367,12 +1369,10 @@ YY_RULE_SETUP ostringstream os; os << "unknown escape sequence '\\" << next << "'"; unit->warning(os.str()); - // - // We escape the backslack in a unknown escape sequence - // to keep compativility with 3.6" - // + + // Escape the \ in this unknown escape sequence + str->v += '\\'; str->v += '\\'; - str->v += c; unput(next); } } diff --git a/cpp/src/Slice/Scanner.l b/cpp/src/Slice/Scanner.l index a9c381b7260..aadffc22bc1 100644 --- a/cpp/src/Slice/Scanner.l +++ b/cpp/src/Slice/Scanner.l @@ -264,6 +264,9 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] { case '\\': { + // + // add extra escape to our internal string + // str->v += '\\'; str->v += '\\'; break; @@ -274,13 +277,13 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] str->v += next; break; } - + case 'n': { str->v += '\n'; break; } - + case 'r': { str->v += 
'\r'; @@ -292,13 +295,13 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] str->v += '\t'; break; } - + case 'v': { str->v += '\v'; break; } - + case 'f': { str->v += '\f'; @@ -324,7 +327,7 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] } // - // Octal value \nnn limited to three octal digits but terminate at the first character + // Octal value \nnn limited to three octal digits but terminate at the first character // that is not a valid octal digit if encountered sooner. // case '0': @@ -347,11 +350,11 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] unput(next); break; } - escape += next; + escape += next; } str->literal += escape; value = IceUtilInternal::strToInt64(escape.c_str(), 0, 8); - + if(value == 0) { unit->error("illegal NUL character in string constant"); @@ -365,16 +368,21 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] str->v += static_cast<char>(value); break; } + case 'x': { IceUtil::Int64 value = 0; string escape = ""; - while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput())))) + + // + // Unlike C++, we limit hex escape sequences to 2 hex digits + // + while(isxdigit(static_cast<unsigned char>(next = static_cast<char>(yyinput()))) && escape.length() < 2) { escape += next; } unput(next); - + str->literal += escape; value = IceUtilInternal::strToInt64(escape.c_str(), 0, 16); @@ -382,16 +390,11 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] { unit->error("illegal NUL character in string constant"); } - else if(value > 255) - { - ostringstream os; - os << "hex escape sequence out of range: '\\x" << hex << value << "'"; - unit->warning(os.str()); - } + assert(value >= 0 && value <= 255); str->v += static_cast<char>(value); break; } - + // // Universal character name \unnnn code point U+nnnn // @@ -411,7 +414,7 @@ floating_literal 
(({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] } escape += next; } - + value = escape.size() == 4 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; ostringstream os; @@ -419,16 +422,16 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] os.fill('0'); os.width(4); os << hex << value; - + if(value == 0) { unit->error("illegal NUL character in string constant"); } - - + + // // Determine if a character is a surrogate: - // + // // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. // @@ -438,15 +441,15 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] } str->v += os.str(); - + break; } - + case 'U': { IceUtil::Int64 value = 0; string escape = ""; - + for(int i = 0; i < 8; ++i) { next = static_cast<char>(yyinput()); @@ -459,7 +462,7 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] } escape += next; } - + value = escape.size() == 8 ? IceUtilInternal::strToInt64(escape.c_str(), 0, 16) : -1; ostringstream os; @@ -467,15 +470,15 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] os.fill('0'); os.width(8); os << hex << value; - + if(value == 0) { unit->error("illegal NUL character in string constant"); } - + // // Determine if a character is a surrogate: - // + // // * High surrogate code point, ranging from 0xd800 to 0xdbff, inclusive // * Low surrogate code point, ranging from 0xdc00 to 0xdfff, inclusive. 
// @@ -493,12 +496,10 @@ floating_literal (({fractional_constant}{exponent_part}?)|((\+|-)?[[:digit:]] ostringstream os; os << "unknown escape sequence '\\" << next << "'"; unit->warning(os.str()); - // - // We escape the backslack in a unknown escape sequence - // to keep compativility with 3.6" - // + + // Escape the \ in this unknown escape sequence + str->v += '\\'; str->v += '\\'; - str->v += c; unput(next); } } diff --git a/cpp/src/slice2cpp/Gen.cpp b/cpp/src/slice2cpp/Gen.cpp index 140438eb37e..582f6edf732 100644 --- a/cpp/src/slice2cpp/Gen.cpp +++ b/cpp/src/slice2cpp/Gen.cpp @@ -204,7 +204,7 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt else { // - // Write any pedding characters in the utf8 buffer + // Write any padding characters in the utf8 buffer // if(!u8buffer.empty()) { @@ -229,7 +229,7 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt // // An even number of slash \ will escape the backslash and - // the codepoint will be interpreted as its charaters + // the codepoint will be interpreted as its characters // // \\U00000041 - ['\\', 'U', '0', '0', '0', '0', '0', '0', '4', '1'] // \\\U00000041 - ['\\', 'A'] (41 is the codepoint for 'A') @@ -243,7 +243,7 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt size_t sz = value[j] == 'U' ? 8 : 4; string codepoint = value.substr(j + 1, sz); - assert(codepoint.size() == sz); + assert(codepoint.size() == sz); IceUtil::Int64 v = IceUtilInternal::strToInt64(codepoint.c_str(), 0, 16); out << u32CodePoint(static_cast<unsigned int>(v), cpp11); @@ -271,7 +271,7 @@ writeConstantValue(IceUtilInternal::Output& out, const TypePtr& type, const Synt } // - // Write any pedding characters in the utf8 buffer + // Write any padding characters in the utf8 buffer // if(!u8buffer.empty()) { |