diff options
author | randomdan <randomdan@localhost> | 2012-03-21 22:18:23 +0000 |
---|---|---|
committer | randomdan <randomdan@localhost> | 2012-03-21 22:18:23 +0000 |
commit | 1472d716d52299148c738ee9e3daeec4193f5237 (patch) | |
tree | 6af684b317e72c741ecc4b1050e5f793ef0f6fe2 | |
parent | Switch to the new stream style of things (diff) | |
download | project2-1472d716d52299148c738ee9e3daeec4193f5237.tar.bz2 project2-1472d716d52299148c738ee9e3daeec4193f5237.tar.xz project2-1472d716d52299148c738ee9e3daeec4193f5237.zip |
Stream bug fix
Fix trailing filerows ref
-rw-r--r-- | project2/streams/streamRows.cpp | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/project2/streams/streamRows.cpp b/project2/streams/streamRows.cpp index 3bf1578..e0cefca 100644 --- a/project2/streams/streamRows.cpp +++ b/project2/streams/streamRows.cpp @@ -1,8 +1,10 @@ #include <pch.hpp> #include "variables.h" +#include "scopeObject.h" #include "stream.h" #include "definedColumns.h" #include "rowProcessor.h" +#include <boost/algorithm/string/predicate.hpp> class RowProcessor; @@ -47,7 +49,6 @@ class StreamRows : public DefinedColumns, public RowSet { keepBlankRows(p->value("keepBlankRows", false)), countBlankRows(p->value("countBlankRows", false)), newline(p->value("newline", "\n").as<Glib::ustring>()), - newlin(newline, 0, newline.length() - 1), encoding(p->value("encoding", "utf-8").as<std::string>()), skipheader(p->value("skipheader", 0).as<int64_t>()), convertRequired(encoding != "utf-8") @@ -58,31 +59,60 @@ class StreamRows : public DefinedColumns, public RowSet { void execute(const Glib::ustring &, const RowProcessor * rp) const { ParseState ps(this, rp); + char * buf = NULL; + size_t bufLen = 0; + ScopeObject tidy([&]{ free(buf); }); stream->runStream([&](const char * bytes, size_t bytesLen) -> size_t { size_t used = 0, len = 0; - const gchar * utf8 = this->convertRequired ? g_convert(bytes, bytesLen, "utf-8", encoding.c_str(), &used, &len, NULL) : bytes; - for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { - this->pushChar(*iter, ps); + const char * src; + size_t srcLen; + if (bufLen) { + src = buf = (char*)realloc(buf, bufLen + bytesLen); + memcpy(buf + bufLen, bytes, bytesLen); + srcLen = bufLen += bytesLen; + } + else { + src = bytes; + srcLen = bytesLen; } if (convertRequired) { - // We allocated it.. sooo.... - free(const_cast<gchar *>(utf8)); - return used; + gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding.c_str(), &used, &len, NULL); + for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { + this->pushChar(g_utf8_get_char(iter), ps); + } + free(utf8); + } + else { + const gchar * firstInvalid; + g_utf8_validate(src, srcLen, &firstInvalid); + for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) { + this->pushChar(g_utf8_get_char(iter), ps); + } + used = firstInvalid - src; + } + size_t newBuf = srcLen - used; + if (newBuf) { + if (bufLen < newBuf) { + buf = (char*)realloc(buf, newBuf); + } + bufLen = newBuf; + memcpy(buf, src + used, bufLen); } else { - return bytesLen; + bufLen = 0; } + return bytesLen; }); } void pushChar(gunichar c, ParseState & ps) const { - if ((!ps.inQuotes) && (c == *newline.rbegin()) && (ps.tok.compare(ps.tok.length() - newlin.length(), newlin.length(), newlin) == 0)) { + if ((!ps.inQuotes) && (c == *newline.rbegin() && boost::algorithm::ends_with(ps.tok + c, newline))) { if (skipheader) { ps.skipheader -= 1; } else { - ps.tok.erase(ps.tok.length() - newlin.length()); + ps.tok.erase(ps.tok.length() - (newline.length() - 1)); if (!ps.tok.empty()) { *ps.curCol++ = VariableType(ps.tok); } @@ -149,7 +179,6 @@ class StreamRows : public DefinedColumns, public RowSet { const bool keepBlankRows; const bool countBlankRows; const Glib::ustring newline; - const Glib::ustring newlin; const std::string encoding; const size_t skipheader; bool convertRequired; |