summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- project2/streams/streamRows.cpp 51
1 files changed, 40 insertions, 11 deletions
diff --git a/project2/streams/streamRows.cpp b/project2/streams/streamRows.cpp
index 3bf1578..e0cefca 100644
--- a/project2/streams/streamRows.cpp
+++ b/project2/streams/streamRows.cpp
@@ -1,8 +1,10 @@
#include <pch.hpp>
#include "variables.h"
+#include "scopeObject.h"
#include "stream.h"
#include "definedColumns.h"
#include "rowProcessor.h"
+#include <boost/algorithm/string/predicate.hpp>
class RowProcessor;
@@ -47,7 +49,6 @@ class StreamRows : public DefinedColumns, public RowSet {
keepBlankRows(p->value("keepBlankRows", false)),
countBlankRows(p->value("countBlankRows", false)),
newline(p->value("newline", "\n").as<Glib::ustring>()),
- newlin(newline, 0, newline.length() - 1),
encoding(p->value("encoding", "utf-8").as<std::string>()),
skipheader(p->value("skipheader", 0).as<int64_t>()),
convertRequired(encoding != "utf-8")
@@ -58,31 +59,60 @@ class StreamRows : public DefinedColumns, public RowSet {
void execute(const Glib::ustring &, const RowProcessor * rp) const
{
ParseState ps(this, rp);
+ char * buf = NULL; // carry-over storage: unconsumed tail of the previous chunk (freed by `tidy`)
+ size_t bufLen = 0; // number of carried-over bytes currently held in buf
+ ScopeObject tidy([&]{ free(buf); });
stream->runStream([&](const char * bytes, size_t bytesLen) -> size_t {
size_t used = 0, len = 0;
- const gchar * utf8 = this->convertRequired ? g_convert(bytes, bytesLen, "utf-8", encoding.c_str(), &used, &len, NULL) : bytes;
- for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) {
- this->pushChar(*iter, ps);
+ const char * src;
+ size_t srcLen;
+ if (bufLen) { // append the new chunk to the carried-over bytes and parse the combined data
+ src = buf = (char*)realloc(buf, bufLen + bytesLen);
+ memcpy(buf + bufLen, bytes, bytesLen);
+ srcLen = bufLen += bytesLen;
+ }
+ else { // nothing carried over: parse the caller's chunk in place
+ src = bytes;
+ srcLen = bytesLen;
}
if (convertRequired) {
- // We allocated it.. sooo....
- free(const_cast<gchar *>(utf8));
- return used;
+ gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding.c_str(), &used, &len, NULL);
+ for (const gchar * iter = utf8; iter && *iter; iter = g_utf8_next_char(iter)) { // utf8 is NULL if g_convert failed; do not dereference it
+ this->pushChar(g_utf8_get_char(iter), ps);
+ }
+ g_free(utf8); // g_convert output must be released with g_free (NULL-safe)
+ }
+ else {
+ const gchar * firstInvalid;
+ g_utf8_validate(src, srcLen, &firstInvalid);
+ for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) {
+ this->pushChar(g_utf8_get_char(iter), ps);
+ }
+ used = firstInvalid - src; // bytes before the first invalid/incomplete sequence count as consumed
+ }
+ size_t newBuf = srcLen - used; // bytes left unconsumed; kept for the next chunk
+ if (newBuf) {
+ if (bufLen < newBuf) {
+ buf = (char*)realloc(buf, newBuf);
+ }
+ bufLen = newBuf;
+ memmove(buf, src + used, bufLen); // src may be buf itself, so the ranges can overlap: memcpy would be undefined
}
else {
- return bytesLen;
+ bufLen = 0;
}
+ return bytesLen;
});
}
void pushChar(gunichar c, ParseState & ps) const
{
- if ((!ps.inQuotes) && (c == *newline.rbegin()) && (ps.tok.compare(ps.tok.length() - newlin.length(), newlin.length(), newlin) == 0)) {
+ if ((!ps.inQuotes) && (c == *newline.rbegin() && boost::algorithm::ends_with(ps.tok + c, newline))) {
if (skipheader) {
ps.skipheader -= 1;
}
else {
- ps.tok.erase(ps.tok.length() - newlin.length());
+ ps.tok.erase(ps.tok.length() - (newline.length() - 1));
if (!ps.tok.empty()) {
*ps.curCol++ = VariableType(ps.tok);
}
@@ -149,7 +179,6 @@ class StreamRows : public DefinedColumns, public RowSet {
const bool keepBlankRows;
const bool countBlankRows;
const Glib::ustring newline;
- const Glib::ustring newlin;
const std::string encoding;
const size_t skipheader;
bool convertRequired;