From f8130775360dd5c38d45316dc82dfa1b5ecb2030 Mon Sep 17 00:00:00 2001
From: randomdan <randomdan@localhost>
Date: Wed, 21 Mar 2012 22:18:23 +0000
Subject: Stream bug fix Fix trailing filerows ref

---
 project2/streams/streamRows.cpp | 51 ++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/project2/streams/streamRows.cpp b/project2/streams/streamRows.cpp
index 3bf1578..e0cefca 100644
--- a/project2/streams/streamRows.cpp
+++ b/project2/streams/streamRows.cpp
@@ -1,8 +1,10 @@
 #include <pch.hpp>
 #include "variables.h"
+#include "scopeObject.h"
 #include "stream.h"
 #include "definedColumns.h"
 #include "rowProcessor.h"
+#include <boost/algorithm/string/predicate.hpp>
 
 class RowProcessor;
 
@@ -47,7 +49,6 @@ class StreamRows : public DefinedColumns, public RowSet {
 			keepBlankRows(p->value("keepBlankRows", false)),
 			countBlankRows(p->value("countBlankRows", false)),
 			newline(p->value("newline", "\n").as<Glib::ustring>()),
-			newlin(newline, 0, newline.length() - 1),
 			encoding(p->value("encoding", "utf-8").as<std::string>()),
 			skipheader(p->value("skipheader", 0).as<int64_t>()),
 			convertRequired(encoding != "utf-8")
@@ -58,31 +59,60 @@ class StreamRows : public DefinedColumns, public RowSet {
 		void execute(const Glib::ustring &, const RowProcessor * rp) const
 		{
 			ParseState ps(this, rp);
+			char * buf = NULL; // input bytes left unconsumed by the previous callback invocation
+			size_t bufLen = 0; // number of valid bytes currently held in buf
+			ScopeObject tidy([&]{ free(buf); }); // buf is realloc'd memory, so plain free() is the matching release
 			stream->runStream([&](const char * bytes, size_t bytesLen) -> size_t {
 					size_t used = 0, len = 0;
-					const gchar * utf8 = this->convertRequired ? g_convert(bytes, bytesLen, "utf-8", encoding.c_str(), &used, &len, NULL) : bytes;
-					for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) {
-						this->pushChar(*iter, ps);
+					const char * src; // start of the bytes to decode on this call
+					size_t srcLen; // number of bytes available at src
+					if (bufLen) { // leftovers exist: append this chunk to them and decode the combined buffer
+						src = buf = (char*)realloc(buf, bufLen + bytesLen);
+						memcpy(buf + bufLen, bytes, bytesLen);
+						srcLen = bufLen += bytesLen;
+					}
+					else {
+						src = bytes; // nothing carried over: decode the chunk in place
+						srcLen = bytesLen;
 					}
 					if (convertRequired) {
-						// We allocated it.. sooo....
-						free(const_cast<gchar *>(utf8));
-						return used;
+						gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding.c_str(), &used, &len, NULL);
+						for (const gchar * iter = utf8; iter && *iter; iter = g_utf8_next_char(iter)) { // utf8 is NULL when g_convert fails
+							this->pushChar(g_utf8_get_char(iter), ps);
+						}
+						g_free(utf8); // g_convert output must be released with g_free (NULL is accepted)
+					}
+					else {
+						const gchar * firstInvalid; // set by g_utf8_validate to the end of the valid UTF-8 prefix
+						g_utf8_validate(src, srcLen, &firstInvalid);
+						for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) {
+							this->pushChar(g_utf8_get_char(iter), ps);
+						}
+						used = firstInvalid - src; // only the validated prefix counts as consumed
+					}
+					size_t newBuf = srcLen - used; // bytes not consumed this time, to be kept for the next call
+					if (newBuf) {
+						if (bufLen < newBuf) { // only reached when src == bytes (bufLen is 0), so src stays valid
+							buf = (char*)realloc(buf, newBuf);
+						}
+						bufLen = newBuf;
+						memmove(buf, src + used, bufLen); // src may alias buf, so the ranges can overlap: memmove, not memcpy
 					}
 					else {
-						return bytesLen;
+						bufLen = 0; // everything consumed; allocation is kept and released by tidy
 					}
+					return bytesLen; // the whole chunk is always reported as taken; leftovers live in buf
 					});
 		}
 
 		void pushChar(gunichar c, ParseState & ps) const
 		{
-			if ((!ps.inQuotes) && (c == *newline.rbegin()) && (ps.tok.compare(ps.tok.length() - newlin.length(), newlin.length(), newlin) == 0)) {
+			if ((!ps.inQuotes) && (c == *newline.rbegin() && boost::algorithm::ends_with(ps.tok + c, newline))) {
 				if (skipheader) {
 					ps.skipheader -= 1;
 				}
 				else {
-					ps.tok.erase(ps.tok.length() - newlin.length());
+					ps.tok.erase(ps.tok.length() - (newline.length() - 1));
 					if (!ps.tok.empty()) {
 						*ps.curCol++ = VariableType(ps.tok);
 					}
@@ -149,7 +179,6 @@ class StreamRows : public DefinedColumns, public RowSet {
 		const bool keepBlankRows;
 		const bool countBlankRows;
 		const Glib::ustring newline;
-		const Glib::ustring newlin;
 		const std::string encoding;
 		const size_t skipheader;
 		bool convertRequired;
-- 
cgit v1.2.3