diff options
| -rw-r--r-- | project2/streams/streamRows.cpp | 51 | 
1 files changed, 40 insertions, 11 deletions
diff --git a/project2/streams/streamRows.cpp b/project2/streams/streamRows.cpp index 3bf1578..e0cefca 100644 --- a/project2/streams/streamRows.cpp +++ b/project2/streams/streamRows.cpp @@ -1,8 +1,10 @@  #include <pch.hpp>  #include "variables.h" +#include "scopeObject.h"  #include "stream.h"  #include "definedColumns.h"  #include "rowProcessor.h" +#include <boost/algorithm/string/predicate.hpp>  class RowProcessor; @@ -47,7 +49,6 @@ class StreamRows : public DefinedColumns, public RowSet {  			keepBlankRows(p->value("keepBlankRows", false)),  			countBlankRows(p->value("countBlankRows", false)),  			newline(p->value("newline", "\n").as<Glib::ustring>()), -			newlin(newline, 0, newline.length() - 1),  			encoding(p->value("encoding", "utf-8").as<std::string>()),  			skipheader(p->value("skipheader", 0).as<int64_t>()),  			convertRequired(encoding != "utf-8") @@ -58,31 +59,60 @@ class StreamRows : public DefinedColumns, public RowSet {  		void execute(const Glib::ustring &, const RowProcessor * rp) const  		{  			ParseState ps(this, rp); +			char * buf = NULL; +			size_t bufLen = 0; +			ScopeObject tidy([&]{ free(buf); });  			stream->runStream([&](const char * bytes, size_t bytesLen) -> size_t {  					size_t used = 0, len = 0; -					const gchar * utf8 = this->convertRequired ? g_convert(bytes, bytesLen, "utf-8", encoding.c_str(), &used, &len, NULL) : bytes; -					for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { -						this->pushChar(*iter, ps); +					const char * src; +					size_t srcLen; +					if (bufLen) { +						src = buf = (char*)realloc(buf, bufLen + bytesLen); +						memcpy(buf + bufLen, bytes, bytesLen); +						srcLen = bufLen += bytesLen; +					} +					else { +						src = bytes; +						srcLen = bytesLen;  					}  					if (convertRequired) { -						// We allocated it.. sooo.... -						free(const_cast<gchar *>(utf8)); -						return used; +						gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding.c_str(), &used, &len, NULL); +						for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { +							this->pushChar(g_utf8_get_char(iter), ps); +						} +						free(utf8); +					} +					else { +						const gchar * firstInvalid; +						g_utf8_validate(src, srcLen, &firstInvalid); +						for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) { +							this->pushChar(g_utf8_get_char(iter), ps); +						} +						used = firstInvalid - src; +					} +					size_t newBuf = srcLen - used; +					if (newBuf) { +						if (bufLen < newBuf) { +							buf = (char*)realloc(buf, newBuf); +						} +						bufLen = newBuf; +						memcpy(buf, src + used, bufLen);  					}  					else { -						return bytesLen; +						bufLen = 0;  					} +					return bytesLen;  					});  		}  		void pushChar(gunichar c, ParseState & ps) const  		{ -			if ((!ps.inQuotes) && (c == *newline.rbegin()) && (ps.tok.compare(ps.tok.length() - newlin.length(), newlin.length(), newlin) == 0)) { +			if ((!ps.inQuotes) && (c == *newline.rbegin() && boost::algorithm::ends_with(ps.tok + c, newline))) {  				if (skipheader) {  					ps.skipheader -= 1;  				}  				else { -					ps.tok.erase(ps.tok.length() - newlin.length()); +					ps.tok.erase(ps.tok.length() - (newline.length() - 1));  					if (!ps.tok.empty()) {  						*ps.curCol++ = VariableType(ps.tok);  					} @@ -149,7 +179,6 @@ class StreamRows : public DefinedColumns, public RowSet {  		const bool keepBlankRows;  		const bool countBlankRows;  		const Glib::ustring newline; -		const Glib::ustring newlin;  		const std::string encoding;  		const size_t skipheader;  		bool convertRequired;  | 
