diff options
| -rw-r--r-- | project2/common/stream.h | 3 | ||||
| -rw-r--r-- | project2/streams/streamRows.cpp | 62 | ||||
| -rw-r--r-- | project2/streams/textReader.cpp | 80 | ||||
| -rw-r--r-- | project2/streams/textReader.h | 19 | 
4 files changed, 106 insertions, 58 deletions
diff --git a/project2/common/stream.h b/project2/common/stream.h index 14293f0..eebefab 100644 --- a/project2/common/stream.h +++ b/project2/common/stream.h @@ -7,7 +7,8 @@  class Stream : public SourceObject {  	public:  		template<typename... X> Stream(const X &... x) : SourceObject(x...) { } -		typedef boost::function2<size_t, const char *, size_t> Sink; + +		typedef boost::function<size_t(const char *, size_t)> Sink;  		virtual void runStream(const Sink &) const = 0;  };  typedef boost::intrusive_ptr<Stream> StreamPtr; diff --git a/project2/streams/streamRows.cpp b/project2/streams/streamRows.cpp index f807e4f..78f6963 100644 --- a/project2/streams/streamRows.cpp +++ b/project2/streams/streamRows.cpp @@ -4,9 +4,9 @@  #include "stream.h"  #include "definedColumns.h"  #include "rowProcessor.h" +#include "textReader.h"  #include <boost/algorithm/string/predicate.hpp> -SimpleSysCallException(ReallocFail);  class RowProcessor;  /// Base class for Project2 components that create a row set based on the contents of a byte stream @@ -52,8 +52,7 @@ class StreamRows : public DefinedColumns, public RowSet {  			countBlankRows(p->value("countBlankRows", false)),  			newline(p->value("newline", "\n").as<Glib::ustring>()),  			encoding(p->value("encoding", "utf-8").as<std::string>()), -			skipheader(p->value("skipheader", 0).as<int64_t>()), -			convertRequired(encoding != "utf-8") +			skipheader(p->value("skipheader", 0).as<int64_t>())  		{  			p->script->loader.addLoadTarget(p, Storer::into<ElementLoader>(&stream));  		} @@ -61,60 +60,9 @@ class StreamRows : public DefinedColumns, public RowSet {  		void execute(const Glib::ustring &, const RowProcessor * rp) const  		{  			ParseState ps(this, rp); -			char * buf = NULL; -			size_t bufLen = 0; -			ScopeObject tidy([&]{ free(buf); }); -			stream->runStream([&](const char * bytes, size_t bytesLen) -> size_t { -					size_t used = 0, len = 0; -					const char * src; -					size_t srcLen; -					if (bufLen) { -						char * nbuf = (char*)realloc(buf, bufLen + bytesLen); -						if (!nbuf) { -							free(buf); -							throw ReallocFail(errno); -						} -						src = buf = nbuf; -						memcpy(buf + bufLen, bytes, bytesLen); -						srcLen = bufLen += bytesLen; -					} -					else { -						src = bytes; -						srcLen = bytesLen; -					} -					if (convertRequired) { -						gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding.c_str(), &used, &len, NULL); -						for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { -							this->pushChar(g_utf8_get_char(iter), ps); -						} -						free(utf8); -					} -					else { -						const gchar * firstInvalid; -						g_utf8_validate(src, srcLen, &firstInvalid); -						for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) { -							this->pushChar(g_utf8_get_char(iter), ps); -						} -						used = firstInvalid - src; -					} -					size_t newBuf = srcLen - used; -					if (newBuf) { -						if (bufLen < newBuf) { -							char * nbuf = (char*)realloc(buf, newBuf); -							if (!nbuf) { -								free(buf); -								throw ReallocFail(errno); -							} -							buf = nbuf; -						} -						bufLen = newBuf; -						memcpy(buf, src + used, bufLen); -					} -					else { -						bufLen = 0; -					} -					return bytesLen; -					}); +			TextReader::CharSink cs = boost::bind(&StreamRows::pushChar, this, _1, boost::ref(ps)); +			TextReader t(encoding.c_str()); +			stream->runStream(boost::bind(&TextReader::read, &t, _1, _2, cs));  		}  		void pushChar(gunichar c, ParseState & ps) const diff --git a/project2/streams/textReader.cpp b/project2/streams/textReader.cpp new file mode 100644 index 0000000..8a10d4f --- /dev/null +++ b/project2/streams/textReader.cpp @@ -0,0 +1,80 @@ +#include <pch.hpp> +#include "textReader.h" + +SimpleSysCallException(ReallocFail); + +TextReader::TextReader() : +	convertRequired(false), +	encoding(NULL), +	buf(NULL), +	bufLen(0) +{ +} + +TextReader::TextReader(const gchar * enc) : +	convertRequired(strcmp(enc, "utf-8")), +	encoding(enc), +	buf(NULL), +	bufLen(0) +{ +} + +TextReader::~TextReader() +{ +	free(buf); +} + +size_t +TextReader::read(const char * bytes, size_t bytesLen, const CharSink & pushChar) +{ +	size_t used = 0, len = 0; +	const char * src; +	size_t srcLen; +	if (bufLen) { +		char * nbuf = (char*)realloc(buf, bufLen + bytesLen); +		if (!nbuf) { +			free(buf); +			throw ReallocFail(errno); +		} +		src = buf = nbuf; +		memcpy(buf + bufLen, bytes, bytesLen); +		srcLen = bufLen += bytesLen; +	} +	else { +		src = bytes; +		srcLen = bytesLen; +	} +	if (convertRequired) { +		gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding, &used, &len, NULL); +		for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { +			pushChar(g_utf8_get_char(iter)); +		} +		free(utf8); +	} +	else { +		const gchar * firstInvalid; +		g_utf8_validate(src, srcLen, &firstInvalid); +		for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) { +			pushChar(g_utf8_get_char(iter)); +		} +		used = firstInvalid - src; +	} +	size_t newBuf = srcLen - used; +	if (newBuf) { +		if (bufLen < newBuf) { +			char * nbuf = (char*)realloc(buf, newBuf); +			if (!nbuf) { +				free(buf); +				throw ReallocFail(errno); +			} +			buf = nbuf; +		} +		bufLen = newBuf; +		memcpy(buf, src + used, bufLen); +	} +	else { +		bufLen = 0; +	} +	return bytesLen; +} + diff --git a/project2/streams/textReader.h b/project2/streams/textReader.h new file mode 100644 index 0000000..898d4ed --- /dev/null +++ b/project2/streams/textReader.h @@ -0,0 +1,19 @@ + + +class TextReader { +	public: +		typedef boost::function<void(gunichar)> CharSink; + +		TextReader(); +		TextReader(const gchar * encoding); +		~TextReader(); + +		size_t read(const char * bytes, size_t bytesLen, const CharSink &); + +	private: +		const bool convertRequired; +		const char * encoding; +		char * buf; +		size_t bufLen; +}; +  | 
