From 383ebbcd891fb69d66ae1141eb531aece91d3d8a Mon Sep 17 00:00:00 2001 From: randomdan Date: Sun, 24 Feb 2013 01:02:18 +0000 Subject: Move text reader into it's own class --- project2/common/stream.h | 3 +- project2/streams/streamRows.cpp | 62 +++----------------------------- project2/streams/textReader.cpp | 80 +++++++++++++++++++++++++++++++++++++++++ project2/streams/textReader.h | 19 ++++++++++ 4 files changed, 106 insertions(+), 58 deletions(-) create mode 100644 project2/streams/textReader.cpp create mode 100644 project2/streams/textReader.h diff --git a/project2/common/stream.h b/project2/common/stream.h index 14293f0..eebefab 100644 --- a/project2/common/stream.h +++ b/project2/common/stream.h @@ -7,7 +7,8 @@ class Stream : public SourceObject { public: template Stream(const X &... x) : SourceObject(x...) { } - typedef boost::function2 Sink; + + typedef boost::function Sink; virtual void runStream(const Sink &) const = 0; }; typedef boost::intrusive_ptr StreamPtr; diff --git a/project2/streams/streamRows.cpp b/project2/streams/streamRows.cpp index f807e4f..78f6963 100644 --- a/project2/streams/streamRows.cpp +++ b/project2/streams/streamRows.cpp @@ -4,9 +4,9 @@ #include "stream.h" #include "definedColumns.h" #include "rowProcessor.h" +#include "textReader.h" #include -SimpleSysCallException(ReallocFail); class RowProcessor; /// Base class for Project2 components that create a row set based on the contents of a byte stream @@ -52,8 +52,7 @@ class StreamRows : public DefinedColumns, public RowSet { countBlankRows(p->value("countBlankRows", false)), newline(p->value("newline", "\n").as()), encoding(p->value("encoding", "utf-8").as()), - skipheader(p->value("skipheader", 0).as()), - convertRequired(encoding != "utf-8") + skipheader(p->value("skipheader", 0).as()) { p->script->loader.addLoadTarget(p, Storer::into(&stream)); } @@ -61,60 +60,9 @@ class StreamRows : public DefinedColumns, public RowSet { void execute(const Glib::ustring &, const RowProcessor * rp) const { ParseState ps(this, rp); - char * buf = NULL; - size_t bufLen = 0; - ScopeObject tidy([&]{ free(buf); }); - stream->runStream([&](const char * bytes, size_t bytesLen) -> size_t { - size_t used = 0, len = 0; - const char * src; - size_t srcLen; - if (bufLen) { - char * nbuf = (char*)realloc(buf, bufLen + bytesLen); - if (!nbuf) { - free(buf); - throw ReallocFail(errno); - } - src = buf = nbuf; - memcpy(buf + bufLen, bytes, bytesLen); - srcLen = bufLen += bytesLen; - } - else { - src = bytes; - srcLen = bytesLen; - } - if (convertRequired) { - gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding.c_str(), &used, &len, NULL); - for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { - this->pushChar(g_utf8_get_char(iter), ps); - } - free(utf8); - } - else { - const gchar * firstInvalid; - g_utf8_validate(src, srcLen, &firstInvalid); - for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) { - this->pushChar(g_utf8_get_char(iter), ps); - } - used = firstInvalid - src; - } - size_t newBuf = srcLen - used; - if (newBuf) { - if (bufLen < newBuf) { - char * nbuf = (char*)realloc(buf, newBuf); - if (!nbuf) { - free(buf); - throw ReallocFail(errno); - } - buf = nbuf; - } - bufLen = newBuf; - memcpy(buf, src + used, bufLen); - } - else { - bufLen = 0; - } - return bytesLen; - }); + TextReader::CharSink cs = boost::bind(&StreamRows::pushChar, this, _1, boost::ref(ps)); + TextReader t(encoding.c_str()); + stream->runStream(boost::bind(&TextReader::read, &t, _1, _2, cs)); } void pushChar(gunichar c, ParseState & ps) const diff --git a/project2/streams/textReader.cpp b/project2/streams/textReader.cpp new file mode 100644 index 0000000..8a10d4f --- /dev/null +++ b/project2/streams/textReader.cpp @@ -0,0 +1,80 @@ +#include +#include "textReader.h" + +SimpleSysCallException(ReallocFail); + +TextReader::TextReader() : + convertRequired(false), + encoding(NULL), + buf(NULL), + bufLen(0) +{ +} + +TextReader::TextReader(const gchar * enc) : + convertRequired(strcmp(enc, "utf-8")), + encoding(enc), + buf(NULL), + bufLen(0) +{ +} + +TextReader::~TextReader() +{ + free(buf); +} + +size_t +TextReader::read(const char * bytes, size_t bytesLen, const CharSink & pushChar) +{ + size_t used = 0, len = 0; + const char * src; + size_t srcLen; + if (bufLen) { + char * nbuf = (char*)realloc(buf, bufLen + bytesLen); + if (!nbuf) { + free(buf); + throw ReallocFail(errno); + } + src = buf = nbuf; + memcpy(buf + bufLen, bytes, bytesLen); + srcLen = bufLen += bytesLen; + } + else { + src = bytes; + srcLen = bytesLen; + } + if (convertRequired) { + gchar * utf8 = g_convert(src, srcLen, "utf-8", encoding, &used, &len, NULL); + for (const gchar * iter = utf8; *iter; iter = g_utf8_next_char(iter)) { + pushChar(g_utf8_get_char(iter)); + } + free(utf8); + } + else { + const gchar * firstInvalid; + g_utf8_validate(src, srcLen, &firstInvalid); + for (const gchar * iter = src; iter < firstInvalid && *iter ; iter = g_utf8_next_char(iter)) { + pushChar(g_utf8_get_char(iter)); + } + used = firstInvalid - src; + } + size_t newBuf = srcLen - used; + if (newBuf) { + if (bufLen < newBuf) { + char * nbuf = (char*)realloc(buf, newBuf); + if (!nbuf) { + free(buf); + throw ReallocFail(errno); + } + buf = nbuf; + } + bufLen = newBuf; + memcpy(buf, src + used, bufLen); + } + else { + bufLen = 0; + } + return bytesLen; +} + diff --git a/project2/streams/textReader.h b/project2/streams/textReader.h new file mode 100644 index 0000000..898d4ed --- /dev/null +++ b/project2/streams/textReader.h @@ -0,0 +1,19 @@ + + +class TextReader { + public: + typedef boost::function CharSink; + + TextReader(); + TextReader(const gchar * encoding); + ~TextReader(); + + size_t read(const char * bytes, size_t bytesLen, const CharSink &); + + private: + const bool convertRequired; + const char * encoding; + char * buf; + size_t bufLen; +}; + -- cgit v1.2.3