diff options
-rw-r--r-- | project2/streams/streamNvpRows.cpp | 183 |
1 files changed, 183 insertions, 0 deletions
diff --git a/project2/streams/streamNvpRows.cpp b/project2/streams/streamNvpRows.cpp new file mode 100644 index 0000000..895826a --- /dev/null +++ b/project2/streams/streamNvpRows.cpp @@ -0,0 +1,183 @@ +#include <pch.hpp> +#include "variables.h" +#include "scopeObject.h" +#include "stream.h" +#include "definedColumns.h" +#include "rowProcessor.h" +#include "textReader.h" +#include <boost/algorithm/string/predicate.hpp> +#include <boost/algorithm/string/trim.hpp> + +class RowProcessor; + +/// Base class for Project2 components that create a row set based on the contents of a byte stream +class StreamNvpRows : public RowSet { + public: + class ParseState : public RowState { + public: + typedef boost::function<bool (size_t start)> Process; + + class Token { + public: + Token(const Glib::ustring cs, Process p) : + chars(cs), + process(p) + { + } + bool operator<(const Token & other) const { + return (other.chars.length() < this->chars.length() || + (other.chars.length() == this->chars.length() && this->chars < other.chars)); + } + const Glib::ustring chars; + const Process process; + mutable size_t firstMatch; + }; + + ParseState(const StreamNvpRows * rows, const RowProcessor * proc) : + sr(rows), + rp(proc), + inQuotes(false), + inValue(false), + prevWasQuote(false) + { + tokens.insert(Token(sr->newline, boost::bind(&StreamNvpRows::ParseState::newRecord, this, _1))); + tokens.insert(Token(sr->assign, boost::bind(&StreamNvpRows::ParseState::newField, this, _1))); + tokens.insert(Token(sr->fieldSep, boost::bind(&StreamNvpRows::ParseState::newValue, this, _1))); + if (!sr->quoteChar.empty()) { + tokens.insert(Token(sr->quoteChar, boost::bind(&StreamNvpRows::ParseState::quote, this, _1))); + } + } + + ~ParseState() + { + if (!std::uncaught_exception()) { + newRecord(tok.length()); + } + } + const Columns & getColumns() const { + return columns; + } + + void pushChar(gunichar c) + { + tok += c; + if (inQuotes) { + if (boost::algorithm::ends_with(tok, sr->quoteChar)) { + quote(tok.length() - sr->quoteChar.length()); + } + else { + prevWasQuote = false; + } + } + else { + if (tok.length() >= tokens.begin()->chars.length()) { + BOOST_FOREACH(auto & t, tokens) { + t.firstMatch = tok.rfind(t.chars); + if (t.firstMatch < tok.length() - tokens.begin()->chars.length()) { + t.firstMatch = -1; + } + } + Tokens::const_iterator t = std::min_element(tokens.begin(), tokens.end(), + [](const Token & a, const Token & b) { return a.firstMatch < b.firstMatch; }); + if (t->firstMatch != (size_t)-1) { + if (t->process(t->firstMatch)) { + tok = tok.substr(t->firstMatch + t->chars.length()); + } + } + } + } + } + + private: + bool newRecord(size_t start) { + if (start > 0) { + if (inValue) { + newValue(start); + } + else { + newField(start); + } + } + if (!columns.empty()) { + process(rp); + } + fields.clear(); + columns.clear(); + return true; + } + + bool newField(size_t start) { + columns.insert(new Column(columns.size(), + boost::algorithm::trim_copy_if(tok.substr(0, start), g_unichar_isspace))); + fields.push_back(Null()); + inValue = true; + return true; + } + + bool newValue(size_t start) { + fields.back() = + boost::algorithm::trim_copy_if(tok.substr(0, start), g_unichar_isspace); + inValue = false; + return true; + } + + bool quote(size_t start) { + if (prevWasQuote) { + prevWasQuote = false; + inQuotes = !inQuotes; + } + else { + prevWasQuote = inQuotes; + inQuotes = !inQuotes; + tok.erase(start, sr->quoteChar.length()); + } + return false; + } + + const StreamNvpRows * sr; + const RowProcessor * rp; + bool inQuotes; + bool inValue; + bool prevWasQuote; + Glib::ustring tok; + mutable Columns columns; + + typedef std::set<Token> Tokens; + Tokens tokens; + + }; + + StreamNvpRows(ScriptNodePtr p) : + RowSet(p), + fieldSep(p->value("fieldSep", ",").as<Glib::ustring>()), + quoteChar(p->value("quoteChar", "\"").as<Glib::ustring>()), + keepBlankRows(p->value("keepBlankRows", false)), + countBlankRows(p->value("countBlankRows", false)), + newline(p->value("newline", "\n").as<Glib::ustring>()), + assign(p->value("assign", "=").as<Glib::ustring>()), + encoding(p->value("encoding", "utf-8").as<std::string>()) + { + p->script->loader.addLoadTarget(p, Storer::into<ElementLoader>(&stream)); + } + + void execute(const Glib::ustring &, const RowProcessor * rp) const + { + ParseState ps(this, rp); + TextReader::CharSink cs = boost::bind(&StreamNvpRows::ParseState::pushChar, &ps, _1); + TextReader t(encoding.c_str()); + stream->runStream(boost::bind(&TextReader::read, &t, _1, _2, cs)); + } + + private: + StreamPtr stream; + const Glib::ustring fieldSep; + const Glib::ustring quoteChar; + const bool keepBlankRows; + const bool countBlankRows; + const Glib::ustring newline; + const Glib::ustring assign; + const std::string encoding; + bool convertRequired; +}; +DECLARE_LOADER("streamnvprows", StreamNvpRows); + |