summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--project2/streams/streamNvpRows.cpp183
1 files changed, 183 insertions, 0 deletions
diff --git a/project2/streams/streamNvpRows.cpp b/project2/streams/streamNvpRows.cpp
new file mode 100644
index 0000000..895826a
--- /dev/null
+++ b/project2/streams/streamNvpRows.cpp
@@ -0,0 +1,183 @@
+#include <pch.hpp>
+#include "variables.h"
+#include "scopeObject.h"
+#include "stream.h"
+#include "definedColumns.h"
+#include "rowProcessor.h"
+#include "textReader.h"
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string/trim.hpp>
+
+// Forward declaration: the code in this file only handles RowProcessor through
+// `const RowProcessor *`.
+// NOTE(review): rowProcessor.h is already included above, so this declaration is
+// presumably redundant - confirm before removing.
+class RowProcessor;
+
+/// Row set that builds rows of name/value pairs from the characters of a stream.
+///
+/// The character stream is split on four configurable strings: `newline` ends a
+/// record, `assign` ends a field name, `fieldSep` ends a field value and
+/// `quoteChar` (may be empty to disable quoting) toggles quoted text. Each
+/// completed record that has at least one column is handed to process() with
+/// the RowProcessor given to execute().
+class StreamNvpRows : public RowSet {
+    public:
+        /// Incremental parser state for a single execute() run.
+        ///
+        /// Characters are fed one at a time through pushChar(); the text since
+        /// the last consumed delimiter is accumulated in `tok`.
+        class ParseState : public RowState {
+            public:
+                /// Delimiter handler. `start` is the offset in `tok` at which the
+                /// delimiter begins. Returns true when the delimiter (and
+                /// everything before it) should be removed from `tok`.
+                typedef boost::function<bool (size_t start)> Process;
+
+                /// A delimiter string paired with the handler to run when it is seen.
+                class Token {
+                    public:
+                        Token(const Glib::ustring cs, Process p) :
+                            chars(cs),
+                            process(p),
+                            // Start as "no match" rather than leaving the member indeterminate.
+                            firstMatch(static_cast<size_t>(-1))
+                        {
+                        }
+                        /// Orders longer delimiters first; equal lengths are ordered by
+                        /// their text. As a result the first element of a set of Tokens
+                        /// is always the longest delimiter.
+                        bool operator<(const Token & other) const {
+                            return (other.chars.length() < this->chars.length() ||
+                                    (other.chars.length() == this->chars.length() && this->chars < other.chars));
+                        }
+                        const Glib::ustring chars;
+                        const Process process;
+                        /// Scratch slot used by pushChar(); mutable because set elements are const.
+                        mutable size_t firstMatch;
+                };
+
+                /// Registers the record, assignment and field separators of `rows`,
+                /// plus its quote string when that is not empty.
+                ParseState(const StreamNvpRows * rows, const RowProcessor * proc) :
+                    sr(rows),
+                    rp(proc),
+                    inQuotes(false),
+                    inValue(false),
+                    prevWasQuote(false)
+                {
+                    tokens.insert(Token(sr->newline, boost::bind(&StreamNvpRows::ParseState::newRecord, this, _1)));
+                    tokens.insert(Token(sr->assign, boost::bind(&StreamNvpRows::ParseState::newField, this, _1)));
+                    tokens.insert(Token(sr->fieldSep, boost::bind(&StreamNvpRows::ParseState::newValue, this, _1)));
+                    if (!sr->quoteChar.empty()) {
+                        tokens.insert(Token(sr->quoteChar, boost::bind(&StreamNvpRows::ParseState::quote, this, _1)));
+                    }
+                }
+
+                /// Flushes whatever is still buffered as a final record, unless the
+                /// stack is being unwound by an exception.
+                /// NOTE(review): newRecord() may call process(rp); if that throws from
+                /// a destructor that is implicitly noexcept the program terminates -
+                /// confirm this is acceptable.
+                ~ParseState()
+                {
+                    if (!std::uncaught_exception()) {
+                        newRecord(tok.length());
+                    }
+                }
+                const Columns & getColumns() const {
+                    return columns;
+                }
+
+                /// Appends one character to the buffer and reacts to any delimiter
+                /// that the character completes.
+                void pushChar(gunichar c)
+                {
+                    tok += c;
+                    if (inQuotes) {
+                        // Inside quotes only the quote string itself is significant.
+                        if (boost::algorithm::ends_with(tok, sr->quoteChar)) {
+                            quote(tok.length() - sr->quoteChar.length());
+                        }
+                        else {
+                            prevWasQuote = false;
+                        }
+                    }
+                    else {
+                        // tokens.begin() is the longest delimiter; nothing is examined
+                        // until the buffer is at least that long.
+                        if (tok.length() >= tokens.begin()->chars.length()) {
+                            BOOST_FOREACH(auto & t, tokens) {
+                                t.firstMatch = tok.rfind(t.chars);
+                                // Ignore matches that start before the trailing window of
+                                // longest-delimiter length.
+                                if (t.firstMatch < tok.length() - tokens.begin()->chars.length()) {
+                                    t.firstMatch = -1;
+                                }
+                            }
+                            // Pick the delimiter whose match starts earliest in the window.
+                            Tokens::const_iterator t = std::min_element(tokens.begin(), tokens.end(),
+                                    [](const Token & a, const Token & b) { return a.firstMatch < b.firstMatch; });
+                            if (t->firstMatch != (size_t)-1) {
+                                if (t->process(t->firstMatch)) {
+                                    tok = tok.substr(t->firstMatch + t->chars.length());
+                                    // Only the structural handlers return true. Once one of
+                                    // them has consumed a delimiter, a preceding closing
+                                    // quote can no longer be the first half of an escaped
+                                    // quote; leaving the flag set made the next opening
+                                    // quote be kept as literal text.
+                                    prevWasQuote = false;
+                                }
+                            }
+                        }
+                    }
+                }
+
+            private:
+                /// Ends the current record: stores any pending text as a value or a
+                /// bare field name, processes the row if it has columns, then resets
+                /// the per-record state.
+                bool newRecord(size_t start) {
+                    if (start > 0) {
+                        if (inValue) {
+                            newValue(start);
+                        }
+                        else {
+                            newField(start);
+                        }
+                    }
+                    if (!columns.empty()) {
+                        process(rp);
+                    }
+                    fields.clear();
+                    columns.clear();
+                    // fields is empty again, so no value can be pending; without this a
+                    // record ending in a bare name or an empty value left inValue set and
+                    // the next record could write to fields.back() of an empty container.
+                    inValue = false;
+                    return true;
+                }
+
+                /// Adds a column named by the trimmed text before `start`, with a Null
+                /// value, and marks that a value is now expected.
+                bool newField(size_t start) {
+                    columns.insert(new Column(columns.size(),
+                                boost::algorithm::trim_copy_if(tok.substr(0, start), g_unichar_isspace)));
+                    fields.push_back(Null());
+                    inValue = true;
+                    return true;
+                }
+
+                /// Stores the trimmed text before `start` as the value of the most
+                /// recently added field.
+                bool newValue(size_t start) {
+                    if (!inValue) {
+                        // No field is waiting for a value, so fields.back() is either
+                        // absent or belongs to an already completed field. Treat the text
+                        // the same way newRecord() treats trailing text without an
+                        // assignment: as a field name whose value stays Null.
+                        if (start > 0) {
+                            newField(start);
+                            inValue = false;
+                        }
+                        return true;
+                    }
+                    fields.back() =
+                        boost::algorithm::trim_copy_if(tok.substr(0, start), g_unichar_isspace);
+                    inValue = false;
+                    return true;
+                }
+
+                /// Toggles quoted mode. A quote string that directly follows a closing
+                /// quote re-enters quoted mode and is kept in the buffer as literal
+                /// text; any other quote string is removed from the buffer. Always
+                /// returns false so that pushChar() leaves the buffer otherwise intact.
+                bool quote(size_t start) {
+                    if (prevWasQuote) {
+                        prevWasQuote = false;
+                        inQuotes = !inQuotes;
+                    }
+                    else {
+                        prevWasQuote = inQuotes;
+                        inQuotes = !inQuotes;
+                        tok.erase(start, sr->quoteChar.length());
+                    }
+                    return false;
+                }
+
+                const StreamNvpRows * sr;
+                const RowProcessor * rp;
+                bool inQuotes;      ///< Currently inside quoted text
+                bool inValue;       ///< A field has been added and its value is still pending
+                bool prevWasQuote;  ///< The last thing handled was a closing quote
+                Glib::ustring tok;  ///< Text accumulated since the last consumed delimiter
+                mutable Columns columns;
+
+                typedef std::set<Token> Tokens;
+                Tokens tokens;
+
+        };
+
+        /// Reads the delimiter and encoding settings from the script node and
+        /// registers `stream` as a load target.
+        StreamNvpRows(ScriptNodePtr p) :
+            RowSet(p),
+            fieldSep(p->value("fieldSep", ",").as<Glib::ustring>()),
+            quoteChar(p->value("quoteChar", "\"").as<Glib::ustring>()),
+            keepBlankRows(p->value("keepBlankRows", false)),
+            countBlankRows(p->value("countBlankRows", false)),
+            newline(p->value("newline", "\n").as<Glib::ustring>()),
+            assign(p->value("assign", "=").as<Glib::ustring>()),
+            encoding(p->value("encoding", "utf-8").as<std::string>()),
+            // Not used by the code in this class; initialised so it never holds an
+            // indeterminate value.
+            convertRequired(false)
+        {
+            p->script->loader.addLoadTarget(p, Storer::into<ElementLoader>(&stream));
+        }
+
+        /// Runs the stream through a TextReader built with the configured
+        /// encoding, feeding each character to a fresh ParseState. The
+        /// ParseState destructor flushes the final record when this returns.
+        void execute(const Glib::ustring &, const RowProcessor * rp) const
+        {
+            ParseState ps(this, rp);
+            TextReader::CharSink cs = boost::bind(&StreamNvpRows::ParseState::pushChar, &ps, _1);
+            TextReader t(encoding.c_str());
+            stream->runStream(boost::bind(&TextReader::read, &t, _1, _2, cs));
+        }
+
+    private:
+        StreamPtr stream;
+        const Glib::ustring fieldSep;
+        const Glib::ustring quoteChar;
+        // keepBlankRows and countBlankRows are read from the script node but are not
+        // consulted anywhere in this class.
+        const bool keepBlankRows;
+        const bool countBlankRows;
+        const Glib::ustring newline;
+        const Glib::ustring assign;
+        const std::string encoding;
+        bool convertRequired;
+};
+// NOTE(review): presumably registers StreamNvpRows with the script loader under
+// the element name "streamnvprows" - confirm against the DECLARE_LOADER macro
+// definition, which is not visible in this file.
+DECLARE_LOADER("streamnvprows", StreamNvpRows);
+