A stream processing module that reads sets of name=value pairs

author: randomdan <randomdan@localhost> 2013-03-28 00:12:42 +0000
committer: randomdan <randomdan@localhost> 2013-03-28 00:12:42 +0000
commit: d602b562737fae286e96bdc248765085490b9675 (patch)
tree: f91847e6e038dc4776d6161fd7f879530f604630
parent: Adds a presenter loader that performs basic HTTP content negotiation (diff)
download: project2-d602b562737fae286e96bdc248765085490b9675.tar.bz2
project2-d602b562737fae286e96bdc248765085490b9675.tar.xz
project2-d602b562737fae286e96bdc248765085490b9675.zip
1 files changed, 183 insertions, 0 deletions
diff --git a/project2/streams/streamNvpRows.cpp b/project2/streams/streamNvpRows.cpp
new file mode 100644
index 0000000..895826a
--- /dev/null
+++ b/project2/streams/streamNvpRows.cpp
@@ -0,0 +1,183 @@
+#include <pch.hpp>
+#include "variables.h"
+#include "scopeObject.h"
+#include "stream.h"
+#include "definedColumns.h"
+#include "rowProcessor.h"
+#include "textReader.h"
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string/trim.hpp>
+
+class RowProcessor;
+
+/// Project2 row set that parses a character stream of name=value pairs into rows
+class StreamNvpRows : public RowSet {
+	public:
+		/// Parser state for one execute() call; characters arrive one at a time via pushChar().
+		class ParseState : public RowState {
+			public:
+				/// Delimiter handler: takes the delimiter's offset in tok; true means tok was consumed.
+				typedef boost::function<bool (size_t start)> Process;
+				/// A delimiter string paired with the handler to run when it is found.
+				class Token {
+					public:
+						Token(const Glib::ustring & cs, const Process & p) :
+							chars(cs), process(p), firstMatch(Glib::ustring::npos) { }
+						/// Longest delimiter sorts first (ties by text), so tokens.begin() is the longest.
+						bool operator<(const Token & other) const {
+							return (other.chars.length() < this->chars.length() ||
+									(other.chars.length() == this->chars.length() && this->chars < other.chars));
+						}
+						const Glib::ustring chars;
+						const Process process;
+						mutable size_t firstMatch; // pushChar() scratch; mutable as std::set elements are const
+				};
+
+				/// Registers a handler per configured delimiter; an empty quoteChar disables quoting.
+				ParseState(const StreamNvpRows * rows, const RowProcessor * proc) :
+					sr(rows), rp(proc), inQuotes(false), inValue(false), prevWasQuote(false)
+				{
+					tokens.insert(Token(sr->newline, boost::bind(&StreamNvpRows::ParseState::newRecord, this, _1)));
+					tokens.insert(Token(sr->assign, boost::bind(&StreamNvpRows::ParseState::newField, this, _1)));
+					tokens.insert(Token(sr->fieldSep, boost::bind(&StreamNvpRows::ParseState::newValue, this, _1)));
+					if (!sr->quoteChar.empty()) {
+						tokens.insert(Token(sr->quoteChar, boost::bind(&StreamNvpRows::ParseState::quote, this, _1)));
+					}
+				}
+
+				/// Flushes the final, unterminated record unless an exception is already in flight.
+				/// NOTE(review): this runs process(rp) from a destructor - confirm that it cannot throw.
+				~ParseState() {
+					if (!std::uncaught_exception()) {
+						newRecord(tok.length());
+					}
+				}
+				const Columns & getColumns() const { return columns; }
+
+				/// Appends one character to tok and acts on any delimiter it completes.
+				void pushChar(gunichar c) {
+					tok += c;
+					if (inQuotes) {
+						// Within quotes only the quote string itself is significant
+						if (boost::algorithm::ends_with(tok, sr->quoteChar)) {
+							quote(tok.length() - sr->quoteChar.length());
+						} else {
+							prevWasQuote = false;
+						}
+					} else if (tok.length() >= tokens.begin()->chars.length()) {
+						// A delimiter counts only if it starts within the last (longest delimiter length) characters
+						const size_t earliest = tok.length() - tokens.begin()->chars.length();
+						BOOST_FOREACH(auto & t, tokens) {
+							t.firstMatch = tok.rfind(t.chars);
+							if (t.firstMatch < earliest) {
+								t.firstMatch = Glib::ustring::npos;
+							}
+						}
+						Tokens::const_iterator t = std::min_element(tokens.begin(), tokens.end(),
+								[](const Token & a, const Token & b) { return a.firstMatch < b.firstMatch; });
+						if (t->firstMatch != Glib::ustring::npos && t->process(t->firstMatch)) {
+							tok = tok.substr(t->firstMatch + t->chars.length());
+							// Fix: a consumed delimiter clears the escaped-quote state, else the next opening quote stayed literal
+							prevWasQuote = false;
+						}
+					}
+				}
+
+			private:
+				/// Ends the record: stores pending text, calls process(rp) if any columns exist, then resets.
+				bool newRecord(size_t start) {
+					if (start > 0) {
+						if (inValue) {
+							newValue(start);
+						} else {
+							newField(start);
+						}
+					}
+					if (!columns.empty()) {
+						process(rp);
+					}
+					fields.clear();
+					columns.clear();
+					inValue = false; // fix: a stale flag made the next record assign into the emptied fields
+					return true;
+				}
+				/// Starts a field: the trimmed text before offset start names a new column whose value is Null.
+				bool newField(size_t start) {
+					columns.insert(new Column(columns.size(),
+								boost::algorithm::trim_copy_if(tok.substr(0, start), g_unichar_isspace)));
+					fields.push_back(Null());
+					inValue = true;
+					return true;
+				}
+				/// Stores the trimmed text before a field separator as the pending field's value.
+				/// Fix: with no value pending the text is a bare field name (as in newRecord()), not an overwrite.
+				bool newValue(size_t start) {
+					if (inValue) {
+						fields.back() = boost::algorithm::trim_copy_if(tok.substr(0, start), g_unichar_isspace);
+					} else if (start > 0) {
+						newField(start);
+					}
+					inValue = false;
+					return true;
+				}
+				/// Handles a quote delimiter at offset start; returns false so tok is never consumed here.
+				bool quote(size_t start) {
+					if (prevWasQuote) { // follows a closing quote: an escaped literal, left in tok
+						prevWasQuote = false;
+						inQuotes = !inQuotes;
+					} else { // opening or closing quote: toggle the state and drop the delimiter
+						prevWasQuote = inQuotes;
+						inQuotes = !inQuotes;
+						tok.erase(start, sr->quoteChar.length());
+					}
+					return false;
+				}
+
+				const StreamNvpRows * sr;
+				const RowProcessor * rp;
+				bool inQuotes; // currently inside a quoted section
+				bool inValue; // a field name has been read and its value is pending
+				bool prevWasQuote; // the last quote closed a quoted section, so the next may be an escape
+				Glib::ustring tok; // text accumulated since the last consumed delimiter
+				mutable Columns columns;
+				typedef std::set<Token> Tokens;
+				Tokens tokens;
+		};
+
+		/// Reads the delimiter and encoding settings from the script node and registers the stream load target.
+		StreamNvpRows(ScriptNodePtr p) :
+			RowSet(p),
+			fieldSep(p->value("fieldSep", ",").as<Glib::ustring>()),
+			quoteChar(p->value("quoteChar", "\"").as<Glib::ustring>()),
+			keepBlankRows(p->value("keepBlankRows", false)),
+			countBlankRows(p->value("countBlankRows", false)),
+			newline(p->value("newline", "\n").as<Glib::ustring>()),
+			assign(p->value("assign", "=").as<Glib::ustring>()),
+			encoding(p->value("encoding", "utf-8").as<std::string>()),
+			convertRequired(false)
+		{
+			p->script->loader.addLoadTarget(p, Storer::into<ElementLoader>(&stream));
+		}
+
+		/// Runs the stream through a TextReader (configured encoding) whose character sink is a fresh ParseState.
+		void execute(const Glib::ustring &, const RowProcessor * rp) const
+		{
+			ParseState ps(this, rp);
+			TextReader::CharSink cs = boost::bind(&StreamNvpRows::ParseState::pushChar, &ps, _1);
+			TextReader t(encoding.c_str());
+			stream->runStream(boost::bind(&TextReader::read, &t, _1, _2, cs));
+		}
+
+	private:
+		StreamPtr stream;
+		const Glib::ustring fieldSep;
+		const Glib::ustring quoteChar;
+		const bool keepBlankRows; // read from the script node; not used by the code in this class
+		const bool countBlankRows; // read from the script node; not used by the code in this class
+		const Glib::ustring newline;
+		const Glib::ustring assign;
+		const std::string encoding;
+		bool convertRequired; // not used in this class; initialised so it never holds an indeterminate value
+};
+DECLARE_LOADER("streamnvprows", StreamNvpRows); // presumably registers StreamNvpRows with the script loader under the name "streamnvprows" - macro is defined elsewhere, TODO confirm
+
author	randomdan <randomdan@localhost>	2013-03-28 00:12:42 +0000
committer	randomdan <randomdan@localhost>	2013-03-28 00:12:42 +0000
commit	d602b562737fae286e96bdc248765085490b9675 (patch)
tree	f91847e6e038dc4776d6161fd7f879530f604630
parent	Adds a presenter loader that performs basic HTTP content negotiation (diff)
download	project2-d602b562737fae286e96bdc248765085490b9675.tar.bz2 project2-d602b562737fae286e96bdc248765085490b9675.tar.xz project2-d602b562737fae286e96bdc248765085490b9675.zip