summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrandomdan <randomdan@localhost>2010-11-30 20:19:07 +0000
committerrandomdan <randomdan@localhost>2010-11-30 20:19:07 +0000
commitd145f35a7e2bed53f564d2109d41f4696909ce11 (patch)
treeba6a93237f89a0e5bc679d48ff33445bb7f2ba94
parentTidy up jam stuff (diff)
downloadproject2-d145f35a7e2bed53f564d2109d41f4696909ce11.tar.bz2
project2-d145f35a7e2bed53f564d2109d41f4696909ce11.tar.xz
project2-d145f35a7e2bed53f564d2109d41f4696909ce11.zip
Add an iterator and viewer for XML documents capable of processing huge docs
Add a task for dumping out the values of an iteration
-rw-r--r--project2/Jamfile.jam11
-rw-r--r--project2/dumpTask.cpp41
-rw-r--r--project2/dumpTask.h19
-rw-r--r--project2/xmlRows.cpp167
-rw-r--r--project2/xmlRows.h54
5 files changed, 291 insertions, 1 deletions
diff --git a/project2/Jamfile.jam b/project2/Jamfile.jam
index c1aeb4c..4a2e6d1 100644
--- a/project2/Jamfile.jam
+++ b/project2/Jamfile.jam
@@ -18,7 +18,7 @@ lib curl : : <name>curl ;
lib p2common :
appEngine.cpp dataSource.cpp environment.cpp fileStarGlibIoChannel.cpp iHaveParameters.cpp
- iterate.cpp paramChecker.cpp perRowValues.cpp presenter.cpp rawView.cpp
+ iterate.cpp paramChecker.cpp perRowValues.cpp presenter.cpp rawView.cpp dumpTask.cpp
sourceObject.cpp task.cpp variables.cpp view.cpp xmlObjectLoader.cpp
sessionClearTask.cpp session.cpp sessionSetTask.cpp
:
@@ -27,6 +27,13 @@ lib p2common :
<library>boost_filesystem
;
+lib p2xml :
+ xmlRows.cpp
+ :
+ <library>../libmisc//misc
+ <library>libxmlpp
+ ;
+
lib p2processes :
procRows.cpp
:
@@ -91,6 +98,7 @@ exe p2web :
<library>p2files
<library>p2regex
<library>p2processes
+ <library>p2xml
<library>p2xmlSession
<library>fcgi++
<library>fcgi
@@ -106,6 +114,7 @@ exe p2console :
<library>p2sql
<library>p2mail
<library>p2regex
+ <library>p2xml
;
explicit install ;
diff --git a/project2/dumpTask.cpp b/project2/dumpTask.cpp
new file mode 100644
index 0000000..1e94e65
--- /dev/null
+++ b/project2/dumpTask.cpp
@@ -0,0 +1,41 @@
+#include "dumpTask.h"
+#include "perRowValues.h"
+#include <boost/foreach.hpp>
+#include <stdio.h>
+
+// Loader object for _DumpTask, constructed with the name "dumptask"
+// (presumably the XML element name it handles -- confirm against
+// ElementLoaderImpl).
+ElementLoaderImpl<_DumpTask> dumptaskLoader("dumptask");
+
+// Construct from a configuration element; the element is passed unchanged to
+// both the _SourceObject and the _Task constructors.
+_DumpTask::_DumpTask(const xmlpp::Element * p) :
+    _SourceObject(p), _Task(p)
+{
+}
+
+// The task owns nothing that needs releasing.
+_DumpTask::~_DumpTask()
+{
+}
+
+// This task has no work to do in loadComplete().
+void _DumpTask::loadComplete()
+{
+}
+
+// Write the row at the back of PerRowValues::Stack() to stderr as one line of
+// the form "name = 'value', other = null".
+//
+// A column whose value lookup throws PerRowValues::FieldDoesNotExist is
+// printed as "null". When the stack is empty a diagnostic line is written
+// instead and nothing else is done.
+void
+_DumpTask::execute() const
+{
+    if (PerRowValues::Stack().empty()) {
+        // Calling back() on an empty container is undefined behaviour.
+        fprintf(stderr, "dumptask: no active row set\n");
+        return;
+    }
+    const PerRowValues::RowValuesStack::value_type & r = PerRowValues::Stack().back();
+    const unsigned int cols = r->columnCount();
+    for (unsigned int c = 0; c < cols; c += 1) {
+        if (c > 0) {
+            // Separator between columns, not before the first one.
+            fprintf(stderr, ", ");
+        }
+        try {
+            fprintf(stderr, "%s = '%s'", r->getColumnName(c).c_str(), r->getCurrentValue(c).c_str());
+        }
+        catch (const PerRowValues::FieldDoesNotExist &) {
+            // The arguments are evaluated before fprintf runs, so nothing
+            // has been printed for this column yet.
+            fprintf(stderr, "%s = null", r->getColumnName(c).c_str());
+        }
+    }
+    fprintf(stderr, "\n");
+}
+
diff --git a/project2/dumpTask.h b/project2/dumpTask.h
new file mode 100644
index 0000000..ec004c4
--- /dev/null
+++ b/project2/dumpTask.h
@@ -0,0 +1,19 @@
+#ifndef DUMPTASK_H
+#define DUMPTASK_H
+
+#include "task.h"
+
+// Task whose execute() prints the column names and values of the current row
+// to stderr (implemented in dumpTask.cpp).
+class _DumpTask : public _Task {
+    public:
+        // p is the configuration element the task is built from.
+        _DumpTask(const xmlpp::Element * p);
+        virtual ~_DumpTask();
+
+        // Empty for this task.
+        virtual void loadComplete();
+        // Print the current row to stderr.
+        virtual void execute() const;
+};
+
+// Intrusively reference-counted pointer to a dump task.
+typedef boost::intrusive_ptr<_DumpTask> DumpTask;
+// Dump tasks keyed by string (presumably the task name -- confirm).
+typedef std::map<std::string, DumpTask> DumpTasks;
+
+#endif
+
+
diff --git a/project2/xmlRows.cpp b/project2/xmlRows.cpp
new file mode 100644
index 0000000..8ed912e
--- /dev/null
+++ b/project2/xmlRows.cpp
@@ -0,0 +1,167 @@
+#include "xmlRows.h"
+#include "xml.h"
+#include <libxml++/nodes/textnode.h>
+#include <libxml/xmlreader.h>
+#include <stdexcept>
+#include "xmlObjectLoader.h"
+#include "appEngine.h"
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string/classification.hpp>
+#include <boost/algorithm/string/join.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+
+ElementLoaderImpl<_XmlIterate> xmliterateLoader("xmliterate"); // loader for _XmlIterate, named "xmliterate" (presumably the element name -- confirm)
+ElementLoaderImpl<_XmlView> xmlviewLoader("xmlview"); // loader for _XmlView, named "xmlview" (presumably the element name -- confirm)
+
+// Build the row source from its configuration element.
+//
+// Reads the "recordroot", "recordtrigger" and "filename" attributes of p. The
+// root and trigger are split on '/' into element paths. Every fields/field
+// child element contributes one column: its text is a '/'-separated path that
+// is appended to the record root, and its "name" attribute is the column
+// name. A path segment starting with '@' switches on attribute capture.
+//
+// Throws std::runtime_error if a field element has no text child.
+_XmlRows::_XmlRows(const xmlpp::Element * p) :
+    recordRoot(p->get_attribute_value("recordroot")),
+    recordTrigger(p->get_attribute_value("recordtrigger")),
+    filename(p->get_attribute_value("filename")),
+    anyInterestingAttributes(false)
+{
+    typedef boost::split_iterator<Glib::ustring::iterator> ssi;
+
+    boost::split(root, recordRoot, boost::is_any_of("/"));
+    boost::split(trigger, recordTrigger, boost::is_any_of("/"));
+
+    BOOST_FOREACH(const xmlpp::Node * node, p->find("fields/field")) {
+        const xmlpp::Element * elem = dynamic_cast<const xmlpp::Element *>(node);
+        if (!elem) {
+            continue;
+        }
+        // get_child_text() yields NULL when the element has no text child;
+        // report that instead of dereferencing it.
+        const xmlpp::TextNode * text = elem->get_child_text();
+        if (!text) {
+            throw std::runtime_error("Field element has no path text");
+        }
+        Glib::ustring path(text->get_content());
+
+        // Named fieldPath so it no longer shadows the constructor parameter p.
+        Path fieldPath(root);
+        for (ssi It = make_split_iterator(path, first_finder("/", boost::is_equal())); It != ssi(); ++It) {
+            // A leading, trailing or doubled '/' gives an empty segment,
+            // which has no first character to inspect.
+            if (!It->empty() && It->front() == '@') {
+                anyInterestingAttributes = true;
+            }
+            fieldPath.push_back(Glib::ustring(It->begin(), It->end()));
+        }
+
+        fields[fieldPath] = elem->get_attribute_value("name");
+    }
+    // Column indexes follow the iteration order of the fields map.
+    BOOST_FOREACH(const Interests::value_type & v, fields) {
+        fieldNames.push_back(v.second);
+    }
+}
+
+// All members clean up after themselves; nothing extra to release.
+_XmlRows::~_XmlRows()
+{
+}
+
+// The row source has no work to do in loadComplete().
+void _XmlRows::loadComplete()
+{
+}
+
+// Return the current row's value for the column named id.
+// Throws FieldDoesNotExist when the row holds no value under that name.
+const Glib::ustring &
+_XmlRows::getCurrentValue(const Glib::ustring & id) const
+{
+    const Values::const_iterator found = values.find(id);
+    if (found != values.end()) {
+        return found->second;
+    }
+    throw FieldDoesNotExist();
+}
+
+// Return the current row's value for the column at index col.
+// Throws FieldDoesNotExist when col is not a valid column index or the row
+// holds no value for that column.
+const Glib::ustring &
+_XmlRows::getCurrentValue(unsigned int col) const
+{
+    // Indexing fieldNames out of range would be undefined behaviour.
+    if (col >= fieldNames.size()) {
+        throw FieldDoesNotExist();
+    }
+    Values::const_iterator i = values.find(fieldNames[col]);
+    if (i == values.end()) {
+        throw FieldDoesNotExist();
+    }
+    return i->second;
+}
+
+// True when the current row has no value for the column at index col.
+// An index outside the column list is reported as null rather than being
+// used to index fieldNames.
+bool
+_XmlRows::isNull(unsigned int col) const
+{
+    if (col >= fieldNames.size()) {
+        return true;
+    }
+    return (values.find(fieldNames[col]) == values.end());
+}
+
+// Number of configured fields, i.e. entries in the fields map.
+unsigned int
+_XmlRows::columnCount() const
+{
+    const Interests::size_type count = fields.size();
+    return count;
+}
+
+// Return the name of the column at index col.
+// Throws std::out_of_range (from std::vector::at) when col is not a valid
+// index, instead of reading past the end of fieldNames.
+const Glib::ustring &
+_XmlRows::getColumnName(unsigned int col) const
+{
+    return fieldNames.at(col);
+}
+
+// Record val as the value of the field registered at position, if any.
+//
+// position: path of the node currently being read.
+// values:   row being built; updated in place, keyed by column name.
+// fields:   paths of interest mapped to their column names.
+// val:      text from libxml2; may be NULL.
+//
+// Paths that are not in fields, and NULL values, leave values untouched.
+void
+store(const _XmlRows::Path & position, _XmlRows::Values & values, const _XmlRows::Interests & fields, const xmlChar * val)
+{
+    if (!val) {
+        // Assigning a NULL char pointer to a string is undefined behaviour.
+        return;
+    }
+    const _XmlRows::Interests::const_iterator i = fields.find(position);
+    if (i != fields.end()) {
+        values[i->second] = reinterpret_cast<const char *>(val);
+    }
+}
+
+// Stream the file named by filename through libxml2's pull reader and emit
+// rows.
+//
+// While reading, the path of the current element is tracked in position.
+// Text and (when any field asks for one) attribute values found at or below
+// the record root are stored into values via store(). rowReady() is called
+// when the element at the trigger path closes, and values is cleared when
+// the element at the root path closes.
+//
+// Throws std::runtime_error if the file cannot be opened or the reader
+// reports a read error. The reader is released on every exit path.
+void
+_XmlRows::execute() const
+{
+    // Frees the reader when it goes out of scope, including when an
+    // exception escapes from rowReady().
+    struct ReaderGuard {
+        xmlTextReaderPtr handle;
+        explicit ReaderGuard(xmlTextReaderPtr h) : handle(h) { }
+        ~ReaderGuard() { xmlFreeTextReader(handle); }
+    };
+
+    xmlTextReaderPtr reader = xmlReaderForFile(filename.c_str(), NULL, 0);
+    if (reader == NULL) {
+        throw std::runtime_error("Failed to open file");
+    }
+    const ReaderGuard guard(reader);
+
+    Path position;
+    bool enableCapture = false;
+    int status;
+    while ((status = xmlTextReaderRead(reader)) == 1) {
+        switch (xmlTextReaderNodeType(reader)) {
+            case XML_READER_TYPE_ELEMENT:
+                {
+                    position.push_back(reinterpret_cast<const char *>(xmlTextReaderConstName(reader)));
+                    enableCapture = (boost::algorithm::starts_with(position, root));
+                    // Sampled before the attribute walk below, which moves
+                    // the reader off the element node. Compared with 1
+                    // because the call returns -1 on error.
+                    const bool empty = (xmlTextReaderIsEmptyElement(reader) == 1);
+                    if (enableCapture && anyInterestingAttributes && xmlTextReaderHasAttributes(reader) == 1) {
+                        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
+                            // Attributes are addressed as the element path
+                            // plus a final "@name" segment.
+                            Path p(position);
+                            std::string attr("@");
+                            attr += reinterpret_cast<const char *>(xmlTextReaderConstName(reader));
+                            p.push_back(attr);
+                            store(p, values, fields, xmlTextReaderConstValue(reader));
+                        }
+                    }
+                    if (empty) {
+                        // A self-closing element gets no END_ELEMENT event,
+                        // so its closing work is done here.
+                        if (position == trigger) {
+                            rowReady();
+                        }
+                        if (position == root) {
+                            values.clear();
+                        }
+                        position.pop_back();
+                    }
+                }
+                break;
+            case XML_READER_TYPE_TEXT:
+                if (enableCapture) {
+                    store(position, values, fields, xmlTextReaderConstValue(reader));
+                }
+                break;
+            case XML_READER_TYPE_END_ELEMENT:
+                if (enableCapture) {
+                    if (position == trigger) {
+                        rowReady();
+                    }
+                    if (position == root) {
+                        values.clear();
+                    }
+                }
+                position.pop_back();
+                break;
+        }
+    }
+    if (status < 0) {
+        // xmlTextReaderRead returns -1 on error; do not treat a broken
+        // document as a clean end of file.
+        throw std::runtime_error("Failed to parse file");
+    }
+    // xmlCleanupParser() is intentionally not called: it releases
+    // library-wide state and is only meant for when the whole process has
+    // finished with libxml2, which is not the case after a single execute().
+}
+
+#include "view.hpp" // included here rather than at the top; presumably holds the _GenericView member definitions (confirm)
+template class _GenericView<_XmlRows>; // explicit instantiation of the view for XML rows
+#include "iterate.hpp" // presumably holds the _GenericIterate member definitions (confirm)
+template class _GenericIterate<_XmlRows>; // explicit instantiation of the iterator for XML rows
+
diff --git a/project2/xmlRows.h b/project2/xmlRows.h
new file mode 100644
index 0000000..8b19e9a
--- /dev/null
+++ b/project2/xmlRows.h
@@ -0,0 +1,54 @@
+#ifndef XMLITERATE_H
+#define XMLITERATE_H
+
+#include <libxml++/nodes/element.h>
+#include <boost/intrusive_ptr.hpp>
+#include <map>
+#include "iterate.h"
+#include "view.h"
+
+class ApplicationEngine;
+
+// Row source that reads an XML file with a streaming reader and exposes
+// selected elements and attributes as named columns. Derived classes supply
+// rowReady(), which execute() calls each time the trigger element closes.
+class _XmlRows : public PerRowValues {
+    public:
+        // Column name -> value for the row currently being built.
+        typedef std::map<Glib::ustring, Glib::ustring> Values;
+        // Element names from the outermost element down to a node; an
+        // attribute is a final entry of the form "@name".
+        typedef std::vector<std::string> Path;
+        // Path of each field of interest -> its column name.
+        typedef std::map<Path, Glib::ustring> Interests;
+
+        // p supplies the recordroot, recordtrigger and filename attributes
+        // and the fields/field children.
+        _XmlRows(const xmlpp::Element * p);
+        ~_XmlRows();
+
+        // Read the file and call rowReady() for each completed record.
+        void execute() const;
+        virtual void loadComplete();
+        // Number of configured fields.
+        unsigned int columnCount() const;
+        // Name of the column at index col.
+        const Glib::ustring & getColumnName(unsigned int col) const;
+        // Value of the named column; throws FieldDoesNotExist if absent.
+        const Glib::ustring & getCurrentValue(const Glib::ustring & id) const;
+        // Value of the column at index col; throws FieldDoesNotExist if absent.
+        const Glib::ustring & getCurrentValue(unsigned int col) const;
+        // True when the current row has no value for column col.
+        bool isNull(unsigned int col) const;
+        // Called by execute() when a row is complete.
+        virtual void rowReady() const = 0;
+
+        // Raw attribute values from the configuration element.
+        const Glib::ustring recordRoot;
+        const Glib::ustring recordTrigger;
+        const std::string filename;
+
+    private:
+        // Mutable because execute() is const yet fills the row as it reads.
+        mutable Values values;
+
+        // recordRoot and recordTrigger split on '/'.
+        Path root;
+        Path trigger;
+        // Fields to capture, keyed by full path.
+        Interests fields;
+        // Set when any field path contains a segment starting with '@'.
+        bool anyInterestingAttributes;
+        // Column names in the iteration order of fields; indexed by column.
+        std::vector<Glib::ustring> fieldNames;
+};
+
+// Intrusively reference-counted pointer to an XML row source.
+typedef boost::intrusive_ptr<_XmlRows> XmlRows;
+
+typedef _GenericView<_XmlRows> _XmlView; // generic view template applied to the XML row source
+typedef boost::intrusive_ptr<_XmlView> XmlView; // reference-counted pointer to an XML view
+typedef std::map<std::string, XmlView> XmlViews; // XML views keyed by string (presumably the view name -- confirm)
+
+typedef _GenericIterate<_XmlRows> _XmlIterate; // generic iterate template applied to the XML row source
+typedef boost::intrusive_ptr<_XmlIterate> XmlIterate; // reference-counted pointer to an XML iterator
+typedef std::map<std::string, XmlIterate> XmlIterates; // XML iterators keyed by string (presumably the name -- confirm)
+
+#endif
+