From 6ee876e1d6901027d6f86c64690cc321d343c8c3 Mon Sep 17 00:00:00 2001 From: randomdan Date: Tue, 30 Nov 2010 20:19:07 +0000 Subject: Add an iterator and viewer for XML documents capable of processing huge docs Add a task for dumping out the values of an iteration --- project2/Jamfile.jam | 11 +++- project2/dumpTask.cpp | 41 +++++++++++++ project2/dumpTask.h | 19 ++++++ project2/xmlRows.cpp | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++ project2/xmlRows.h | 54 ++++++++++++++++ 5 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 project2/dumpTask.cpp create mode 100644 project2/dumpTask.h create mode 100644 project2/xmlRows.cpp create mode 100644 project2/xmlRows.h diff --git a/project2/Jamfile.jam b/project2/Jamfile.jam index c1aeb4c..4a2e6d1 100644 --- a/project2/Jamfile.jam +++ b/project2/Jamfile.jam @@ -18,7 +18,7 @@ lib curl : : curl ; lib p2common : appEngine.cpp dataSource.cpp environment.cpp fileStarGlibIoChannel.cpp iHaveParameters.cpp - iterate.cpp paramChecker.cpp perRowValues.cpp presenter.cpp rawView.cpp + iterate.cpp paramChecker.cpp perRowValues.cpp presenter.cpp rawView.cpp dumpTask.cpp sourceObject.cpp task.cpp variables.cpp view.cpp xmlObjectLoader.cpp sessionClearTask.cpp session.cpp sessionSetTask.cpp : @@ -27,6 +27,13 @@ lib p2common : boost_filesystem ; +lib p2xml : + xmlRows.cpp + : + ../libmisc//misc + libxmlpp + ; + lib p2processes : procRows.cpp : @@ -91,6 +98,7 @@ exe p2web : p2files p2regex p2processes + p2xml p2xmlSession fcgi++ fcgi @@ -106,6 +114,7 @@ exe p2console : p2sql p2mail p2regex + p2xml ; explicit install ; diff --git a/project2/dumpTask.cpp b/project2/dumpTask.cpp new file mode 100644 index 0000000..1e94e65 --- /dev/null +++ b/project2/dumpTask.cpp @@ -0,0 +1,41 @@ +#include "dumpTask.h" +#include "perRowValues.h" +#include +#include + +ElementLoaderImpl<_DumpTask> dumptaskLoader("dumptask"); + +_DumpTask::_DumpTask(const xmlpp::Element * p) : + _SourceObject(p), + _Task(p) +{ +} + 
+_DumpTask::~_DumpTask() +{ +} + +void +_DumpTask::loadComplete() +{ +} + +void +_DumpTask::execute() const +{ + const PerRowValues::RowValuesStack::value_type & r = PerRowValues::Stack().back(); + unsigned int cols = r->columnCount(); + for (unsigned int c = 0; c < cols; c += 1) { + if (c > 0) { + fprintf(stderr, ", "); + } + try { + fprintf(stderr, "%s = '%s'", r->getColumnName(c).c_str(), r->getCurrentValue(c).c_str()); + } + catch (const PerRowValues::FieldDoesNotExist &) { + fprintf(stderr, "%s = null", r->getColumnName(c).c_str()); + } + } + fprintf(stderr, "\n"); +} + diff --git a/project2/dumpTask.h b/project2/dumpTask.h new file mode 100644 index 0000000..ec004c4 --- /dev/null +++ b/project2/dumpTask.h @@ -0,0 +1,19 @@ +#ifndef DUMPTASK_H +#define DUMPTASK_H + +#include "task.h" + +class _DumpTask : public _Task { + public: + _DumpTask(const xmlpp::Element * p); + virtual ~_DumpTask(); + virtual void loadComplete(); + virtual void execute() const; + +}; +typedef boost::intrusive_ptr<_DumpTask> DumpTask; +typedef std::map DumpTasks; + +#endif + + diff --git a/project2/xmlRows.cpp b/project2/xmlRows.cpp new file mode 100644 index 0000000..8ed912e --- /dev/null +++ b/project2/xmlRows.cpp @@ -0,0 +1,167 @@ +#include "xmlRows.h" +#include "xml.h" +#include +#include +#include +#include "xmlObjectLoader.h" +#include "appEngine.h" +#include +#include +#include +#include + +ElementLoaderImpl<_XmlIterate> xmliterateLoader("xmliterate"); +ElementLoaderImpl<_XmlView> xmlviewLoader("xmlview"); + +_XmlRows::_XmlRows(const xmlpp::Element * p) : + recordRoot(p->get_attribute_value("recordroot")), + recordTrigger(p->get_attribute_value("recordtrigger")), + filename(p->get_attribute_value("filename")), + anyInterestingAttributes(false) +{ + typedef boost::split_iterator ssi; + + boost::split(root, recordRoot, boost::is_any_of("/")); + boost::split(trigger, recordTrigger, boost::is_any_of("/")); + + BOOST_FOREACH(const xmlpp::Node * node, p->find("fields/field")) { + const 
xmlpp::Element * elem = dynamic_cast(node); + if (elem) { + Path p(root); + Glib::ustring path(elem->get_child_text()->get_content()); + + for(ssi It = make_split_iterator(path, first_finder("/", boost::is_equal())); It!=ssi(); ++It) { + if (It->front() == '@') { + anyInterestingAttributes = true; + } + p.push_back(Glib::ustring(It->begin(), It->end())); + } + + fields[p] = elem->get_attribute_value("name"); + } + } + BOOST_FOREACH(const Interests::value_type & v, fields) { + fieldNames.push_back(v.second); + } +} + +_XmlRows::~_XmlRows() +{ +} + + void +_XmlRows::loadComplete() +{ +} + +const Glib::ustring & +_XmlRows::getCurrentValue(const Glib::ustring & id) const +{ + Values::const_iterator i = values.find(id); + if (i == values.end()) { + throw FieldDoesNotExist(); + } + return i->second; +} + +const Glib::ustring & +_XmlRows::getCurrentValue(unsigned int col) const +{ + Values::const_iterator i = values.find(fieldNames[col]); + if (i == values.end()) { + throw FieldDoesNotExist(); + } + return i->second; +} + +bool +_XmlRows::isNull(unsigned int col) const +{ + return (values.find(fieldNames[col]) == values.end()); +} + +unsigned int +_XmlRows::columnCount() const +{ + return fields.size(); +} + +const Glib::ustring & +_XmlRows::getColumnName(unsigned int col) const +{ + return fieldNames[col]; +} + +void +store(const _XmlRows::Path & position, _XmlRows::Values & values, const _XmlRows::Interests & fields, const xmlChar * val) +{ + _XmlRows::Interests::const_iterator i = fields.find(position); + if (i != fields.end()) { + values[i->second] = (const char *)val; + } +} + +void +_XmlRows::execute() const +{ + xmlTextReaderPtr reader = xmlReaderForFile(filename.c_str(), NULL, 0); + if (reader == NULL) { + throw std::runtime_error("Failed to open file"); + } + + Path position; + bool enableCapture = false; + while (xmlTextReaderRead(reader) == 1) { + switch (xmlTextReaderNodeType(reader)) { + case XML_READER_TYPE_ELEMENT: + { + position.push_back((const char 
*)xmlTextReaderConstName(reader)); + enableCapture = (boost::algorithm::starts_with(position, root)); + bool empty = xmlTextReaderIsEmptyElement(reader); + if (enableCapture && anyInterestingAttributes && xmlTextReaderHasAttributes(reader)) { + while (xmlTextReaderMoveToNextAttribute(reader) == 1) { + Path p(position); + std::string attr("@"); + attr += (const char *)xmlTextReaderConstName(reader); + p.push_back(attr); + store(p, values, fields, xmlTextReaderConstValue(reader)); + } + } + if (empty) { + if (position == trigger) { + rowReady(); + } + if (position == root) { + values.clear(); + } + position.pop_back(); + } + } + break; + case XML_READER_TYPE_TEXT: + if (enableCapture) { + store(position, values, fields, xmlTextReaderConstValue(reader)); + } + break; + case XML_READER_TYPE_END_ELEMENT: + if (enableCapture) { + if (position == trigger) { + rowReady(); + } + if (position == root) { + values.clear(); + } + } + position.pop_back(); + break; + } + } + xmlFreeTextReader(reader); + xmlCleanupParser(); +} + +#include "view.hpp" +template class _GenericView<_XmlRows>; +#include "iterate.hpp" +template class _GenericIterate<_XmlRows>; + diff --git a/project2/xmlRows.h b/project2/xmlRows.h new file mode 100644 index 0000000..8b19e9a --- /dev/null +++ b/project2/xmlRows.h @@ -0,0 +1,54 @@ +#ifndef XMLITERATE_H +#define XMLITERATE_H + +#include +#include +#include +#include "iterate.h" +#include "view.h" + +class ApplicationEngine; + +class _XmlRows : public PerRowValues { + public: + typedef std::map Values; + typedef std::vector Path; + typedef std::map Interests; + + _XmlRows(const xmlpp::Element * p); + ~_XmlRows(); + + void execute() const; + virtual void loadComplete(); + unsigned int columnCount() const; + const Glib::ustring & getColumnName(unsigned int col) const; + const Glib::ustring & getCurrentValue(const Glib::ustring & id) const; + const Glib::ustring & getCurrentValue(unsigned int col) const; + bool isNull(unsigned int col) const; + virtual void 
rowReady() const = 0; + + const Glib::ustring recordRoot; + const Glib::ustring recordTrigger; + const std::string filename; + + private: + mutable Values values; + + Path root; + Path trigger; + Interests fields; + bool anyInterestingAttributes; + std::vector fieldNames; +}; +typedef boost::intrusive_ptr<_XmlRows> XmlRows; + +typedef _GenericView<_XmlRows> _XmlView; +typedef boost::intrusive_ptr<_XmlView> XmlView; +typedef std::map XmlViews; + +typedef _GenericIterate<_XmlRows> _XmlIterate; +typedef boost::intrusive_ptr<_XmlIterate> XmlIterate; +typedef std::map XmlIterates; + +#endif + -- cgit v1.2.3