summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrandomdan <randomdan@localhost>2010-08-10 23:19:58 +0000
committerrandomdan <randomdan@localhost>2010-08-10 23:19:58 +0000
commita4f38b0d7c7b2e465d8840abd508a88eb59d8784 (patch)
tree57608cf4cf6434a65602524c49e3103898a77eed
parentTidy up hierarchy (diff)
downloadproject2-a4f38b0d7c7b2e465d8840abd508a88eb59d8784.tar.bz2
project2-a4f38b0d7c7b2e465d8840abd508a88eb59d8784.tar.xz
project2-a4f38b0d7c7b2e465d8840abd508a88eb59d8784.zip
Minor fixes to text parsers
Add a URL base row implementation
-rw-r--r--project2/Jamfile.jam3
-rw-r--r--project2/fileRows.cpp5
-rw-r--r--project2/iterate.cpp3
-rw-r--r--project2/urlRows.cpp181
-rw-r--r--project2/urlRows.h61
-rw-r--r--project2/view.cpp2
6 files changed, 253 insertions, 2 deletions
diff --git a/project2/Jamfile.jam b/project2/Jamfile.jam
index e5633a9..ac82e15 100644
--- a/project2/Jamfile.jam
+++ b/project2/Jamfile.jam
@@ -14,6 +14,7 @@ lib boost_regex : : <name>boost_regex ;
lib boost_filesystem : : <name>boost_filesystem ;
lib cgicc : : <name>cgicc ;
lib esmtp : : <name>esmtp ;
+lib curl : : <name>curl ;
exe p2web :
libxmlpp
@@ -28,6 +29,7 @@ exe p2web :
<library>odbc
<library>esmtp
<library>cgicc
+ <library>curl
<library>fcgi++
<library>fcgi ;
@@ -43,4 +45,5 @@ exe p2console :
<library>boost_regex
<library>boost_filesystem
<library>odbc
+ <library>curl
<library>esmtp ;
diff --git a/project2/fileRows.cpp b/project2/fileRows.cpp
index 955767c..d9c26bc 100644
--- a/project2/fileRows.cpp
+++ b/project2/fileRows.cpp
@@ -51,10 +51,11 @@ _FileRows::execute() const
if (prevWasQuote) {
tok += c;
prevWasQuote = false;
+ inQuotes = !inQuotes;
}
else {
+ prevWasQuote = inQuotes;
inQuotes = !inQuotes;
- prevWasQuote = true;
}
}
else if ((!inQuotes) && (c == fieldSep)) {
@@ -82,7 +83,7 @@ _FileRows::execute() const
curCol++;
}
}
- if (!mkCols) {
+ if (!mkCols && !values.empty()) {
while (values.size() < columns.size()) {
values.push_back(ValPtr(new Glib::ustring()));
curCol++;
diff --git a/project2/iterate.cpp b/project2/iterate.cpp
index f6d4820..f29328b 100644
--- a/project2/iterate.cpp
+++ b/project2/iterate.cpp
@@ -4,6 +4,7 @@
#include "xmlObjectLoader.h"
#include "sqlRows.h"
#include "fileRows.h"
+#include "urlRows.h"
#include "procRows.h"
#include "task.h"
@@ -27,6 +28,7 @@ _Iterate::AddLoaders(Loaders & l, NoOutputExecutes & iterates)
l.insert(LoadersVT("sqliterate", _LoaderBase::Make<_SqlIterate, _NoOutputExecute, unsigned int, _SourceObject, &_SourceObject::order>(&iterates)));
l.insert(LoadersVT("fileiterate", _LoaderBase::Make<_FileIterate, _NoOutputExecute, unsigned int, _SourceObject, &_SourceObject::order>(&iterates)));
l.insert(LoadersVT("prociterate", _LoaderBase::Make<_ProcIterate, _NoOutputExecute, unsigned int, _SourceObject, &_SourceObject::order>(&iterates)));
+ l.insert(LoadersVT("urliterate", _LoaderBase::Make<_UrlIterate, _NoOutputExecute, unsigned int, _SourceObject, &_SourceObject::order>(&iterates)));
}
void
@@ -35,6 +37,7 @@ _Iterate::AddLoaders(Loaders & l, Iterates & iterates)
l.insert(LoadersVT("sqliterate", _LoaderBase::Make<_SqlIterate, _Iterate, std::string, _SourceObject, &_SourceObject::name>(&iterates)));
l.insert(LoadersVT("fileiterate", _LoaderBase::Make<_FileIterate, _Iterate, std::string, _SourceObject, &_SourceObject::name>(&iterates)));
l.insert(LoadersVT("prociterate", _LoaderBase::Make<_ProcIterate, _Iterate, std::string, _SourceObject, &_SourceObject::name>(&iterates)));
+ l.insert(LoadersVT("urliterate", _LoaderBase::Make<_UrlIterate, _Iterate, std::string, _SourceObject, &_SourceObject::name>(&iterates)));
}
void
diff --git a/project2/urlRows.cpp b/project2/urlRows.cpp
new file mode 100644
index 0000000..ddae45c
--- /dev/null
+++ b/project2/urlRows.cpp
@@ -0,0 +1,181 @@
+#include "urlRows.h"
+#include "../libmisc/curlsup.h"
+#include <stdexcept>
+#include <queue>
+
+// Build a URL row source from its XML definition.
+//
+// Reads the "url", "fieldSep", "quoteChar", "newline", "encoding" and
+// "skipheader" attributes of p, then collects the text of every
+// "columns/column" child element as an explicit column name. If no column
+// names are configured, mkCols is set so that handleData() builds the column
+// list from the first processed line of the response instead.
+//
+// Throws std::runtime_error if a column element has no text content.
+_UrlRows::_UrlRows(const xmlpp::Element * p) :
+	url(p->get_attribute_value("url")),
+	fieldSep(p->get_attribute_value("fieldSep")[0]),
+	quoteChar(p->get_attribute_value("quoteChar")[0]),
+	newline(p->get_attribute_value("newline")[0]),
+	encoding(p->get_attribute_value("encoding")),
+	skipheader(atoi(p->get_attribute_value("skipheader").c_str())),
+	inQuotes(false),
+	prevWasQuote(false)
+{
+	BOOST_FOREACH(const xmlpp::Node * node, p->find("columns/column")) {
+		const xmlpp::Element * elem = dynamic_cast<const xmlpp::Element *>(node);
+		if (elem) {
+			// get_child_text() returns NULL for an empty element such as
+			// <column/>; fail with a clear message instead of dereferencing it.
+			const xmlpp::TextNode * text = elem->get_child_text();
+			if (!text) {
+				throw std::runtime_error("urlrows: column element has no name");
+			}
+			columns.push_back(text->get_content());
+		}
+	}
+	mkCols = columns.empty();
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+_UrlRows::~_UrlRows() {}
+
+// Number of column names currently known (configured or built from the data).
+unsigned int
+_UrlRows::columnCount() const
+{
+	const Columns::size_type known = columns.size();
+	return known;
+}
+
+// Name of the column at zero-based index col. The index is not range
+// checked; callers must keep it below columnCount().
+const Glib::ustring &
+_UrlRows::getColumnName(unsigned int col) const
+{
+	const Columns & names = columns;
+	return names[col];
+}
+
+// Value of the current row at zero-based column index col.
+//
+// Throws PerRowValues::FieldDoesNotExist when col is beyond the values held
+// for the current row (values is empty outside of a rowReady() call).
+const Glib::ustring &
+_UrlRows::getCurrentValue(unsigned int col) const
+{
+	if (col >= values.size()) {
+		throw PerRowValues::FieldDoesNotExist();
+	}
+	return *values[col];
+}
+
+// Value of the current row for the column named id.
+//
+// Column names and row values are walked in parallel; the first column whose
+// name equals id selects the value. Throws PerRowValues::FieldDoesNotExist if
+// no column has that name or the current row holds no value for it.
+const Glib::ustring &
+_UrlRows::getCurrentValue(const Glib::ustring & id) const
+{
+	Values::const_iterator v = values.begin();
+	// Stop at whichever sequence ends first so a short (or empty) row can never
+	// cause a dereference of values.end().
+	for (Columns::const_iterator i = columns.begin();
+			i != columns.end() && v != values.end(); ++i, ++v) {
+		if (*i == id) {
+			return **v;
+		}
+	}
+	throw PerRowValues::FieldDoesNotExist();
+}
+
+// Static trampoline registered as the CURL write function: stream is the
+// _UrlRows instance passed as CURLOPT_WRITEDATA; the size * nmemb bytes at ptr
+// are forwarded to its handleData() and that call's result is returned.
+size_t
+_UrlRows::_handleData(const char * ptr, size_t size, size_t nmemb, void *stream)
+{
+	const _UrlRows * self = static_cast<const _UrlRows *>(stream);
+	const size_t byteCount = size * nmemb;
+	return self->handleData(ptr, byteCount);
+}
+
+// Decode one chunk of response bytes and run it through the delimited-text
+// state machine.
+//
+// The chunk is converted from the configured encoding to UTF-8 and then
+// processed one character at a time:
+//  - newline ends the current line: header lines are counted down via
+//    skipheader, the first processed line supplies the column names when
+//    mkCols is set, and any other line is emitted through rowReady();
+//  - quoteChar toggles quoted mode, a doubled quote inside quotes yields one
+//    literal quote character;
+//  - fieldSep outside quotes ends the current token;
+//  - anything else is appended to the current token.
+// Parser state (tok, values, inQuotes, prevWasQuote) is kept in members so a
+// token or row may span several chunks.
+//
+// bytes/bytesLen: the raw chunk. Returns the number of input bytes consumed
+// by the conversion, or 0 if the conversion failed.
+size_t
+_UrlRows::handleData(const char * bytes, size_t bytesLen) const
+{
+	size_t used = 0, len = 0;
+	char * utf8 = g_convert(bytes, bytesLen, "utf-8", encoding.c_str(), &used, &len, NULL);
+	if (!utf8) {
+		// Conversion failed (e.g. illegal byte sequence). Report nothing consumed
+		// rather than constructing a string from a NULL pointer.
+		return 0;
+	}
+	Glib::ustring str(utf8);
+	// Memory returned by g_convert belongs to GLib and must go back via g_free.
+	g_free(utf8);
+	// NOTE(review): per the GLib documentation, used can be smaller than
+	// bytesLen when the chunk ends in the middle of a multi-byte character;
+	// the short count is returned unchanged. Carrying the leftover bytes into
+	// the next call would need an additional buffer member.
+
+	BOOST_FOREACH(gunichar c, str) {
+		if (c == newline) {
+			if (skipheader) {
+				skipheader -= 1;
+			}
+			else {
+				if (!mkCols) {
+					if (!tok.empty()) {
+						values.push_back(ValPtr(new Glib::ustring(tok)));
+					}
+					// Pad short rows so every known column has a value.
+					while (values.size() < columns.size()) {
+						values.push_back(ValPtr(new Glib::ustring()));
+					}
+					rowReady();
+				}
+				else {
+					// The last header token is terminated by the newline, not by
+					// a field separator, so it has to be added here.
+					if (!tok.empty()) {
+						addColumn(tok);
+					}
+					mkCols = false;
+				}
+			}
+			values.clear();
+			tok.clear();
+		}
+		else if (c == quoteChar) {
+			if (prevWasQuote) {
+				// Second quote of a doubled pair: emit one literal quote.
+				tok += c;
+				prevWasQuote = false;
+				inQuotes = !inQuotes;
+			}
+			else {
+				// Only a quote seen while inside quotes can start a doubled pair.
+				prevWasQuote = inQuotes;
+				inQuotes = !inQuotes;
+			}
+		}
+		else if ((!inQuotes) && (c == fieldSep)) {
+			prevWasQuote = false;
+			if (skipheader == 0) {
+				if (mkCols) {
+					addColumn(tok);
+				}
+				else {
+					values.push_back(ValPtr(new Glib::ustring(tok)));
+				}
+			}
+			tok.clear();
+		}
+		else {
+			prevWasQuote = false;
+			tok += c;
+		}
+	}
+
+	return used;
+}
+
+// Append a column whose name is rawtok reduced to its ASCII alphanumeric
+// characters; every other character is removed from the stored name.
+void
+_UrlRows::addColumn(const Glib::ustring & rawtok) const
+{
+	columns.push_back(rawtok);
+	Glib::ustring & tok(columns.back());
+	for (Glib::ustring::iterator i = tok.begin(); i != tok.end(); ) {
+		// isalnum() is only defined for values representable as unsigned char,
+		// so anything outside ASCII is rejected before it is called.
+		const gunichar ch = *i;
+		if (ch < 128 && isalnum(static_cast<int>(ch))) {
+			++i;
+		}
+		else {
+			// erase() invalidates i; continue from the iterator it returns.
+			i = tok.erase(i);
+		}
+	}
+}
+
+// Fetch the configured URL and emit one rowReady() call per parsed row.
+//
+// The response body is streamed through _handleData()/handleData(); once the
+// transfer is complete, a final line that was not terminated by a newline is
+// flushed here. The parser state is reset before the transfer, and the header
+// handling (skipheader, mkCols) is restored afterwards, so that execute() can
+// be called again and behaves the same on every call.
+void
+_UrlRows::execute() const
+{
+	// handleData() consumes these while parsing; remember the configured values.
+	const size_t headerLines = skipheader;
+	const bool autoColumns = mkCols;
+	if (autoColumns) {
+		// Names built from a previous response are rebuilt from this one.
+		columns.clear();
+	}
+	tok.clear();
+	values.clear();
+	inQuotes = false;
+	prevWasQuote = false;
+
+	try {
+		CurlHandle::Ptr c = new CurlHandle();
+		c->setopt(CURLOPT_URL, url.c_str());
+		//c->setopt(CURLOPT_PROXY, proxy.c_str());
+		c->setopt(CURLOPT_FOLLOWLOCATION, 1);
+		//c->setopt(CURLOPT_COOKIEFILE, (std::string(cacheRoot) + "/ytfs.cookies").c_str());
+		//c->setopt(CURLOPT_COOKIEJAR, (std::string(cacheRoot) + "/ytfs.cookies").c_str());
+		c->setopt(CURLOPT_ENCODING, "deflate, gzip");
+		c->setopt(CURLOPT_USERAGENT, "project2/0.3");
+		c->setopt(CURLOPT_WRITEDATA, this);
+		c->setopt(CURLOPT_WRITEFUNCTION, &_handleData);
+		c->perform();
+
+		// Flush a trailing token that was not followed by a separator or newline.
+		if (!tok.empty()) {
+			if (skipheader == 0) {
+				if (mkCols) {
+					addColumn(tok);
+				}
+				else {
+					values.push_back(ValPtr(new Glib::ustring(tok)));
+				}
+			}
+			tok.clear();
+		}
+		// Emit the final row if the response did not end with a newline.
+		if (!values.empty()) {
+			while (values.size() < columns.size()) {
+				values.push_back(ValPtr(new Glib::ustring()));
+			}
+			rowReady();
+		}
+		values.clear();
+	}
+	catch (...) {
+		skipheader = headerLines;
+		mkCols = autoColumns;
+		throw;
+	}
+	skipheader = headerLines;
+	mkCols = autoColumns;
+}
+
+// Explicit instantiations of the generic view and iterate templates for
+// _UrlRows. NOTE(review): the .hpp files presumably hold the template member
+// definitions, which is why they are included here rather than at the top.
+#include "view.hpp"
+template class _GenericView<_UrlRows>;
+#include "iterate.hpp"
+template class _GenericIterate<_UrlRows>;
+
diff --git a/project2/urlRows.h b/project2/urlRows.h
new file mode 100644
index 0000000..f94e149
--- /dev/null
+++ b/project2/urlRows.h
@@ -0,0 +1,61 @@
+#ifndef URLROWS_H
+#define URLROWS_H
+
+#include <libxml++/nodes/element.h>
+#include <boost/shared_ptr.hpp>
+#include <map>
+#include "view.h"
+#include "iterate.h"
+
+// Row source that fetches delimited text from a URL and exposes it one row at
+// a time through the PerRowValues interface. Derived classes implement
+// rowReady(), which is called once for every parsed row.
+class _UrlRows : public PerRowValues {
+ public:
+ // Configure the source from the attributes and columns/column children of p.
+ _UrlRows(const xmlpp::Element * p);
+ ~_UrlRows();
+
+ // Fetch the URL and call rowReady() for each row of the response.
+ void execute() const;
+ // Number of known columns.
+ unsigned int columnCount() const;
+ // Name of the column at index col.
+ const Glib::ustring & getColumnName(unsigned int col) const;
+ // Value in the current row of the column named id; throws
+ // PerRowValues::FieldDoesNotExist if there is no such column.
+ const Glib::ustring & getCurrentValue(const Glib::ustring & id) const;
+ // Value in the current row of the column at index col.
+ const Glib::ustring & getCurrentValue(unsigned int col) const;
+ // Hook invoked when values holds a complete row.
+ virtual void rowReady() const = 0;
+
+ // NOTE(review): CharSet is not used by the visible implementation, and
+ // <set>/<vector> are not included directly by this header - presumably
+ // they arrive via view.h or iterate.h; confirm.
+ typedef std::set<gunichar> CharSet;
+ // Address to fetch, taken from the "url" attribute.
+ const Glib::ustring url;
+
+ protected:
+ // Append a column name derived from rawtok.
+ void addColumn(const Glib::ustring & rawtok) const;
+ typedef boost::shared_ptr<Glib::ustring> ValPtr;
+ typedef std::vector<ValPtr> Values;
+ // Values of the row currently being assembled or delivered.
+ mutable Values values;
+
+ private:
+ // CURL write callback; stream is the _UrlRows instance.
+ static size_t _handleData(const char * ptr, size_t size, size_t nmemb, void * stream);
+ // Parse one chunk of the response body.
+ size_t handleData(const char * bytes, size_t bytesLen) const;
+ // First character of the "fieldSep", "quoteChar" and "newline" attributes.
+ gunichar fieldSep;
+ gunichar quoteChar;
+ gunichar newline;
+ // Source character encoding handed to g_convert.
+ std::string encoding;
+ // Leading lines still to be skipped; decremented as they are read.
+ mutable size_t skipheader;
+ typedef std::vector<Glib::ustring> Columns;
+ mutable Columns columns;
+
+ // Used in CURL callback
+ // mkCols: column names are still to be built from the data.
+ mutable bool mkCols;
+ // inQuotes / prevWasQuote: quote-handling state carried between characters.
+ mutable bool inQuotes;
+ mutable bool prevWasQuote;
+ // Token being accumulated; may span several callback invocations.
+ mutable Glib::ustring tok;
+};
+// Shared-ownership handle for a URL row source.
+typedef boost::shared_ptr<_UrlRows> UrlRows;
+
+// View built on _UrlRows, its shared handle, and a string-keyed map of handles.
+typedef _GenericView<_UrlRows> _UrlView;
+typedef boost::shared_ptr<_UrlView> UrlView;
+typedef std::map<std::string, UrlView> UrlViews;
+
+// Iterate built on _UrlRows, its shared handle, and a string-keyed map of handles.
+typedef _GenericIterate<_UrlRows> _UrlIterate;
+typedef boost::shared_ptr<_UrlIterate> UrlIterate;
+typedef std::map<std::string, UrlIterate> UrlIterates;
+
+#endif
+
+
+
diff --git a/project2/view.cpp b/project2/view.cpp
index 530d7b5..217424f 100644
--- a/project2/view.cpp
+++ b/project2/view.cpp
@@ -3,6 +3,7 @@
#include "xmlObjectLoader.h"
#include "rawView.h"
#include "fileRows.h"
+#include "urlRows.h"
#include "sqlRows.h"
#include "procRows.h"
@@ -26,6 +27,7 @@ _View::AddLoaders(Loaders & l, Views & views)
l.insert(LoadersVT("rawview", _LoaderBase::Make<_RawView, _View, std::string, _SourceObject, &_SourceObject::name>(&views)));
l.insert(LoadersVT("fileview", _LoaderBase::Make<_FileView, _View, std::string, _SourceObject, &_SourceObject::name>(&views)));
l.insert(LoadersVT("procview", _LoaderBase::Make<_ProcView, _View, std::string, _SourceObject, &_SourceObject::name>(&views)));
+ l.insert(LoadersVT("urlview", _LoaderBase::Make<_UrlView, _View, std::string, _SourceObject, &_SourceObject::name>(&views)));
}
void