From 4f32f67921c311b264ab318e9a10a1aeb745cd04 Mon Sep 17 00:00:00 2001 From: randomdan Date: Tue, 19 Jul 2011 19:05:39 +0000 Subject: Support queuing multiple XSL row URLs and then fetching them all at once Use this feature to fetch all sites in parallel --- project2/Jamfile.jam | 2 +- project2/viewHost.h | 2 +- project2/xslPreFetch.cpp | 56 +++++++++++++++++++++++++++++++++ project2/xslPreFetch.h | 30 ++++++++++++++++++ project2/xslRows.cpp | 43 +++++++------------------ project2/xslRows.h | 11 ++++--- project2/xslRowsCache.cpp | 80 +++++++++++++++++++++++++++++++++++++++++++++++ project2/xslRowsCache.h | 35 +++++++++++++++++++++ 8 files changed, 221 insertions(+), 38 deletions(-) create mode 100644 project2/xslPreFetch.cpp create mode 100644 project2/xslPreFetch.h create mode 100644 project2/xslRowsCache.cpp create mode 100644 project2/xslRowsCache.h diff --git a/project2/Jamfile.jam b/project2/Jamfile.jam index af19cee..1b8c17d 100644 --- a/project2/Jamfile.jam +++ b/project2/Jamfile.jam @@ -80,7 +80,7 @@ lib p2common : ; lib p2xml : - xmlRows.cpp xslRows.cpp + xmlRows.cpp xslRows.cpp xslRowsCache.cpp xslPreFetch.cpp : ../libmisc libxmlpp diff --git a/project2/viewHost.h b/project2/viewHost.h index 1428a0e..5995d82 100644 --- a/project2/viewHost.h +++ b/project2/viewHost.h @@ -37,7 +37,7 @@ class ViewHost : virtual public XmlScriptParser, virtual public CheckHost { private: mutable PresenterMultiplexer pmp; - typedef ANONSTORAGEOF(View) Views; + typedef ANONORDEREDSTORAGEOF(View) Views; Views views; }; typedef boost::intrusive_ptr ViewHostPtr; diff --git a/project2/xslPreFetch.cpp b/project2/xslPreFetch.cpp new file mode 100644 index 0000000..7486c9f --- /dev/null +++ b/project2/xslPreFetch.cpp @@ -0,0 +1,56 @@ +#include "xslPreFetch.h" +#include "xmlObjectLoader.h" + +DECLARE_LOADER("xslprefetch", XslPreFetch); + +XslPreFetch::XslPreFetch(const xmlpp::Element * p) : + SourceObject(p), + View(p), + NoOutputExecute(p), + CurlHelper(p), + 
html(p->get_attribute_value("html") == "true"), + warnings(p->get_attribute_value("warnings") != "false"), + encoding(p, "encoding", false) +{ +} + +XslPreFetch::~XslPreFetch() +{ +} + +void +XslPreFetch::execute(const Presenter*) const +{ + execute(); +} + +void +XslPreFetch::execute() const +{ + queue(url(), encoding()); +} + +void +XslPreFetch::loadComplete(const CommonObjects *) +{ +} + + +CurlHandle::Ptr +XslPreFetch::newCurl() const +{ + return CurlHelper::newCurl(); +} + +bool +XslPreFetch::asHtml() const +{ + return html; +} + +bool +XslPreFetch::withWarnings() const +{ + return warnings; +} + diff --git a/project2/xslPreFetch.h b/project2/xslPreFetch.h new file mode 100644 index 0000000..000166d --- /dev/null +++ b/project2/xslPreFetch.h @@ -0,0 +1,30 @@ +#ifndef XSLPREFETCH_H +#define XSLPREFETCH_H + +#include "xslRowsCache.h" +#include "curlHelper.h" +#include "view.h" +#include "noOutputExecute.h" +#include + +/// Project2 component to queue up CURL objects to be downloaded +class XslPreFetch : public View, public NoOutputExecute, XslRowsCache, CurlHelper { + public: + XslPreFetch(const xmlpp::Element * p); + ~XslPreFetch(); + + void execute(const Presenter*) const; + void execute() const; + void loadComplete(const CommonObjects *); + + const bool html; + const bool warnings; + const Variable encoding; + + CurlHandle::Ptr newCurl() const; + bool asHtml() const; + bool withWarnings() const; +}; + +#endif + diff --git a/project2/xslRows.cpp b/project2/xslRows.cpp index 53ba7a2..b30d9b6 100644 --- a/project2/xslRows.cpp +++ b/project2/xslRows.cpp @@ -5,7 +5,6 @@ #include "exceptions.h" #include "xmlObjectLoader.h" #include -#include #include #include #include "../libmisc/curlsup.h" @@ -13,7 +12,6 @@ DECLARE_LOADER("xslrows", XslRows); -SimpleMessageException(XmlParseError); SimpleMessageException(XpathInitError); SimpleMessageException(XpathEvalError); @@ -58,39 +56,22 @@ XslRows::setFilter(const Glib::ustring & f) fv = i->second; } -size_t 
-XslRows::handleDataHelper(const char * ptr, size_t size, size_t nmemb, void *stream) +bool +XslRows::asHtml() const { - std::string * buf = static_cast(stream); - buf->append(ptr, size * nmemb); - return size * nmemb; + return html; } -xmlDocPtr -XslRows::getDocument(const Glib::ustring & url, const char * encoding) const +bool +XslRows::withWarnings() const { - Documents::const_iterator i = documents.find(url); - if (i == documents.end()) { - CurlHandle::Ptr c = newCurl(); - std::string buf; - c->setopt(CURLOPT_WRITEDATA, &buf); - c->setopt(CURLOPT_WRITEFUNCTION, &handleDataHelper); - c->perform(); - - int flags = 0; - flags |= warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR; - xmlDocPtr doc = html ? - htmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags) : - xmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags); - if (!doc) { - throw XmlParseError(xmlGetLastError()->message); - } - documents.insert(Documents::value_type(url, Documents::value_type::second_type(doc, xmlFreeDoc))); - return doc; - } - else { - return i->second.get(); - } + return warnings; +} + +CurlHandle::Ptr +XslRows::newCurl() const +{ + return CurlHelper::newCurl(); } void diff --git a/project2/xslRows.h b/project2/xslRows.h index 5075386..a30ca02 100644 --- a/project2/xslRows.h +++ b/project2/xslRows.h @@ -7,10 +7,11 @@ #include #include "rowSet.h" #include "variables.h" +#include "xslRowsCache.h" #include "curlHelper.h" /// Project2 component to create a row set based on the contents of an XML resource and specific XPaths with its hierarchy -class XslRows : public RowSet, CurlHelper { +class XslRows : public RowSet, XslRowsCache, CurlHelper { public: XslRows(const xmlpp::Element * p); ~XslRows(); @@ -44,14 +45,14 @@ class XslRows : public RowSet, CurlHelper { FilterViews fvs; FilterViewPtr fv; + virtual CurlHandle::Ptr newCurl() const; + virtual bool asHtml() const; + virtual bool withWarnings() const; + typedef std::map Namespaces; mutable 
Namespaces namespaces; typedef std::map Values; mutable Values values; - typedef std::map > Documents; - mutable Documents documents; - static size_t handleDataHelper(const char * ptr, size_t size, size_t nmemb, void *stream); - xmlDocPtr getDocument(const Glib::ustring & url, const char * encoding) const; const Variable encoding; }; diff --git a/project2/xslRowsCache.cpp b/project2/xslRowsCache.cpp new file mode 100644 index 0000000..73b4c42 --- /dev/null +++ b/project2/xslRowsCache.cpp @@ -0,0 +1,80 @@ +#include "xslRowsCache.h" +#include +#include "exceptions.h" + +XslRowsCache::Documents XslRowsCache::documents; +XslRowsCache::Queued XslRowsCache::queued; +CurlBulkFetcher XslRowsCache::cbf; + +SimpleMessageException(XmlParseError); +SimpleMessageException(DownloadFailed); + +class XslCachePopulator : public CurlCompleteCallback { + public: + XslCachePopulator(CurlHandle::Ptr ch, const Glib::ustring & u, bool w, bool h, const char * e) : + CurlCompleteCallback(ch), + url(u), + html(h), + warnings(w), + encoding(e) + { + curl->setopt(CURLOPT_WRITEDATA, &buf); + curl->setopt(CURLOPT_WRITEFUNCTION, &XslRowsCache::handleDataHelper); + } + void call(CurlBulkFetcher *) + { + int flags = 0; + flags |= warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR; + xmlDocPtr doc = html ? 
+ htmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags) : + xmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags); + if (!doc) { + throw XmlParseError(xmlGetLastError()->message); + } + XslRowsCache::documents.insert(XslRowsCache::Documents::value_type(url, + XslRowsCache::Documents::value_type::second_type(doc, xmlFreeDoc))); + } + + std::string buf; + const Glib::ustring url; + const bool html; + const bool warnings; + const char * encoding; +}; + +size_t +XslRowsCache::handleDataHelper(const char * ptr, size_t size, size_t nmemb, void *stream) +{ + std::string * buf = static_cast(stream); + buf->append(ptr, size * nmemb); + return size * nmemb; +} + +xmlDocPtr +XslRowsCache::getDocument(const Glib::ustring & url, const char * encoding) const +{ + Documents::const_iterator i = documents.find(url); + if (i == documents.end()) { + queue(url, encoding); + cbf.perform(); + queued.clear(); + } + i = documents.find(url); + if (i == documents.end()) { + // This should never happen + throw DownloadFailed(url); + } + else { + return i->second.get(); + } +} + +void +XslRowsCache::queue(const Glib::ustring & url, const char * encoding) const +{ + if (queued.find(url) == queued.end()) { + cbf.curls.insert(new XslCachePopulator(newCurl(), url, withWarnings(), asHtml(), encoding)); /* XslCachePopulator takes (curl, url, warnings, html, encoding) in that order */ + queued.insert(url); + } +} + diff --git a/project2/xslRowsCache.h b/project2/xslRowsCache.h new file mode 100644 index 0000000..55b8674 --- /dev/null +++ b/project2/xslRowsCache.h @@ -0,0 +1,35 @@ +#ifndef XSLROWSCACHE_H +#define XSLROWSCACHE_H + +#include +#include +#include +#include +#include "../libmisc/curlsup.h" +#include + +class XslRowsCache { + protected: + typedef std::set Queued; + typedef std::map > Documents; + + static Queued queued; + static Documents documents; + + void queue(const Glib::ustring & url, const char * encoding) const; + + virtual CurlHandle::Ptr newCurl() const = 0; + virtual bool asHtml() const = 0; + virtual bool withWarnings() 
const = 0; + + protected: + xmlDocPtr getDocument(const Glib::ustring & url, const char * encoding) const; + + private: + static size_t handleDataHelper(const char * ptr, size_t size, size_t nmemb, void *stream); + static CurlBulkFetcher cbf; + + friend class XslCachePopulator; +}; + +#endif -- cgit v1.2.3