diff options
author | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-10-05 13:25:41 +0100 |
---|---|---|
committer | Dan Goodliffe <dan@randomdan.homeip.net> | 2015-10-05 13:25:41 +0100 |
commit | 3ad9f8990942cca33b32002c820475bd715a253e (patch) | |
tree | 722edb31fe8deac65edd4daf4ace46b10a27d30b | |
parent | Slash the amount of inclusion in pre-compiled headers (diff) | |
download | project2-3ad9f8990942cca33b32002c820475bd715a253e.tar.bz2 project2-3ad9f8990942cca33b32002c820475bd715a253e.tar.xz project2-3ad9f8990942cca33b32002c820475bd715a253e.zip |
Restore HTML parse functionality
-rw-r--r-- | project2/xml/rawView.cpp | 2 | ||||
-rw-r--r-- | project2/xml/xmlDocumentCache.cpp | 70 | ||||
-rw-r--r-- | project2/xml/xmlDocumentCache.h | 5 | ||||
-rw-r--r-- | project2/xml/xmlDocumentPrefetch.cpp | 2 | ||||
-rw-r--r-- | project2/xml/xpathRows.cpp | 2 |
5 files changed, 63 insertions, 18 deletions
diff --git a/project2/xml/rawView.cpp b/project2/xml/rawView.cpp index 69009aa..c67a677 100644 --- a/project2/xml/rawView.cpp +++ b/project2/xml/rawView.cpp @@ -82,7 +82,7 @@ class XmlResourceView : public RawViewBase, XmlDocumentCache, VariableCurlHelper protected: const xmlpp::Element * getCopyRoot(ExecContext * ec) const { - return getDocument(url(ec), encoding(ec), ec)->get_root_node(); + return getDocument(url(ec), encoding(ec), false, false, ec)->get_root_node(); } bool asHtml(ExecContext *) const { return false; } bool withWarnings(ExecContext *) const { return true; } diff --git a/project2/xml/xmlDocumentCache.cpp b/project2/xml/xmlDocumentCache.cpp index 98e4993..e0665b0 100644 --- a/project2/xml/xmlDocumentCache.cpp +++ b/project2/xml/xmlDocumentCache.cpp @@ -21,37 +21,81 @@ template <class Exception> static XmlDocumentCache::DocumentPtr helperThrow(const std::string & msg) { throw Exception(msg); } -static XmlDocumentCache::DocumentPtr helperReturnDocument(XmlDocumentCache::DomParserPtr dp) { +static XmlDocumentCache::DocumentPtr helperReturnDom(XmlDocumentCache::DomParserPtr dp) { return dp->get_document(); } +static XmlDocumentCache::DocumentPtr helperReturnDocument(XmlDocumentCache::DocumentPtr dp) { + return dp; +} XmlDocumentCache::DocumentPtr -XmlDocumentCache::getDocument(const Glib::ustring & url, const char * encoding, ExecContext * ec) const +XmlDocumentCache::getDocument(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext * ec) const { Documents::const_iterator i = documents.find(url); if (i == documents.end()) { - queue(url, encoding, ec); + queue(url, encoding, html, warnings, ec); cbf.performAll(); queued.clear(); } return AdHoc::safeMapLookup<DownloadFailed>(documents, url)(); } +int +xmlReadFunc(void * context, char * buffer, int len) +{ + try { + std::istream * strm = static_cast<std::istream *>(context); + strm->read(buffer, len); + return strm->gcount(); + } + catch (const AdHoc::Net::CurlException & error) { + return -1; + } +} + +int +xmlCloseFunc(void *) +{ + return 0; +} + void -XmlDocumentCache::queue(const Glib::ustring & url, const char *, ExecContext *) const +XmlDocumentCache::queue(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext *) const { if (queued.find(url) == queued.end()) { - cbf.addCurl(url, [url](std::istream & strm) { - try { - DomParserPtr doc = DomParserPtr(new xmlpp::DomParser()); - doc->parse_stream(strm); + cbf.addCurl(url, [url, encoding, html, warnings](std::istream & strm) { + if (html) { + int flags = warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR; + htmlDocPtr doc = htmlReadIO(xmlReadFunc, xmlCloseFunc, &strm, url.c_str(), + encoding ? encoding->c_str() : nullptr, flags); + if (!doc) { + Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, xmlGetLastError()->message); + XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url, + boost::bind(helperThrow<XmlParseError>, std::string(xmlGetLastError()->message)))); + return; + } + + // Dirty hack alert + // xmlpp doesn't play nicely with HTML documents... + // sooo ummm, lie and hope it doesn't break something else + doc->type = XML_DOCUMENT_NODE; + // end hack + XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url, - boost::bind(helperReturnDocument, doc))); + boost::bind(helperReturnDocument, XmlDocumentCache::DocumentPtr(new xmlpp::Document(doc))))); } - catch (const AdHoc::Net::CurlException & error) { - Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, error.message); - XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url, - boost::bind(helperThrow<DownloadFailed>, error.message))); + else { + try { + DomParserPtr doc = DomParserPtr(new xmlpp::DomParser()); + doc->parse_stream(strm); + XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url, + boost::bind(helperReturnDom, doc))); + } + catch (const AdHoc::Net::CurlException & error) { + Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, error.message); + XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url, + boost::bind(helperThrow<DownloadFailed>, error.message))); + } } })->setopt(CURLOPT_ENCODING, "deflate, gzip"); queued.insert(url); diff --git a/project2/xml/xmlDocumentCache.h b/project2/xml/xmlDocumentCache.h index fb5a650..e0a14a6 100644 --- a/project2/xml/xmlDocumentCache.h +++ b/project2/xml/xmlDocumentCache.h @@ -2,6 +2,7 @@ #define XMLDOCUMENTROWSCACHE_H #include <boost/shared_ptr.hpp> +#include <boost/optional.hpp> #include <map> #include <set> #include <curlHelper.h> @@ -22,14 +23,14 @@ class XmlDocumentCache { static Queued queued; static Documents documents; - void queue(const Glib::ustring & url, const char * encoding, ExecContext *) const; + void queue(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext *) const; virtual CurlPtr newCurl(ExecContext *) const = 0; virtual bool asHtml(ExecContext * ec) const = 0; virtual bool withWarnings(ExecContext * ec) const = 0; protected: - DocumentPtr getDocument(const Glib::ustring & url, const char * encoding, ExecContext * ec) const; + DocumentPtr getDocument(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext * ec) const; private: static AdHoc::Net::CurlMultiHandle cbf; diff --git a/project2/xml/xmlDocumentPrefetch.cpp b/project2/xml/xmlDocumentPrefetch.cpp index 357f50b..58489ce 100644 --- a/project2/xml/xmlDocumentPrefetch.cpp +++ b/project2/xml/xmlDocumentPrefetch.cpp @@ -29,7 +29,7 @@ XmlDocumentPrefetch::execute(const MultiRowSetPresenter*, ExecContext * ec) cons void XmlDocumentPrefetch::execute(ExecContext * ec) const { - queue(url(ec), encoding(ec), ec); + queue(url(ec), encoding(ec), asHtml(ec), warnings(ec), ec); } CurlPtr diff --git a/project2/xml/xpathRows.cpp b/project2/xml/xpathRows.cpp index 0501012..eefcbc2 100644 --- a/project2/xml/xpathRows.cpp +++ b/project2/xml/xpathRows.cpp @@ -59,7 +59,7 @@ XPathRows::execute(const Glib::ustring & filter, const RowProcessorCallback & rp typedef boost::shared_ptr<xmlXPathObject> xmlXPathObjectSPtr; typedef boost::shared_ptr<xmlXPathContext> xmlXPathContextSPtr; - xmlDocPtr doc = getDocument(url(ec), encoding(ec), ec)->cobj(); + xmlDocPtr doc = getDocument(url(ec), encoding(ec), html(ec), warnings(ec), ec)->cobj(); xmlXPathContextSPtr xpathCtx = xmlXPathContextSPtr(xmlXPathNewContext(doc), xmlXPathFreeContext); if (!xpathCtx) { throw XpathInitError(xmlGetLastError()->message); |