summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dan Goodliffe <dan@randomdan.homeip.net> 2015-10-05 13:25:41 +0100
committer Dan Goodliffe <dan@randomdan.homeip.net> 2015-10-05 13:25:41 +0100
commit 3ad9f8990942cca33b32002c820475bd715a253e (patch)
tree 722edb31fe8deac65edd4daf4ace46b10a27d30b
parent Slash the amount of inclusion in pre-compiled headers (diff)
download project2-3ad9f8990942cca33b32002c820475bd715a253e.tar.bz2
project2-3ad9f8990942cca33b32002c820475bd715a253e.tar.xz
project2-3ad9f8990942cca33b32002c820475bd715a253e.zip
Restore HTML parse functionality
-rw-r--r-- project2/xml/rawView.cpp 2
-rw-r--r-- project2/xml/xmlDocumentCache.cpp 70
-rw-r--r-- project2/xml/xmlDocumentCache.h 5
-rw-r--r-- project2/xml/xmlDocumentPrefetch.cpp 2
-rw-r--r-- project2/xml/xpathRows.cpp 2
5 files changed, 63 insertions, 18 deletions
diff --git a/project2/xml/rawView.cpp b/project2/xml/rawView.cpp
index 69009aa..c67a677 100644
--- a/project2/xml/rawView.cpp
+++ b/project2/xml/rawView.cpp
@@ -82,7 +82,7 @@ class XmlResourceView : public RawViewBase, XmlDocumentCache, VariableCurlHelper
protected:
const xmlpp::Element * getCopyRoot(ExecContext * ec) const
{
- return getDocument(url(ec), encoding(ec), ec)->get_root_node();
+ return getDocument(url(ec), encoding(ec), false, false, ec)->get_root_node();
}
bool asHtml(ExecContext *) const { return false; }
bool withWarnings(ExecContext *) const { return true; }
diff --git a/project2/xml/xmlDocumentCache.cpp b/project2/xml/xmlDocumentCache.cpp
index 98e4993..e0665b0 100644
--- a/project2/xml/xmlDocumentCache.cpp
+++ b/project2/xml/xmlDocumentCache.cpp
@@ -21,37 +21,81 @@ template <class Exception>
static XmlDocumentCache::DocumentPtr helperThrow(const std::string & msg) {
throw Exception(msg);
}
-static XmlDocumentCache::DocumentPtr helperReturnDocument(XmlDocumentCache::DomParserPtr dp) {
+static XmlDocumentCache::DocumentPtr helperReturnDom(XmlDocumentCache::DomParserPtr dp) {
return dp->get_document();
}
+static XmlDocumentCache::DocumentPtr helperReturnDocument(XmlDocumentCache::DocumentPtr dp) {
+ return dp;
+}
XmlDocumentCache::DocumentPtr
-XmlDocumentCache::getDocument(const Glib::ustring & url, const char * encoding, ExecContext * ec) const
+XmlDocumentCache::getDocument(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext * ec) const
{
Documents::const_iterator i = documents.find(url);
if (i == documents.end()) {
- queue(url, encoding, ec);
+ queue(url, encoding, html, warnings, ec);
cbf.performAll();
queued.clear();
}
return AdHoc::safeMapLookup<DownloadFailed>(documents, url)();
}
+int
+xmlReadFunc(void * context, char * buffer, int len)
+{
+ try {
+ std::istream * strm = static_cast<std::istream *>(context);
+ strm->read(buffer, len);
+ return strm->gcount();
+ }
+ catch (const AdHoc::Net::CurlException & error) {
+ return -1;
+ }
+}
+
+int
+xmlCloseFunc(void *)
+{
+ return 0;
+}
+
void
-XmlDocumentCache::queue(const Glib::ustring & url, const char *, ExecContext *) const
+XmlDocumentCache::queue(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext *) const
{
if (queued.find(url) == queued.end()) {
- cbf.addCurl(url, [url](std::istream & strm) {
- try {
- DomParserPtr doc = DomParserPtr(new xmlpp::DomParser());
- doc->parse_stream(strm);
+ cbf.addCurl(url, [url, encoding, html, warnings](std::istream & strm) {
+ if (html) {
+ int flags = warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR;
+ htmlDocPtr doc = htmlReadIO(xmlReadFunc, xmlCloseFunc, &strm, url.c_str(),
+ encoding ? encoding->c_str() : nullptr, flags);
+ if (!doc) {
+ Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, xmlGetLastError()->message);
+ XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
+ boost::bind(helperThrow<XmlParseError>, std::string(xmlGetLastError()->message))));
+ return;
+ }
+
+ // Dirty hack alert
+ // xmlpp doesn't play nicely with HTML documents...
+ // sooo ummm, lie and hope it doesn't break something else
+ doc->type = XML_DOCUMENT_NODE;
+ // end hack
+
XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
- boost::bind(helperReturnDocument, doc)));
+ boost::bind(helperReturnDocument, XmlDocumentCache::DocumentPtr(new xmlpp::Document(doc)))));
}
- catch (const AdHoc::Net::CurlException & error) {
- Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, error.message);
- XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
- boost::bind(helperThrow<DownloadFailed>, error.message)));
+ else {
+ try {
+ DomParserPtr doc = DomParserPtr(new xmlpp::DomParser());
+ doc->parse_stream(strm);
+ XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
+ boost::bind(helperReturnDom, doc)));
+ }
+ catch (const AdHoc::Net::CurlException & error) {
+ Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, error.message);
+ XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
+ boost::bind(helperThrow<DownloadFailed>, error.message)));
+ }
}
})->setopt(CURLOPT_ENCODING, "deflate, gzip");
queued.insert(url);
diff --git a/project2/xml/xmlDocumentCache.h b/project2/xml/xmlDocumentCache.h
index fb5a650..e0a14a6 100644
--- a/project2/xml/xmlDocumentCache.h
+++ b/project2/xml/xmlDocumentCache.h
@@ -2,6 +2,7 @@
#define XMLDOCUMENTROWSCACHE_H
#include <boost/shared_ptr.hpp>
+#include <boost/optional.hpp>
#include <map>
#include <set>
#include <curlHelper.h>
@@ -22,14 +23,14 @@ class XmlDocumentCache {
static Queued queued;
static Documents documents;
- void queue(const Glib::ustring & url, const char * encoding, ExecContext *) const;
+ void queue(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext *) const;
virtual CurlPtr newCurl(ExecContext *) const = 0;
virtual bool asHtml(ExecContext * ec) const = 0;
virtual bool withWarnings(ExecContext * ec) const = 0;
protected:
- DocumentPtr getDocument(const Glib::ustring & url, const char * encoding, ExecContext * ec) const;
+ DocumentPtr getDocument(const Glib::ustring & url, boost::optional<std::string> encoding, bool html, bool warnings, ExecContext * ec) const;
private:
static AdHoc::Net::CurlMultiHandle cbf;
diff --git a/project2/xml/xmlDocumentPrefetch.cpp b/project2/xml/xmlDocumentPrefetch.cpp
index 357f50b..58489ce 100644
--- a/project2/xml/xmlDocumentPrefetch.cpp
+++ b/project2/xml/xmlDocumentPrefetch.cpp
@@ -29,7 +29,7 @@ XmlDocumentPrefetch::execute(const MultiRowSetPresenter*, ExecContext * ec) cons
void
XmlDocumentPrefetch::execute(ExecContext * ec) const
{
- queue(url(ec), encoding(ec), ec);
+ queue(url(ec), encoding(ec), asHtml(ec), warnings(ec), ec);
}
CurlPtr
diff --git a/project2/xml/xpathRows.cpp b/project2/xml/xpathRows.cpp
index 0501012..eefcbc2 100644
--- a/project2/xml/xpathRows.cpp
+++ b/project2/xml/xpathRows.cpp
@@ -59,7 +59,7 @@ XPathRows::execute(const Glib::ustring & filter, const RowProcessorCallback & rp
typedef boost::shared_ptr<xmlXPathObject> xmlXPathObjectSPtr;
typedef boost::shared_ptr<xmlXPathContext> xmlXPathContextSPtr;
- xmlDocPtr doc = getDocument(url(ec), encoding(ec), ec)->cobj();
+ xmlDocPtr doc = getDocument(url(ec), encoding(ec), html(ec), warnings(ec), ec)->cobj();
xmlXPathContextSPtr xpathCtx = xmlXPathContextSPtr(xmlXPathNewContext(doc), xmlXPathFreeContext);
if (!xpathCtx) {
throw XpathInitError(xmlGetLastError()->message);