#include "pch.hpp"
#include "xmlDocumentCache.h"
// NOTE(review): the system-style headers below are inferred from the symbols
// this file uses (strdup/free, LOG_DEBUG, boost::bind, xmlReadMemory,
// htmlReadMemory) -- confirm the exact list against the build. Logger(),
// ComponentLoader and DECLARE_CUSTOM_COMPONENT_LOADER are presumed to arrive
// via pch.hpp or one of the project headers.
#include <cstdlib>
#include <cstring>
#include <syslog.h>
#include <boost/bind.hpp>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
#include "exceptions.h"
#include "curlHelper.h"
#include "safeMapFind.h"

// Storage for the static members of XmlDocumentCache.
XmlDocumentCache::Documents XmlDocumentCache::documents;
XmlDocumentCache::Queued XmlDocumentCache::queued;
CurlBulkFetcher XmlDocumentCache::cbf;

SimpleMessageException(XmlParseError);
SimpleMessageException(DownloadFailed);

/// Cache-entry thunk for a failed fetch: invoking it throws Exception(msg).
/// It is bound with its message and stored in XmlDocumentCache::documents, so
/// the failure is raised when the entry is invoked by getDocument.
template <class Exception>
static
XmlDocumentCache::DocumentPtr
helperThrow(const std::string & msg)
{
	throw Exception(msg);
}

/// Cache-entry thunk for a successful fetch: invoking it returns the document
/// it was bound with.
static
XmlDocumentCache::DocumentPtr
helperReturnDocument(XmlDocumentCache::DocumentPtr dp)
{
	return dp;
}

/// Curl completion callback that accumulates a download body in memory and,
/// on completion, parses it and records the outcome (a document or a deferred
/// exception) in XmlDocumentCache::documents under the request URL.
class XmlDocumentCachePopulator : public CurlCompleteCallback {
	public:
		/// @param ch  curl handle; this object's append() is installed as its read handler
		/// @param u   URL being fetched; also the key used in the document cache
		/// @param h   parse the body with the HTML parser instead of the XML parser
		/// @param w   when false, parser warnings and errors are suppressed
		/// @param e   encoding passed to the parser, or NULL; the string is copied
		XmlDocumentCachePopulator(CurlPtr ch, const Glib::ustring & u, bool h, bool w, const char * e) :
			CurlCompleteCallback(ch),
			handler(boost::bind(&XmlDocumentCachePopulator::append, this, _1, _2)),
			url(u),
			html(h),
			warnings(w),
			encoding(e ? strdup(e) : NULL)
		{
			ch->setReadHandler(handler);
		}

		~XmlDocumentCachePopulator()
		{
			// encoding is either NULL or a strdup'd copy; free(NULL) is a no-op.
			free(encoding);
		}

		/// Download finished: parse the buffered body and cache the result.
		/// On parse failure a thunk throwing XmlParseError is cached instead.
		void call(CurlBulkFetcher *)
		{
			int flags = 0;
			flags |= warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR;
			xmlDocPtr doc = html ?
				htmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags) :
				xmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags);
			if (!doc) {
				// xmlGetLastError() may return NULL, so read it once and guard
				// both the pointer and its message before using them.
				const xmlError * lastError = xmlGetLastError();
				const char * reason = (lastError && lastError->message) ?
					lastError->message : "unknown parse error";
				Logger()->messagebf(LOG_DEBUG, "Download of '%s' succeeded, but parsing failed with error '%s'", url, reason);
				XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
							boost::bind(helperThrow<XmlParseError>, std::string(reason))));
				// Nothing was parsed: stop here rather than falling through
				// and dereferencing the null document below.
				return;
			}
			// Dirty hack alert
			// xmlpp doesn't play nicely with HTML documents...
			// so lie about the node type and hope it doesn't break something else
			doc->type = XML_DOCUMENT_NODE;
			// end hack
			XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
						boost::bind(helperReturnDocument, XmlDocumentCache::DocumentPtr(new xmlpp::Document(doc)))));
			Logger()->messagebf(LOG_DEBUG, "Download of '%s' completed, stored", url);
		}

		/// Download failed: cache a thunk that throws DownloadFailed carrying
		/// the error text.
		void error(CurlBulkFetcher *, const char * error)
		{
			Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, error);
			XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
						boost::bind(helperThrow<DownloadFailed>, std::string(error))));
		}

		/// Read handler: appends b bytes from c to the body buffer and reports
		/// all of them as consumed.
		size_t append(const char * c, size_t b)
		{
			buf.append(c, b);
			return b;
		}

		Curl::ReadHandler handler;	// bound to append() on this object
		std::string buf;		// accumulated response body
		const Glib::ustring url;
		const bool html;
		const bool warnings;
		char * encoding;		// strdup'd copy or NULL; released in the destructor
};

/// Returns the cached document for url. If url has no cache entry yet, it is
/// queued, all pending fetches are performed and the queued set is cleared.
/// The cache entry is then invoked, which either yields the document or
/// throws the failure recorded for that URL.
/// NOTE(review): the exception type given to safeMapLookup for a missing
/// entry is taken to be DownloadFailed -- confirm.
XmlDocumentCache::DocumentPtr
XmlDocumentCache::getDocument(const Glib::ustring & url, const char * encoding) const
{
	Documents::const_iterator i = documents.find(url);
	if (i == documents.end()) {
		queue(url, encoding);
		cbf.perform();
		queued.clear();
	}
	return safeMapLookup<DownloadFailed>(documents, url)();
}

/// Registers a fetch of url with the bulk fetcher unless that URL is already
/// in the queued set. The transfer itself runs when cbf.perform() is called.
void
XmlDocumentCache::queue(const Glib::ustring & url, const char * encoding) const
{
	if (queued.find(url) == queued.end()) {
		cbf.curls.insert(new XmlDocumentCachePopulator(newCurl(), url, asHtml(), withWarnings(), encoding));
		queued.insert(url);
	}
}

/// Component loader whose onIteration hook empties the XML document cache,
/// logging before and after the clear.
class XmlDocumentCacheClearer : public ComponentLoader {
	public:
		void onIteration()
		{
			Logger()->messagef(LOG_DEBUG, "%s: Clearing XML document cache", __PRETTY_FUNCTION__);
			XmlDocumentCache::documents.clear();
			Logger()->messagef(LOG_DEBUG, "%s: Cleared XML document cache", __PRETTY_FUNCTION__);
		}
};
DECLARE_CUSTOM_COMPONENT_LOADER("XmlDocumentCacheClearer", XmlDocumentCacheClearer, XmlDocumentCacheClearer, ComponentLoader);