summaryrefslogtreecommitdiff
path: root/project2/xml/xmlDocumentCache.cpp
blob: cbfaf745c9273e558f6237914cae018caf819267 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include <pch.hpp>
#include <scriptLoader.h>
#include "xmlDocumentCache.h"
#include <string.h>
#include <logger.h>
#include <libxml/HTMLparser.h>
#include <boost/bind.hpp>
#include "exceptions.h"
#include "curlHelper.h"
#include "safeMapFind.h"

// Definitions of XmlDocumentCache's static members.
//  - documents: one entry per URL; the mapped value is a nullary callable that,
//    when invoked, either returns the parsed document or throws the failure
//    recorded for that URL (see XmlDocumentCachePopulator below).
//  - queued: URLs already handed to the bulk fetcher; used by queue() to avoid
//    enqueuing the same URL twice, and cleared after cbf.perform().
//  - cbf: the bulk fetcher whose `curls` collection receives the pending
//    populators and whose perform() is invoked from getDocument().
XmlDocumentCache::Documents XmlDocumentCache::documents;
XmlDocumentCache::Queued XmlDocumentCache::queued;
CurlBulkFetcher XmlDocumentCache::cbf;

// Exception types used for failed cache entries: XmlParseError when the
// download succeeded but parsing failed, DownloadFailed when the transfer
// itself failed. Both are constructed from a std::string message in this file.
// NOTE(review): SimpleMessageException is a project macro defined elsewhere
// (presumably exceptions.h) — confirm the exact base class it generates.
SimpleMessageException(XmlParseError);
SimpleMessageException(DownloadFailed);

// Cache-entry helper for failed fetches: never returns, always throws an
// ExceptionType constructed from msg. The DocumentPtr return type exists only
// so the bound call matches the signature of a successful cache entry.
template <typename ExceptionType>
static XmlDocumentCache::DocumentPtr
helperThrow(const std::string & msg)
{
	throw ExceptionType(msg);
}
// Cache-entry helper for successful fetches: hands back the document pointer
// it was bound with, unchanged.
static XmlDocumentCache::DocumentPtr
helperReturnDocument(XmlDocumentCache::DocumentPtr dp)
{
	return dp;
}

class XmlDocumentCachePopulator : public CurlCompleteCallback {
	public:
		XmlDocumentCachePopulator(CurlPtr ch, const Glib::ustring & u, bool h, bool w, const char * e) :
			CurlCompleteCallback(ch),
			handler(boost::bind(&XmlDocumentCachePopulator::append, this, _1, _2)),
			url(u),
			html(h),
			warnings(w),
			encoding(e ? strdup(e) : NULL)
		{
			ch->setReadHandler(handler);
		}
		~XmlDocumentCachePopulator()
		{
			free(encoding);
		}
		void call(CurlBulkFetcher *)
		{
			int flags = 0;
			flags |= warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR;
			xmlDocPtr doc = html ?
				htmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags) :
				xmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags);
			if (!doc) {
				Logger()->messagebf(LOG_DEBUG, "Download of '%s' succeeded, but parsing failed with error  '%s'", url, xmlGetLastError()->message);
				XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
							boost::bind(helperThrow<XmlParseError>, std::string(xmlGetLastError()->message))));
				return;
			}

			// Dirty hack alert
			// xmlpp doesn't play nicely with HTML documents...
			// sooo ummm, lie and hope it doesn't break something else
			doc->type = XML_DOCUMENT_NODE;
			// end hack

			XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
						boost::bind(helperReturnDocument, XmlDocumentCache::DocumentPtr(new xmlpp::Document(doc)))));
			Logger()->messagebf(LOG_DEBUG, "Download of '%s' completed, stored", url);
		}
		void error(CurlBulkFetcher *, const char * error)
		{
			Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error  '%s'", url, error);
			XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
						boost::bind(helperThrow<DownloadFailed>, std::string(error))));
		}
		size_t append(const char * c, size_t b)
		{
			buf.append(c, b);
			return b;
		}

		Curl::ReadHandler handler;
		std::string buf;
		const Glib::ustring url;
		const bool html;
		const bool warnings;
		char * encoding;
};

// Returns the document cached for url, fetching it first when the cache has
// no entry for it. A fetch also runs every other download currently queued
// on the shared bulk fetcher, after which the queued-URL set is emptied.
// The cache entry is a callable; invoking it yields the document or throws
// the failure recorded for this URL.
// NOTE(review): safeMapLookup<DownloadFailed> presumably throws
// DownloadFailed when the key is still absent — confirm in safeMapFind.h.
XmlDocumentCache::DocumentPtr
XmlDocumentCache::getDocument(const Glib::ustring & url, const char * encoding, ExecContext * ec) const
{
	const bool alreadyCached = (documents.find(url) != documents.end());
	if (!alreadyCached) {
		queue(url, encoding, ec);
		cbf.perform();
		queued.clear();
	}
	return safeMapLookup<DownloadFailed>(documents, url)();
}

// Schedules url for download on the shared bulk fetcher unless it is already
// in the queued set. The populator created here records the result in the
// document cache once the transfer finishes; HTML mode and warning handling
// are taken from asHtml(ec) / withWarnings(ec).
void
XmlDocumentCache::queue(const Glib::ustring & url, const char * encoding, ExecContext * ec) const
{
	if (queued.find(url) != queued.end()) {
		// Already pending; nothing to do.
		return;
	}
	cbf.curls.insert(new XmlDocumentCachePopulator(newCurl(ec), url, asHtml(ec), withWarnings(ec), encoding));
	queued.insert(url);
}

// Component loader hook that empties the XML document cache from its
// onIteration() callback, logging before and after the clear.
class XmlDocumentCacheClearer : public ComponentLoader {
	public:
		typedef bool KeyType;

		// Drops every cached document entry.
		void onIteration()
		{
			const char * const where = __PRETTY_FUNCTION__;
			Logger()->messagef(LOG_DEBUG, "%s: Clearing XML document cache", where);
			XmlDocumentCache::documents.clear();
			Logger()->messagef(LOG_DEBUG, "%s: Cleared XML document cache", where);
		}
};
// Registers XmlDocumentCacheClearer under the name "XmlDocumentCacheClearer".
// NOTE(review): the macro is defined elsewhere (presumably scriptLoader.h);
// the roles of the repeated class arguments are not visible here — confirm.
DECLARE_CUSTOM_COMPONENT_LOADER("XmlDocumentCacheClearer", XmlDocumentCacheClearer, XmlDocumentCacheClearer, XmlDocumentCacheClearer);