1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#include <pch.hpp>
#include <scriptLoader.h>
#include "xmlDocumentCache.h"
#include <string.h>
#include <logger.h>
#include <libxml/HTMLparser.h>
#include <boost/bind.hpp>
#include "exceptions.h"
#include "curlHelper.h"
#include "safeMapFind.h"
// Definitions of XmlDocumentCache's static members.
// documents: URL -> bound callable; entries are inserted by XmlDocumentCachePopulator
// and invoked by getDocument().
XmlDocumentCache::Documents XmlDocumentCache::documents;
// queued: URLs that queue() has already handed to the bulk fetcher; cleared by
// getDocument() after cbf.perform().
XmlDocumentCache::Queued XmlDocumentCache::queued;
// cbf: the bulk fetcher that queue() adds populators to and getDocument() runs.
CurlBulkFetcher XmlDocumentCache::cbf;
// Exception types used in this file, declared through the SimpleMessageException
// macro (defined elsewhere; presumably in exceptions.h -- confirm).
SimpleMessageException(XmlParseError);
SimpleMessageException(DownloadFailed);
/// Throws an Exception constructed from msg; never returns normally.
///
/// The DocumentPtr return type is there so that a bound call to this function
/// can be stored in XmlDocumentCache::documents next to bound calls of
/// helperReturnDocument: invoking the stored entry then re-raises the failure.
///
/// @tparam Exception exception type to throw; must be constructible from std::string.
/// @param msg message passed to the Exception constructor.
template <class Exception>
static XmlDocumentCache::DocumentPtr helperThrow(const std::string & msg) {
throw Exception(msg);
}
/// Returns the document pointer it is given, unchanged.
///
/// Exists so a successfully parsed document can be stored in
/// XmlDocumentCache::documents as a bound callable, the same shape used for
/// failures (see helperThrow).
///
/// @param dp the document to hand back.
/// @return dp.
static XmlDocumentCache::DocumentPtr helperReturnDocument(XmlDocumentCache::DocumentPtr dp) {
return dp;
}
class XmlDocumentCachePopulator : public CurlCompleteCallback {
public:
XmlDocumentCachePopulator(CurlPtr ch, const Glib::ustring & u, bool h, bool w, const char * e) :
CurlCompleteCallback(ch),
handler(boost::bind(&XmlDocumentCachePopulator::append, this, _1, _2)),
url(u),
html(h),
warnings(w),
encoding(e ? strdup(e) : NULL)
{
ch->setReadHandler(handler);
}
~XmlDocumentCachePopulator()
{
free(encoding);
}
void call(CurlBulkFetcher *)
{
int flags = 0;
flags |= warnings ? 0 : XML_PARSE_NOWARNING | XML_PARSE_NOERROR;
xmlDocPtr doc = html ?
htmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags) :
xmlReadMemory(buf.c_str(), buf.length(), url.c_str(), encoding, flags);
if (!doc) {
Logger()->messagebf(LOG_DEBUG, "Download of '%s' succeeded, but parsing failed with error '%s'", url, xmlGetLastError()->message);
XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
boost::bind(helperThrow<XmlParseError>, std::string(xmlGetLastError()->message))));
return;
}
// Dirty hack alert
// xmlpp doesn't play nicely with HTML documents...
// sooo ummm, lie and hope it doesn't break something else
doc->type = XML_DOCUMENT_NODE;
// end hack
XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
boost::bind(helperReturnDocument, XmlDocumentCache::DocumentPtr(new xmlpp::Document(doc)))));
Logger()->messagebf(LOG_DEBUG, "Download of '%s' completed, stored", url);
}
void error(CurlBulkFetcher *, const char * error)
{
Logger()->messagebf(LOG_DEBUG, "Download of '%s' failed with error '%s'", url, error);
XmlDocumentCache::documents.insert(XmlDocumentCache::Documents::value_type(url,
boost::bind(helperThrow<DownloadFailed>, std::string(error))));
}
size_t append(const char * c, size_t b)
{
buf.append(c, b);
return b;
}
Curl::ReadHandler handler;
std::string buf;
const Glib::ustring url;
const bool html;
const bool warnings;
char * encoding;
};
/// Returns the document cached for `url`, fetching it first if there is no
/// entry for it yet.
///
/// When `url` has no entry in `documents`, it is queued, the bulk fetcher is
/// run and the set of queued URLs is cleared. The entry is then looked up via
/// safeMapLookup<DownloadFailed> and invoked; entries stored by
/// XmlDocumentCachePopulator either return the document or throw
/// XmlParseError / DownloadFailed.
///
/// @param url      address of the document, also the cache key.
/// @param encoding encoding name forwarded to queue(), may be NULL.
/// @param ec       execution context forwarded to queue().
XmlDocumentCache::DocumentPtr
XmlDocumentCache::getDocument(const Glib::ustring & url, const char * encoding, ExecContext * ec) const
{
	const bool alreadyCached = (documents.find(url) != documents.end());
	if (!alreadyCached) {
		queue(url, encoding, ec);
		cbf.perform();
		queued.clear();
	}
	// presumably safeMapLookup throws DownloadFailed if the fetch left no entry -- confirm
	return safeMapLookup<DownloadFailed>(documents, url)();
}
/// Schedules `url` for download unless it is already in `queued`.
///
/// A new XmlDocumentCachePopulator is added to the bulk fetcher's `curls` and
/// the URL is remembered in `queued`; nothing is fetched until the fetcher is
/// run (see getDocument()).
///
/// @param url      address to download.
/// @param encoding encoding name handed to the populator, may be NULL.
/// @param ec       execution context used for newCurl(), asHtml() and withWarnings().
void
XmlDocumentCache::queue(const Glib::ustring & url, const char * encoding, ExecContext * ec) const
{
	if (queued.find(url) != queued.end()) {
		// Already scheduled; do not add a second transfer for the same URL.
		return;
	}
	cbf.curls.insert(
			new XmlDocumentCachePopulator(newCurl(ec), url, asHtml(ec), withWarnings(ec), encoding));
	queued.insert(url);
}
/// Component whose per-iteration hook empties the XML document cache.
class XmlDocumentCacheClearer : public ComponentLoader {
	public:
		using KeyType = bool;

		/// Removes every entry from XmlDocumentCache::documents, writing a
		/// debug-level log line before and after the clear.
		void onIteration() override
		{
			Logger()->messagef(LOG_DEBUG, "%s: Clearing XML document cache", __PRETTY_FUNCTION__);
			XmlDocumentCache::documents.clear();
			Logger()->messagef(LOG_DEBUG, "%s: Cleared XML document cache", __PRETTY_FUNCTION__);
		}
};
// Declares XmlDocumentCacheClearer as a component named "XmlDocumentCacheClearer"
// (DECLARE_COMPONENT is defined elsewhere; presumably this registers the class so
// its onIteration() hook gets called -- confirm against the macro's definition).
DECLARE_COMPONENT("XmlDocumentCacheClearer", XmlDocumentCacheClearer);
|