1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
#include "apiImpl.h"
#include "uptr.h"
#include <sql/getServices.sql.h>
#include <buffer.h>
#include <compileTimeFormatter.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
namespace MirrorSearch {
/// Construct the search servant.
///
/// @param db Database pool handed straight to the
///           IceTray::AbstractDatabaseClient base class.
///
/// The logger member is obtained from LOGMANAGER() for the SearchImpl type.
SearchImpl::SearchImpl(IceTray::DatabasePoolPtr db)
	: IceTray::AbstractDatabaseClient(db)
	, log(LOGMANAGER()->getLogger<SearchImpl>())
{
	// Nothing further to do: all state is set up in the initializer list.
}
/// Return the configured search services.
///
/// The result is whatever fetch<SearchServices> produces for the
/// sql::getServices statement; the Ice call context is unused.
SearchServices SearchImpl::getServices(const ::Ice::Current&)
{
	SearchServices services = fetch<SearchServices>(sql::getServices);
	return services;
}
/// Throw an XmlError describing a failed libxml2 call.
///
/// @tparam Fmt Formatter type whose static get() builds the exception text.
/// @param fn   First value handed to the formatter (the callers' format
///             strings use it as the "in %?" part of the message).
/// @param p    Additional values appended after the libxml2 error message.
/// @throws XmlError always; this function never returns normally.
template<typename Fmt, typename ... P>
void
libxmlErrorHandler(const std::string & fn, const P & ... p)
{
	// xmlGetLastError() yields a null pointer when libxml2 has not recorded
	// an error for this thread, and the message field itself may be unset,
	// so guard both before dereferencing.
	const auto * lastError = xmlGetLastError();
	const char * message = (lastError && lastError->message)
		? lastError->message
		: "unknown error";
	throw XmlError(Fmt::get(fn, message, p...));
}
/// Build an error-handler callable for a libxml2 wrapper call.
///
/// The returned binder forwards its first call argument, followed by copies
/// of the values in @p p, to libxmlErrorHandler<Fmt, P...>.
///
/// @tparam Fmt Formatter type used to compose the exception message.
/// @param p    Context values captured (by copy) into the binder.
template<typename Fmt, typename ... P>
auto lEHB(const P & ... p)
{
	using std::placeholders::_1;
	auto * const handler = &libxmlErrorHandler<Fmt, P...>;
	return std::bind(handler, _1, p...);
}
// Owning handles (UPtr) for the libxml2 objects used in this file.
using xmlDocSPtr = UPtr<xmlDoc>;
using xmlXPathContextSPtr = UPtr<xmlXPathContext>;
using xmlXPathObjectSPtr = UPtr<xmlXPathObject>;
// Message used when reading a document fails: function, libxml2 message,
// url and parser flags, in that order.
AdHocFormatter(Read, "Failed to read in %? (%?) [%?, %?]");

/// Read the HTML document at @p url with htmlReadFile.
///
/// @param url   Location passed to htmlReadFile.
/// @param flags htmlParserOption bit mask passed to htmlReadFile.
/// @return The handle produced by make_unique, released with xmlFreeDoc.
///
/// No explicit encoding is supplied (null encoding argument).
/// NOTE(review): the error handler is presumably invoked by make_unique when
/// htmlReadFile returns null, resulting in an XmlError — confirm in uptr.h.
static auto getDoc(const ::std::string & url, int flags)
{
	const char * const noEncoding = static_cast<const char *>(nullptr);
	return make_unique(
			htmlReadFile, xmlFreeDoc,
			lEHB<Read>(url, flags),
			url.c_str(), noEncoding, flags);
}
// Message used when an XPath context cannot be created: function and
// libxml2 message, in that order.
AdHocFormatter(XPathCtx, "Failed to create xpath context in %? (%?)");

/// Create an XPath evaluation context for @p doc via xmlXPathNewContext.
///
/// @param doc Parsed document the context is created for.
/// @return The handle produced by make_unique, released with
///         xmlXPathFreeContext.
///
/// NOTE(review): the error handler is presumably invoked by make_unique when
/// xmlXPathNewContext returns null — confirm in uptr.h.
static auto getXPathCxt(const xmlDocSPtr & doc)
{
	return make_unique(
			xmlXPathNewContext, xmlXPathFreeContext,
			lEHB<XPathCtx>(),
			doc.get());
}
// Message used when an XPath expression cannot be evaluated: function,
// libxml2 message and the expression, in that order.
AdHocFormatter(XPathEval, "Failed to evaluate xpath in %? (%?) [%?]");

/// Evaluate @p xpath in @p ctx and require a result of the given type.
///
/// @param xpath XPath expression text.
/// @param ctx   Context to evaluate in (its current node is used as-is).
/// @param type  Result type the caller expects.
/// @return The handle produced by make_unique, released with
///         xmlXPathFreeObject.
/// @throws XmlError if the result's type differs from @p type.
static auto getXPathObj(const ::std::string & xpath, const xmlXPathContextSPtr & ctx, xmlXPathObjectType type)
{
	auto result = make_unique(
			xmlXPathEvalExpression, xmlXPathFreeObject,
			lEHB<XPathEval>(xpath),
			BAD_CAST xpath.c_str(), ctx.get());
	if (result->type == type) {
		return result;
	}
	throw XmlError("Xpath evaluates to wrong type " + xpath);
}
void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const
{
auto fmt = AdHoc::Buffer::getFormat(s->baseurl);
auto url = (*fmt % fn).str();
auto doc = getDoc(url,
HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD | HTML_PARSE_NOIMPLIED |
HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
auto xpathCtx = getXPathCxt(doc);
auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);
log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath);
for (int row = 0; row < xpathObj->nodesetval->nodeNr; row += 1) {
xpathCtx->node = xpathObj->nodesetval->nodeTab[row];
auto xpathObjI = getXPathObj(s->urlxpath, xpathCtx, xmlXPathObjectType::XPATH_STRING);
if (xpathObjI->stringval && *xpathObjI->stringval) {
sh.push_back(new SearchHit(0, s->id, (const char *) xpathObjI->stringval));
}
}
}
/// Collect the hits for @p fn from every configured service.
///
/// @param fn Value passed to each service query.
/// @param c  Ice call context, forwarded to getServices.
/// @return Hits appended by callService for each service, in service order.
SearchHits SearchImpl::getMatches(const ::std::string & fn, const ::Ice::Current & c)
{
	SearchHits hits;
	const auto services = getServices(c);
	for (const auto & service : services) {
		callService(fn, service, hits);
	}
	return hits;
}
/// Return the URL of the first hit for @p fn, if there is one.
///
/// @param fn Value passed to getMatches.
/// @param c  Ice call context, forwarded to getMatches.
/// @return The first hit's url, or IceUtil::None when there are no hits.
::IceUtil::Optional<::std::string> SearchImpl::feelingLucky(const ::std::string & fn, const ::Ice::Current & c)
{
	const auto hits = getMatches(fn, c);
	if (!hits.empty()) {
		return hits.front()->url;
	}
	return IceUtil::None;
}
}
|