1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
#include "apiImpl.h"
#include <sql/getServices.sql.h>
#include <buffer.h>
#include <memory>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
namespace MirrorSearch {
// Constructs the search implementation.
//
// The supplied database pool is handed to the IceTray::AbstractDatabaseClient
// base, and the `log` member is initialised with the logger that
// LOGMANAGER() returns for SearchImpl.
SearchImpl::SearchImpl(IceTray::DatabasePoolPtr pool) :
    IceTray::AbstractDatabaseClient(pool),
    log(LOGMANAGER()->getLogger<SearchImpl>())
{
}
// Returns the configured search services.
//
// The list is whatever fetch<SearchServices>() yields for the
// sql::getServices query; the Ice call context is not used.
SearchServices SearchImpl::getServices(const ::Ice::Current&)
{
    auto services = fetch<SearchServices>(sql::getServices);
    return services;
}
// Shared-ownership smart-pointer aliases for the libxml2 structures used
// below: a parsed document, an XPath evaluation context and an XPath result.
using xmlDocSPtr = std::shared_ptr<xmlDoc>;
using xmlXPathContextSPtr = std::shared_ptr<xmlXPathContext>;
using xmlXPathObjectSPtr = std::shared_ptr<xmlXPathObject>;
static auto getDoc(const ::std::string & url, int flags)
{
if (auto doc = xmlDocSPtr(htmlReadFile(url.c_str(), NULL, flags), xmlFreeDoc)) {
return doc;
}
throw XmlError("Failed to open " + url);
}
static auto getXPathCxt(const xmlDocSPtr & doc)
{
if (auto xpathCtx = xmlXPathContextSPtr(xmlXPathNewContext(doc.get()), xmlXPathFreeContext)) {
return xpathCtx;
}
throw XmlError("Failed to create xpath context");
}
static auto getXPathObj(const ::std::string & xpath, const xmlXPathContextSPtr & ctx, xmlXPathObjectType type)
{
if (auto xpathObj = xmlXPathObjectSPtr(xmlXPathEvalExpression(BAD_CAST xpath.c_str(), ctx.get()), xmlXPathFreeObject)) {
if (xpathObj->type != type) {
throw XmlError("Xpath evaluates to wrong type " + xpath);
}
return xpathObj;
}
throw XmlError("Failed to evaluate xpath " + xpath);
}
void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const
{
auto fmt = AdHoc::Buffer::getFormat(s->baseurl);
auto url = (*fmt % fn).str();
auto doc = getDoc(url,
HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD | HTML_PARSE_NOIMPLIED |
HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
auto xpathCtx = getXPathCxt(doc);
auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);
log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath);
for (int row = 0; row < xpathObj->nodesetval->nodeNr; row += 1) {
xpathCtx->node = xpathObj->nodesetval->nodeTab[row];
auto xpathObjI = getXPathObj(s->urlxpath, xpathCtx, xmlXPathObjectType::XPATH_STRING);
if (xpathObjI->stringval && *xpathObjI->stringval) {
sh.push_back(new SearchHit(0, s->id, (const char *) xpathObjI->stringval));
}
}
}
// Collects hits for `fn` from every service returned by getServices().
//
// Services are queried one after another through callService(), in the order
// getServices() returns them, and all hits are accumulated into one list.
// Any exception raised while querying a service propagates to the caller.
SearchHits SearchImpl::getMatches(const ::std::string & fn, const ::Ice::Current & c)
{
    SearchHits hits;
    const auto services = getServices(c);
    for (const auto & service : services) {
        callService(fn, service, hits);
    }
    return hits;
}
// Returns the URL of the first hit produced by getMatches() for `fn`,
// or IceUtil::None when there are no hits at all.
::IceUtil::Optional<::std::string> SearchImpl::feelingLucky(const ::std::string & fn, const ::Ice::Current & c)
{
    const auto hits = getMatches(fn, c);
    if (!hits.empty()) {
        return hits.front()->url;
    }
    return IceUtil::None;
}
}
|