summaryrefslogtreecommitdiff
path: root/service/apiImpl.cpp
blob: 588d360f2ecd2675490d4bfe32dbc31b8601db76 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include "apiImpl.h"

#include <sql/getServices.sql.h>
#include <buffer.h>
#include <memory>

#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>

namespace MirrorSearch {
	/// Construct the search servant.
	///
	/// @param db database pool handed to the IceTray::AbstractDatabaseClient base.
	///
	/// The `log` member is initialised with the logger that the log manager
	/// returns for the SearchImpl type.
	SearchImpl::SearchImpl(IceTray::DatabasePoolPtr db) :
		IceTray::AbstractDatabaseClient(db),
		log(LOGMANAGER()->getLogger<SearchImpl>())
	{
	}

	/// Return the configured search services.
	///
	/// Runs the `sql::getServices` statement through the database client's
	/// `fetch` helper and returns the resulting SearchServices. The Ice call
	/// context is not used.
	SearchServices SearchImpl::getServices(const ::Ice::Current&)
	{
		SearchServices services = fetch<SearchServices>(sql::getServices);
		return services;
	}

	// Shared-ownership handles for libxml2 objects. The helpers below attach
	// the matching libxml2 free function as the deleter when they create one.
	using xmlDocSPtr = std::shared_ptr<xmlDoc>;
	using xmlXPathContextSPtr = std::shared_ptr<xmlXPathContext>;
	using xmlXPathObjectSPtr = std::shared_ptr<xmlXPathObject>;

	static auto getDoc(const ::std::string & url, int flags)
	{
		if (auto doc = xmlDocSPtr(htmlReadFile(url.c_str(), NULL, flags), xmlFreeDoc)) {
			return doc;
		}
		throw XmlError("Failed to open " + url);
	}

	static auto getXPathCxt(const xmlDocSPtr & doc)
	{
		if (auto xpathCtx = xmlXPathContextSPtr(xmlXPathNewContext(doc.get()), xmlXPathFreeContext)) {
			return xpathCtx;
		}
		throw XmlError("Failed to create xpath context");
	}

	static auto getXPathObj(const ::std::string & xpath, const xmlXPathContextSPtr & ctx, xmlXPathObjectType type)
	{
		if (auto xpathObj = xmlXPathObjectSPtr(xmlXPathEvalExpression(BAD_CAST xpath.c_str(), ctx.get()), xmlXPathFreeObject)) {
			if (xpathObj->type != type) {
				throw XmlError("Xpath evaluates to wrong type " + xpath);
			}
			return xpathObj;
		}
		throw XmlError("Failed to evaluate xpath " + xpath);
	}

	void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const
	{
		auto fmt = AdHoc::Buffer::getFormat(s->baseurl);
		auto url = (*fmt % fn).str();
		auto doc = getDoc(url,
				HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD | HTML_PARSE_NOIMPLIED |
				HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
		auto xpathCtx = getXPathCxt(doc);
		auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);
		log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath);
		for (int row = 0; row < xpathObj->nodesetval->nodeNr; row += 1) {
			xpathCtx->node = xpathObj->nodesetval->nodeTab[row];
			auto xpathObjI = getXPathObj(s->urlxpath, xpathCtx, xmlXPathObjectType::XPATH_STRING);
			if (xpathObjI->stringval && *xpathObjI->stringval) {
				sh.push_back(new SearchHit(0, s->id, (const char *) xpathObjI->stringval));
			}
		}
	}

	/// Collect hits for `fn` from every configured search service.
	///
	/// Services are obtained from getServices and queried one after another
	/// with callService; all hits are accumulated into a single collection.
	///
	/// @param fn value passed to each service query.
	/// @param c  Ice call context, forwarded to getServices.
	/// @return the accumulated hits (empty when no service produced any).
	SearchHits SearchImpl::getMatches(const ::std::string & fn, const ::Ice::Current & c)
	{
		SearchHits collected;
		const auto services = getServices(c);
		for (const auto & service : services) {
			callService(fn, service, collected);
		}
		return collected;
	}

	/// Return the url of the first hit for `fn`, if there is one.
	///
	/// @param fn value passed to getMatches.
	/// @param c  Ice call context, forwarded to getMatches.
	/// @return the first hit's url, or IceUtil::None when getMatches returns
	///         no hits.
	::IceUtil::Optional<::std::string> SearchImpl::feelingLucky(const ::std::string & fn, const ::Ice::Current & c)
	{
		const auto hits = getMatches(fn, c);
		if (!hits.empty()) {
			return hits.front()->url;
		}
		return IceUtil::None;
	}
}