summaryrefslogtreecommitdiff
path: root/service/apiImpl.cpp
blob: 1de31cb2cbd7170354314cc29b79182f6471cfa3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include "apiImpl.h"
#include "uptr.h"

#include <sql/getServices.sql.h>
#include <buffer.h>
#include <compileTimeFormatter.h>

#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>

namespace MirrorSearch {
	// Constructs the search servant: passes the database pool on to the
	// AbstractDatabaseClient base and asks LOGMANAGER() for a logger typed on
	// SearchImpl, which is kept in the `log` member.
	SearchImpl::SearchImpl(IceTray::DatabasePoolPtr db) :
		IceTray::AbstractDatabaseClient(db),
		log(LOGMANAGER()->getLogger<SearchImpl>())
	{
	}

	// Returns the result of running the getServices SQL through fetch().
	// The Ice call context is accepted for the interface but not used.
	SearchServices SearchImpl::getServices(const ::Ice::Current&)
	{
		SearchServices services = fetch<SearchServices>(sql::getServices);
		return services;
	}

	// Always throws XmlError. The message is produced by Fmt::get from, in
	// order: fn, the text of libxml2's most recent error, and the extra
	// values p...
	//
	// fn: identifies the failing call (supplied by whoever invokes the handler).
	// p:  additional context values appended to the formatted message.
	template<typename Fmt, typename ... P>
	void
	libxmlErrorHandler(const std::string & fn, const P & ... p)
	{
		// xmlGetLastError() returns NULL when libxml2 has not recorded an error,
		// and the message pointer itself may be unset; fall back to a fixed
		// text rather than dereferencing a null pointer while reporting a failure.
		const auto * lastError = xmlGetLastError();
		const char * message = (lastError && lastError->message)
			? lastError->message
			: "unknown error";
		throw XmlError(Fmt::get(fn, message, p...));
	}
	// Builds an error-handler callable for the formatter Fmt: the callable
	// forwards its first argument, followed by stored copies of p..., to
	// libxmlErrorHandler<Fmt, P...>.
	template<typename Fmt, typename ... P>
	auto lEHB(const P & ... p)
	{
		void (*handler)(const std::string &, const P & ...) = &libxmlErrorHandler<Fmt, P...>;
		return std::bind(handler, std::placeholders::_1, p...);
	}

	// Owning-pointer aliases (UPtr comes from uptr.h) for the libxml2 objects
	// used below.
	using xmlDocSPtr = UPtr<xmlDoc>;
	using xmlXPathContextSPtr = UPtr<xmlXPathContext>;
	using xmlXPathObjectSPtr = UPtr<xmlXPathObject>;

	AdHocFormatter(Read, "Failed to read in %? (%?) [%?, %?]");
	// Reads the HTML document at `url` with htmlReadFile, using no explicit
	// encoding and the given parser `flags`. The result is wrapped by
	// make_unique (uptr.h) together with xmlFreeDoc and a Read-formatted error
	// handler carrying url and flags.
	// NOTE(review): make_unique is not visible here; presumably it invokes the
	// handler (which throws XmlError) when htmlReadFile yields null - confirm.
	static auto getDoc(const ::std::string & url, int flags)
	{
		return make_unique(
				htmlReadFile, xmlFreeDoc,
				lEHB<Read>(url, flags),
				url.c_str(), static_cast<const char *>(nullptr), flags);
	}

	AdHocFormatter(XPathCtx, "Failed to create xpath context in %? (%?)");
	// Creates an XPath context for `doc` via xmlXPathNewContext, wrapped by
	// make_unique (uptr.h) with xmlXPathFreeContext and an XPathCtx-formatted
	// error handler.
	static auto getXPathCxt(const xmlDocSPtr & doc)
	{
		return make_unique(
				xmlXPathNewContext, xmlXPathFreeContext,
				lEHB<XPathCtx>(),
				doc.get());
	}

	AdHocFormatter(XPathEval, "Failed to evaluate xpath in %? (%?) [%?]");
	// Evaluates the expression `xpath` against `ctx` and returns the wrapped
	// result object, provided its type equals `type`.
	// Throws XmlError("Xpath evaluates to wrong type ...") when the result has
	// a different type.
	static auto getXPathObj(const ::std::string & xpath, const xmlXPathContextSPtr & ctx, xmlXPathObjectType type)
	{
		auto result = make_unique(
				xmlXPathEvalExpression, xmlXPathFreeObject,
				lEHB<XPathEval>(xpath),
				BAD_CAST xpath.c_str(), ctx.get());
		if (result->type == type) {
			return result;
		}
		throw XmlError("Xpath evaluates to wrong type " + xpath);
	}

	void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const
	{
		auto fmt = AdHoc::Buffer::getFormat(s->baseurl);
		auto url = (*fmt % fn).str();
		auto doc = getDoc(url,
				HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD | HTML_PARSE_NOIMPLIED |
				HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
		auto xpathCtx = getXPathCxt(doc);
		auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);
		log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath);
		for (int row = 0; row < xpathObj->nodesetval->nodeNr; row += 1) {
			xpathCtx->node = xpathObj->nodesetval->nodeTab[row];
			auto xpathObjI = getXPathObj(s->urlxpath, xpathCtx, xmlXPathObjectType::XPATH_STRING);
			if (xpathObjI->stringval && *xpathObjI->stringval) {
				sh.push_back(new SearchHit(0, s->id, (const char *) xpathObjI->stringval));
			}
		}
	}

	// Calls every service returned by getServices(c) with `fn` and returns
	// the hits they produced, accumulated in service order.
	SearchHits SearchImpl::getMatches(const ::std::string & fn, const ::Ice::Current & c)
	{
		SearchHits hits;
		const auto services = getServices(c);
		for (const auto & service : services) {
			callService(fn, service, hits);
		}
		return hits;
	}

	// Returns the url of the first hit from getMatches(fn, c), or
	// IceUtil::None when there are no hits.
	::IceUtil::Optional<::std::string> SearchImpl::feelingLucky(const ::std::string & fn, const ::Ice::Current & c)
	{
		const auto hits = getMatches(fn, c);
		if (!hits.empty()) {
			return hits.front()->url;
		}
		return IceUtil::None;
	}
}