// NOTE(review): this file arrived with its line breaks collapsed and with
// every <...> token stripped (system include names, template parameter lists
// and explicit template arguments). Those tokens are reconstructed below from
// how they are used in the visible code; each reconstruction is marked and
// should be confirmed against the original sources.

#include "apiImpl.h"
#include "uptr.h"
// NOTE(review): the eight system include names were lost; this list is
// inferred from the identifiers used in this file - confirm against the build.
#include <boost/assert.hpp>
#include <buffer.h>
#include <compileTimeFormatter.h>
#include <curl/curl.h>
#include <functional>
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
#include <sql/getServices.sql.h>

// Sets one option on a UPtr-held curl handle and verifies that libcurl
// accepted it. The expansion already ends with ';'.
#define CESSO(curl, opt, expr) \
    BOOST_VERIFY_MSG(CURLE_OK == curl_easy_setopt(curl.get(), opt, expr), "Failed setting option " #opt);

namespace MirrorSearch {
    // Builds the service on top of the given database connection pool and
    // obtains a logger from the log manager.
    // NOTE(review): the template argument of getLogger was stripped;
    // <SearchImpl> is assumed - confirm.
    SearchImpl::SearchImpl(const DB::ConnectionPoolPtr & db) :
        IceTray::AbstractDatabaseClient(db),
        log(LOGMANAGER()->getLogger<SearchImpl>())
    {
    }

    // Returns the search services produced by the sql::getServices query.
    // NOTE(review): fetch's template argument was stripped; the function's
    // return type, SearchServices, is assumed.
    SearchServices
    SearchImpl::getServices(const ::Ice::Current &)
    {
        return fetch<SearchServices>(sql::getServices);
    }

    // Throws an XmlError whose message is formatted by Fmt from the failing
    // function name, libxml2's last error message and any extra context.
    // NOTE(review): template parameter list reconstructed (Fmt is used as
    // Fmt::get, P as a parameter pack).
    template<typename Fmt, typename ... P>
    void
    libxmlErrorHandler(const std::string & fn, const P & ... p)
    {
        throw XmlError(Fmt::get(fn, xmlGetLastError()->message, p...));
    }

    // Binds libxmlErrorHandler to its context arguments, leaving only the
    // failing function name to be supplied by the caller of the result.
    template<typename Fmt, typename ... P>
    auto
    lEHB(const P & ... p)
    {
        return std::bind(&libxmlErrorHandler<Fmt, P...>, std::placeholders::_1, p...);
    }

    // Throws a CurlError whose message is formatted by Fmt from the failing
    // function name, the supplied error text and any extra context.
    template<typename Fmt, typename ... P>
    void
    curlErrorHandler(const std::string & fn, const char * errbuf, const P & ... p)
    {
        throw CurlError(Fmt::get(fn, errbuf, p...));
    }

    // Binds curlErrorHandler to an error buffer and context arguments. Only
    // the pointer is bound, so the buffer is read when the handler fires.
    template<typename Fmt, typename ... P>
    auto
    cEHB(const char * errbuf, const P & ... p)
    {
        return std::bind(&curlErrorHandler<Fmt, P...>, std::placeholders::_1, errbuf, p...);
    }

    // NOTE(review): the element types below were stripped; they are inferred
    // from the libxml2 functions each pointer is created and released with.
    using xmlDocSPtr = UPtr<xmlDoc>;
    using xmlXPathContextSPtr = UPtr<xmlXPathContext>;
    using xmlXPathObjectSPtr = UPtr<xmlXPathObject>;
    using xmlParserCtxtSPtr = UPtr<xmlParserCtxt>;
    // Signature inferred from write_callback, which calls it with
    // (ptr, size * nmemb) and returns its result as size_t.
    using CurlWriteCallback = std::function<size_t(char *, size_t)>;

    // libcurl write callback: forwards the received bytes to the
    // CurlWriteCallback that getDoc registers through CURLOPT_WRITEDATA.
    static size_t
    write_callback(char * ptr, size_t size, size_t nmemb, void * userdata)
    {
        return (*static_cast<CurlWriteCallback *>(userdata))(ptr, size * nmemb);
    }

    AdHocFormatter(Read, "Failed to read in %? (%?) \n[%?]");

    // Downloads the service's URL for the given file name and parses the
    // response as HTML while it streams in.
    //
    // ss - service description; baseurl is a format string into which fn is
    //      substituted, referrer/useragent are applied when set, and
    //      parserflags are passed to the HTML parser.
    // fn - file name to search for.
    //
    // Returns the parsed document, released with xmlFreeDoc.
    // Throws CurlError when the transfer fails or yields no data, XmlError
    // when no document could be built.
    UPtr<xmlDoc>
    getDoc(const SearchServicePtr & ss, const std::string & fn)
    {
        auto fmt = AdHoc::Buffer::getFormat(ss->baseurl);
        auto url = (fmt % fn).str();
        // Declared before the handle so that it outlives it.
        char errbuf[CURL_ERROR_SIZE] = "";
        xmlParserCtxtSPtr ctx { nullptr, nullptr };

        auto curl = make_unique(curl_easy_init, curl_easy_cleanup, cEHB<Read>(errbuf, url));
        BOOST_ASSERT(curl);
        // Fix: errbuf was handed to every error handler but never registered
        // with libcurl, so reported errors always had an empty detail string.
        CESSO(curl, CURLOPT_ERRORBUFFER, errbuf);
        CESSO(curl, CURLOPT_URL, url.c_str());
        CESSO(curl, CURLOPT_WRITEFUNCTION, write_callback);

        // The first chunk creates the push parser (and is consumed by it);
        // every later chunk is fed to the existing parser.
        CurlWriteCallback cb = [&ctx, &url, &ss](auto data, auto size) {
            if (!ctx) {
                ctx = make_unique(htmlCreatePushParserCtxt, htmlFreeParserCtxt, lEHB<Read>(url),
                        static_cast<xmlSAXHandlerPtr>(nullptr), static_cast<void *>(nullptr),
                        data, size, url.c_str(), XML_CHAR_ENCODING_NONE);
                htmlCtxtUseOptions(ctx.get(), ss->parserflags);
            }
            else {
                htmlParseChunk(ctx.get(), data, size, 0);
            }
            return size;
        };
        CESSO(curl, CURLOPT_WRITEDATA, &cb);

        if (ss->referrer) {
            CESSO(curl, CURLOPT_REFERER, ss->referrer->c_str());
        }
        if (ss->useragent) {
            CESSO(curl, CURLOPT_USERAGENT, ss->useragent->c_str());
        }
        CESSO(curl, CURLOPT_TIMEOUT, 5L);
        // Enables compression
        CESSO(curl, CURLOPT_ACCEPT_ENCODING, "");
        CESSO(curl, CURLOPT_HTTP_CONTENT_DECODING, 1L);
        CESSO(curl, CURLOPT_TCP_FASTOPEN, 1L);
        CESSO(curl, CURLOPT_FAILONERROR, 1L);

        const CURLcode res = curl_easy_perform(curl.get());
        if (res != CURLE_OK) {
            // libcurl may leave the buffer empty even on failure; fall back
            // to the generic description of the result code.
            const char * detail = *errbuf ? errbuf : curl_easy_strerror(res);
            curlErrorHandler<Read>(failingFunction((void*)&curl_easy_perform), detail, url);
        }
        if (!ctx) {
            throw CurlError("Did not retrieve any data.");
        }
        // Zero-length terminating chunk: tells the parser the input is complete.
        htmlParseChunk(ctx.get(), "", 0, 1);
        if (!ctx->myDoc) {
            throw XmlError("Could not construct a document.");
        }
        UPtr<xmlDoc> doc = { ctx->myDoc, xmlFreeDoc };
        return doc;
    }

    AdHocFormatter(XPathCtx, "Failed to create xpath context in %? (%?)");

    // Creates an XPath evaluation context for the document; the context is
    // released with xmlXPathFreeContext.
    static auto
    getXPathCxt(const xmlDocSPtr & doc)
    {
        return make_unique(xmlXPathNewContext, xmlXPathFreeContext, lEHB<XPathCtx>(), doc.get());
    }

    AdHocFormatter(XPathEval, "Failed to evaluate xpath in %? (%?) \n[%?]");

    // Evaluates an XPath expression in the given context and checks that the
    // result has the expected type.
    // Throws XmlError when the result type differs from `type`.
    static auto
    getXPathObj(const ::std::string & xpath, const xmlXPathContextSPtr & ctx, xmlXPathObjectType type)
    {
        auto xpathObj = make_unique(xmlXPathEvalExpression, xmlXPathFreeObject, lEHB<XPathEval>(xpath),
                BAD_CAST xpath.c_str(), ctx.get());
        if (xpathObj->type != type) {
            throw XmlError("Xpath evaluates to wrong type " + xpath);
        }
        return xpathObj;
    }

    // Queries one search service for fn and appends a hit to sh for every
    // listed node whose URL expression yields a non-empty string.
    // Throws XmlError when the list expression produces a null node set.
    void
    SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const
    {
        auto doc = getDoc(s, fn);
        auto xpathCtx = getXPathCxt(doc);
        auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);
        if (!xpathObj->nodesetval) {
            throw XmlError("Nodeset is null");
        }
        log->messagebf(LOG::LogLevel::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath);
        for (int row = 0; row < xpathObj->nodesetval->nodeNr; row += 1) {
            // Evaluate the URL expression relative to the current list node.
            xpathCtx->node = xpathObj->nodesetval->nodeTab[row];
            auto xpathObjI = getXPathObj(s->urlxpath, xpathCtx, xmlXPathObjectType::XPATH_STRING);
            if (xpathObjI->stringval && *xpathObjI->stringval) {
                // NOTE(review): make_shared's template argument was stripped;
                // SearchHit is assumed from the SearchHits element usage.
                sh.push_back(std::make_shared<SearchHit>(0, s->id, (const char *) xpathObjI->stringval));
            }
        }
    }

    // Collects the hits for fn from every service returned by getServices.
    // An exception from any single service propagates to the caller.
    SearchHits
    SearchImpl::getMatches(const ::std::string fn, const ::Ice::Current & c)
    {
        SearchHits sh;
        for (const auto & s : getServices(c)) {
            callService(fn, s, sh);
        }
        return sh;
    }

    // Returns the URL of the first hit for fn, or IceUtil::None when there
    // are no hits.
    ::IceUtil::Optional<::std::string>
    SearchImpl::feelingLucky(const ::std::string fn, const ::Ice::Current & c)
    {
        const auto ms = getMatches(fn, c);
        if (ms.empty()) {
            return IceUtil::None;
        }
        return ms.front()->url;
    }
}