From 6eafb8d4bbc9bfdd225723dbd1ed902d722c7f71 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Wed, 7 Mar 2018 19:33:36 +0000 Subject: Use curl instead of libxml2's networking as we need more control over HTTP requests --- service/Jamfile.jam | 2 ++ service/apiImpl.cpp | 62 +++++++++++++++++++++++++++++++++++++++++++++++------ service/main.cpp | 12 +++++++++++ 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/service/Jamfile.jam b/service/Jamfile.jam index e7bfed9..8f15002 100644 --- a/service/Jamfile.jam +++ b/service/Jamfile.jam @@ -5,11 +5,13 @@ import testing ; lib boost_utf : : boost_unit_test_framework ; lib dbpp-postgresql : : : : /usr/include/dbpp-postgresql ; lib dryice : : : : /usr/include/icetray ; +lib curl ; lib mirrorsearch : [ glob *.cpp *.ice sql/*.sql : test.cpp ] : yes + curl ..//adhocutil ..//dbppcore ..//boost_system diff --git a/service/apiImpl.cpp b/service/apiImpl.cpp index 28d88b4..ab86d98 100644 --- a/service/apiImpl.cpp +++ b/service/apiImpl.cpp @@ -9,6 +9,10 @@ #include #include #include +#include + +#define CESSO(curl, opt, expr) \ + BOOST_VERIFY_MSG(CURLE_OK == curl_easy_setopt(curl.get(), opt, expr), "Failed setting option " #opt); namespace MirrorSearch { SearchImpl::SearchImpl(IceTray::DatabasePoolPtr db) : @@ -34,14 +38,62 @@ namespace MirrorSearch { return std::bind(&libxmlErrorHandler, std::placeholders::_1, p...); } + template + void + curlErrorHandler(const std::string & fn, const char * errbuf, const P & ... p) + { + throw XmlError(Fmt::get(fn, errbuf, p...)); + } + template + auto cEHB(const char * errbuf, const P & ... p) + { + return std::bind(&curlErrorHandler, std::placeholders::_1, errbuf, p...); + } + typedef UPtr xmlDocSPtr; typedef UPtr xmlXPathContextSPtr; typedef UPtr xmlXPathObjectSPtr; + typedef UPtr xmlParserCtxtSPtr; + + typedef std::function CurlWriteCallback; - AdHocFormatter(Read, "Failed to read in %? (%?) [%?, %?]"); - static auto getDoc(const ::std::string & url, int flags) + static size_t write_callback(char * ptr, size_t size, size_t nmemb, void * userdata) { - return make_unique(htmlReadFile, xmlFreeDoc, lEHB(url, flags), url.c_str(), (const char*)NULL, flags); + return (*(MirrorSearch::CurlWriteCallback *)(userdata))(ptr, size * nmemb); + } + + AdHocFormatter(Read, "Failed to read in %? (%?) [%?]"); + UPtr getDoc(const SearchServicePtr & ss, const std::string & fn) { + auto fmt = AdHoc::Buffer::getFormat(ss->baseurl); + auto url = (*fmt % fn).str(); + char errbuf[CURL_ERROR_SIZE] = ""; + + xmlParserCtxtSPtr ctx { nullptr, nullptr }; + + auto curl = make_unique(curl_easy_init, curl_easy_cleanup, cEHB(errbuf, url)); + BOOST_ASSERT(curl); + CESSO(curl, CURLOPT_URL, url.c_str()); + CESSO(curl, CURLOPT_WRITEFUNCTION, write_callback); + CurlWriteCallback cb = [&ctx, &url, &ss](auto data, auto size) { + if (!ctx) { + ctx = make_unique(htmlCreatePushParserCtxt, htmlFreeParserCtxt, lEHB(url), + (xmlSAXHandlerPtr)NULL, (void*)NULL, data, size, url.c_str(), XML_CHAR_ENCODING_NONE); + htmlCtxtUseOptions(ctx.get(), ss->parserflags); + } + else { + htmlParseChunk(ctx.get(), data, size, 0); + } + return size; + }; + CESSO(curl, CURLOPT_WRITEDATA, &cb); + if (curl_easy_perform(curl.get()) != CURLE_OK) { + curlErrorHandler(failingFunction((void*)&curl_easy_perform), errbuf, url); + } + BOOST_VERIFY_MSG(ctx, "No ctx and no previous error should never happen."); + htmlParseChunk(ctx.get(), "", 0, 1); + + UPtr doc = { ctx->myDoc, xmlFreeDoc }; + return doc; } AdHocFormatter(XPathCtx, "Failed to create xpath context in %? (%?)"); @@ -62,9 +114,7 @@ namespace MirrorSearch { void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const { - auto fmt = AdHoc::Buffer::getFormat(s->baseurl); - auto url = (*fmt % fn).str(); - auto doc = getDoc(url, s->parserflags); + auto doc = getDoc(s, fn); auto xpathCtx = getXPathCxt(doc); auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET); log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath); diff --git a/service/main.cpp b/service/main.cpp index 97bc89e..00563f4 100644 --- a/service/main.cpp +++ b/service/main.cpp @@ -2,10 +2,22 @@ #include #include #include "apiImpl.h" +#include +#include namespace MirrorSearch { class Api : public IceTray::Service { public: + Api() + { + xmlInitParser(); + curl_global_init(0); + } + ~Api() + { + xmlCleanupParser(); + curl_global_cleanup(); + } void addObjects(const std::string &, const Ice::CommunicatorPtr & ic, const Ice::StringSeq &, const Ice::ObjectAdapterPtr & adp) override { auto dbpool = getConnectionPool(ic, "postgresql", "MirrorSearch"); -- cgit v1.2.3