summaryrefslogtreecommitdiff
path: root/service
diff options
context:
space:
mode:
author: Dan Goodliffe <dan@randomdan.homeip.net> 2018-03-07 19:33:36 +0000
committer: Dan Goodliffe <dan@randomdan.homeip.net> 2018-03-07 19:33:36 +0000
commit: 6eafb8d4bbc9bfdd225723dbd1ed902d722c7f71 (patch)
tree: 2b11754eede3d672d47310645db9f023131497a5 /service
parent: Store parser flags in service config (diff)
download: mirrorsearch-6eafb8d4bbc9bfdd225723dbd1ed902d722c7f71.tar.bz2
mirrorsearch-6eafb8d4bbc9bfdd225723dbd1ed902d722c7f71.tar.xz
mirrorsearch-6eafb8d4bbc9bfdd225723dbd1ed902d722c7f71.zip
Use curl instead of libxml2's networking as we need more control over HTTP requests
Diffstat (limited to 'service')
-rw-r--r-- service/Jamfile.jam | 2
-rw-r--r-- service/apiImpl.cpp | 62
-rw-r--r-- service/main.cpp | 12
3 files changed, 70 insertions, 6 deletions
diff --git a/service/Jamfile.jam b/service/Jamfile.jam
index e7bfed9..8f15002 100644
--- a/service/Jamfile.jam
+++ b/service/Jamfile.jam
@@ -5,11 +5,13 @@ import testing ;
lib boost_utf : : <name>boost_unit_test_framework ;
lib dbpp-postgresql : : : : <include>/usr/include/dbpp-postgresql ;
lib dryice : : : : <include>/usr/include/icetray ;
+lib curl ;
lib mirrorsearch :
[ glob *.cpp *.ice sql/*.sql : test.cpp ]
:
<slicer>yes
+ <library>curl
<library>..//adhocutil
<library>..//dbppcore
<library>..//boost_system
diff --git a/service/apiImpl.cpp b/service/apiImpl.cpp
index 28d88b4..ab86d98 100644
--- a/service/apiImpl.cpp
+++ b/service/apiImpl.cpp
@@ -9,6 +9,10 @@
#include <libxml/xpathInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
+#include <curl/curl.h>
+
+// CESSO(curl, opt, expr): call curl_easy_setopt(curl.get(), opt, expr) and verify that it
+// returned CURLE_OK; the failure message names the option through #opt.
+// `curl` must be an object exposing .get() that yields the easy handle.
+// The expansion already ends in ';', so a call site written as `CESSO(...);` produces an
+// additional empty statement.
+// NOTE(review): this relies on BOOST_VERIFY_MSG still evaluating its expression when
+// assertions are disabled - confirm against the Boost.Assert documentation.
+#define CESSO(curl, opt, expr) \
+ BOOST_VERIFY_MSG(CURLE_OK == curl_easy_setopt(curl.get(), opt, expr), "Failed setting option " #opt);
namespace MirrorSearch {
SearchImpl::SearchImpl(IceTray::DatabasePoolPtr db) :
@@ -34,14 +38,62 @@ namespace MirrorSearch {
return std::bind(&libxmlErrorHandler<Fmt, P...>, std::placeholders::_1, p...);
}
+ // Report a failed curl call: format fn, errbuf and the extra values p... with Fmt::get
+ // and throw the resulting text as an XmlError. Never returns normally.
+ template<typename Fmt, typename ... P>
+ void
+ curlErrorHandler(const std::string & fn, const char * errbuf, const P & ... p)
+ {
+ auto message = Fmt::get(fn, errbuf, p...);
+ throw XmlError(message);
+ }
+ // Build a curl error callback: a std::bind wrapper around curlErrorHandler<Fmt, P...> whose
+ // first argument (fn) is left as a placeholder, with errbuf and the context values p...
+ // bound in. getDoc passes the result to make_unique as its error handler.
+ // NOTE(review): errbuf is bound as a raw pointer, so the buffer it points at has to stay
+ // alive for as long as the returned callable may be invoked.
+ template<typename Fmt, typename ... P>
+ auto cEHB(const char * errbuf, const P & ... p)
+ {
+ return std::bind(&curlErrorHandler<Fmt, P...>, std::placeholders::_1, errbuf, p...);
+ }
+
typedef UPtr<xmlDoc> xmlDocSPtr;
typedef UPtr<xmlXPathContext> xmlXPathContextSPtr;
typedef UPtr<xmlXPathObject> xmlXPathObjectSPtr;
+ typedef UPtr<xmlParserCtxt> xmlParserCtxtSPtr;
+
+ typedef std::function<size_t(const char *, size_t)> CurlWriteCallback;
- AdHocFormatter(Read, "Failed to read in %? (%?) [%?, %?]");
- static auto getDoc(const ::std::string & url, int flags)
+ // Trampoline with the signature libcurl expects for CURLOPT_WRITEFUNCTION. userdata is the
+ // CurlWriteCallback registered through CURLOPT_WRITEDATA; it is handed the chunk pointer
+ // and the chunk length in bytes (size * nmemb), and its result is returned to libcurl.
+ static size_t write_callback(char * ptr, size_t size, size_t nmemb, void * userdata)
{
- return make_unique(htmlReadFile, xmlFreeDoc, lEHB<Read>(url, flags), url.c_str(), (const char*)NULL, flags);
+ auto & sink = *static_cast<MirrorSearch::CurlWriteCallback *>(userdata);
+ return sink(ptr, size * nmemb);
+ }
+
+ AdHocFormatter(Read, "Failed to read in %? (%?) [%?]");
+ // Download a page with libcurl and stream it into libxml2's HTML push parser.
+ //   ss - service configuration: baseurl is a format that takes fn, parserflags are handed
+ //        to htmlCtxtUseOptions on the parser context.
+ //   fn - value substituted into ss->baseurl to build the request URL.
+ // Returns the parsed document, owned by the returned pointer and released with xmlFreeDoc.
+ // Transfer failures, and a transfer that delivered no data, are raised through
+ // curlErrorHandler<Read>, i.e. as XmlError.
+ UPtr<xmlDoc> getDoc(const SearchServicePtr & ss, const std::string & fn) {
+ auto fmt = AdHoc::Buffer::getFormat(ss->baseurl);
+ auto url = (*fmt % fn).str();
+ // Declared before the curl handle so it is destroyed after it; libcurl writes the
+ // human readable error text for a failed call into this buffer.
+ char errbuf[CURL_ERROR_SIZE] = "";
+
+ // Created lazily by the write callback once the first chunk of the body arrives.
+ xmlParserCtxtSPtr ctx { nullptr, nullptr };
+
+ auto curl = make_unique(curl_easy_init, curl_easy_cleanup, cEHB<Read>(errbuf, url));
+ BOOST_ASSERT(curl);
+ // Without this option libcurl never fills errbuf, so every reported error would carry
+ // an empty message.
+ CESSO(curl, CURLOPT_ERRORBUFFER, errbuf);
+ CESSO(curl, CURLOPT_URL, url.c_str());
+ CESSO(curl, CURLOPT_WRITEFUNCTION, write_callback);
+ CurlWriteCallback cb = [&ctx, &url, &ss](auto data, auto size) {
+ if (!ctx) {
+ // The first chunk seeds the push parser; later chunks are fed with htmlParseChunk.
+ ctx = make_unique(htmlCreatePushParserCtxt, htmlFreeParserCtxt, lEHB<Read>(url),
+ (xmlSAXHandlerPtr)NULL, (void*)NULL, data, size, url.c_str(), XML_CHAR_ENCODING_NONE);
+ htmlCtxtUseOptions(ctx.get(), ss->parserflags);
+ }
+ else {
+ htmlParseChunk(ctx.get(), data, size, 0);
+ }
+ // Returning the full size tells libcurl the whole chunk was consumed.
+ return size;
+ };
+ CESSO(curl, CURLOPT_WRITEDATA, &cb);
+ if (curl_easy_perform(curl.get()) != CURLE_OK) {
+ curlErrorHandler<Read>(failingFunction((void*)&curl_easy_perform), errbuf, url);
+ }
+ // If no data ever reached the write callback there is no parser context. Report that
+ // as an error instead of relying on an assertion, which does not stop execution when
+ // assertions are disabled and would leave ctx->myDoc below as a null dereference.
+ if (!ctx) {
+ curlErrorHandler<Read>(failingFunction((void*)&curl_easy_perform), "No response data received", url);
+ }
+ // Final, empty chunk with terminate=1 tells the parser the document is complete.
+ htmlParseChunk(ctx.get(), "", 0, 1);
+
+ UPtr<xmlDoc> doc = { ctx->myDoc, xmlFreeDoc };
+ return doc;
}
AdHocFormatter(XPathCtx, "Failed to create xpath context in %? (%?)");
@@ -62,9 +114,7 @@ namespace MirrorSearch {
void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const
{
- auto fmt = AdHoc::Buffer::getFormat(s->baseurl);
- auto url = (*fmt % fn).str();
- auto doc = getDoc(url, s->parserflags);
+ auto doc = getDoc(s, fn);
auto xpathCtx = getXPathCxt(doc);
auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);
log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath);
diff --git a/service/main.cpp b/service/main.cpp
index 97bc89e..00563f4 100644
--- a/service/main.cpp
+++ b/service/main.cpp
@@ -2,10 +2,22 @@
#include <Ice/ObjectAdapter.h>
#include <icetrayService.h>
#include "apiImpl.h"
+#include <curl/curl.h>
+#include <libxml/parser.h>
namespace MirrorSearch {
class Api : public IceTray::Service {
public:
+ // Set up the process-wide state of libxml2 and libcurl when the service is constructed.
+ Api()
+ {
+ xmlInitParser();
+ // CURL_GLOBAL_DEFAULT instead of 0: a flags value of 0 asks libcurl to initialise
+ // nothing extra, which leaves out the SSL/socket setup that transfers can depend on.
+ curl_global_init(CURL_GLOBAL_DEFAULT);
+ }
+ // Tear the global state down again, in reverse order of initialisation.
+ ~Api()
+ {
+ curl_global_cleanup();
+ xmlCleanupParser();
+ }
void addObjects(const std::string &, const Ice::CommunicatorPtr & ic, const Ice::StringSeq &, const Ice::ObjectAdapterPtr & adp) override
{
auto dbpool = getConnectionPool(ic, "postgresql", "MirrorSearch");