diff options
| -rw-r--r-- | service/Jamfile.jam | 2 | ||||
| -rw-r--r-- | service/apiImpl.cpp | 62 | ||||
| -rw-r--r-- | service/main.cpp | 12 | 
3 files changed, 70 insertions, 6 deletions
diff --git a/service/Jamfile.jam b/service/Jamfile.jam index e7bfed9..8f15002 100644 --- a/service/Jamfile.jam +++ b/service/Jamfile.jam @@ -5,11 +5,13 @@ import testing ;  lib boost_utf : : <name>boost_unit_test_framework ;  lib dbpp-postgresql : : : : <include>/usr/include/dbpp-postgresql ;  lib dryice : : : : <include>/usr/include/icetray ; +lib curl ;  lib mirrorsearch :  	[ glob *.cpp *.ice sql/*.sql : test.cpp ]  	:  	<slicer>yes +	<library>curl  	<library>..//adhocutil  	<library>..//dbppcore  	<library>..//boost_system diff --git a/service/apiImpl.cpp b/service/apiImpl.cpp index 28d88b4..ab86d98 100644 --- a/service/apiImpl.cpp +++ b/service/apiImpl.cpp @@ -9,6 +9,10 @@  #include <libxml/xpathInternals.h>  #include <libxml/HTMLparser.h>  #include <libxml/HTMLtree.h> +#include <curl/curl.h> + +#define CESSO(curl, opt, expr) \ +	BOOST_VERIFY_MSG(CURLE_OK == curl_easy_setopt(curl.get(), opt, expr), "Failed setting option " #opt);  namespace MirrorSearch {  	SearchImpl::SearchImpl(IceTray::DatabasePoolPtr db) : @@ -34,14 +38,62 @@ namespace MirrorSearch {  		return std::bind(&libxmlErrorHandler<Fmt, P...>, std::placeholders::_1, p...);  	} +	template<typename Fmt, typename ... P> +	void +	curlErrorHandler(const std::string & fn, const char * errbuf, const P & ... p) +	{ +		throw XmlError(Fmt::get(fn, errbuf, p...)); +	} +	template<typename Fmt, typename ... P> +	auto cEHB(const char * errbuf, const P & ... p) +	{ +		return std::bind(&curlErrorHandler<Fmt, P...>, std::placeholders::_1, errbuf, p...); +	} +  	typedef UPtr<xmlDoc> xmlDocSPtr;  	typedef UPtr<xmlXPathContext> xmlXPathContextSPtr;  	typedef UPtr<xmlXPathObject> xmlXPathObjectSPtr; +	typedef UPtr<xmlParserCtxt> xmlParserCtxtSPtr; + +	typedef std::function<size_t(const char *, size_t)> CurlWriteCallback; -	AdHocFormatter(Read, "Failed to read in %? (%?) [%?, %?]"); -	static auto getDoc(const ::std::string & url, int flags) +	static size_t write_callback(char * ptr, size_t size, size_t nmemb, void * userdata)  	{ -		return make_unique(htmlReadFile, xmlFreeDoc, lEHB<Read>(url, flags), url.c_str(), (const char*)NULL, flags); +		return (*(MirrorSearch::CurlWriteCallback *)(userdata))(ptr, size * nmemb); +	} + +	AdHocFormatter(Read, "Failed to read in %? (%?) [%?]"); +	UPtr<xmlDoc> getDoc(const SearchServicePtr & ss, const std::string & fn) { +		auto fmt = AdHoc::Buffer::getFormat(ss->baseurl); +		auto url = (*fmt % fn).str(); +		char errbuf[CURL_ERROR_SIZE] = ""; + +		xmlParserCtxtSPtr ctx { nullptr, nullptr }; + +		auto curl = make_unique(curl_easy_init, curl_easy_cleanup, cEHB<Read>(errbuf, url)); +		BOOST_ASSERT(curl); +		CESSO(curl, CURLOPT_URL, url.c_str()); +		CESSO(curl, CURLOPT_WRITEFUNCTION, write_callback); +		CurlWriteCallback cb = [&ctx, &url, &ss](auto data, auto size) { +			if (!ctx) { +				ctx = make_unique(htmlCreatePushParserCtxt, htmlFreeParserCtxt, lEHB<Read>(url), +						(xmlSAXHandlerPtr)NULL, (void*)NULL, data, size, url.c_str(), XML_CHAR_ENCODING_NONE); +				htmlCtxtUseOptions(ctx.get(), ss->parserflags); +			} +			else { +				htmlParseChunk(ctx.get(), data, size, 0); +			} +			return size; +		}; +		CESSO(curl, CURLOPT_WRITEDATA, &cb); +		if (curl_easy_perform(curl.get()) != CURLE_OK) { +			curlErrorHandler<Read>(failingFunction((void*)&curl_easy_perform), errbuf, url); +		} +		BOOST_VERIFY_MSG(ctx, "No ctx and no previous error should never happen."); +		htmlParseChunk(ctx.get(), "", 0, 1); + +		UPtr<xmlDoc> doc = { ctx->myDoc, xmlFreeDoc }; +		return doc;  	}  	AdHocFormatter(XPathCtx, "Failed to create xpath context in %? (%?)"); @@ -62,9 +114,7 @@ namespace MirrorSearch {  	void SearchImpl::callService(const ::std::string & fn, const SearchServicePtr & s, SearchHits & sh) const  	{ -		auto fmt = AdHoc::Buffer::getFormat(s->baseurl); -		auto url = (*fmt % fn).str(); -		auto doc = getDoc(url, s->parserflags); +		auto doc = getDoc(s, fn);  		auto xpathCtx = getXPathCxt(doc);  		auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET);  		log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath); diff --git a/service/main.cpp b/service/main.cpp index 97bc89e..00563f4 100644 --- a/service/main.cpp +++ b/service/main.cpp @@ -2,10 +2,22 @@  #include <Ice/ObjectAdapter.h>  #include <icetrayService.h>  #include "apiImpl.h" +#include <curl/curl.h> +#include <libxml/parser.h>  namespace MirrorSearch {  	class Api : public IceTray::Service {  		public: +			Api() +			{ +				xmlInitParser(); +				curl_global_init(0); +			} +			~Api() +			{ +				xmlCleanupParser(); +				curl_global_cleanup(); +			}  			void addObjects(const std::string &, const Ice::CommunicatorPtr & ic, const Ice::StringSeq &, const Ice::ObjectAdapterPtr & adp) override  			{  				auto dbpool = getConnectionPool(ic, "postgresql", "MirrorSearch");  | 
