From 6fa901871df81cd8ecdc07d30ef1a20d8c7b1b1d Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Wed, 7 Mar 2018 00:03:51 +0000 Subject: Store parser flags in service config --- service/apiImpl.cpp | 4 +--- service/data.sql | 4 ++-- service/models.ice | 1 + service/schema.sql | 1 + service/sql/getServices.sql | 2 +- service/test.cpp | 1 + 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/service/apiImpl.cpp b/service/apiImpl.cpp index 1de31cb..28d88b4 100644 --- a/service/apiImpl.cpp +++ b/service/apiImpl.cpp @@ -64,9 +64,7 @@ namespace MirrorSearch { { auto fmt = AdHoc::Buffer::getFormat(s->baseurl); auto url = (*fmt % fn).str(); - auto doc = getDoc(url, - HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD | HTML_PARSE_NOIMPLIED | - HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR); + auto doc = getDoc(url, s->parserflags); auto xpathCtx = getXPathCxt(doc); auto xpathObj = getXPathObj(s->listxpath, xpathCtx, xmlXPathObjectType::XPATH_NODESET); log->messagebf(LOG::INFO, "%d nodes matched %s", xpathObj->nodesetval->nodeNr, s->listxpath); diff --git a/service/data.sql b/service/data.sql index e14e721..b056d0c 100644 --- a/service/data.sql +++ b/service/data.sql @@ -1,3 +1,3 @@ -INSERT INTO searchservices(name, baseurl, listxpath, urlxpath) - VALUES('file searching mock', 'file://$SCRIPTDIR/fixtures/filesearching/%s.html', '//pre[@class=''list'']/a[@class=''lf'']', 'string(@href)') +INSERT INTO searchservices(name, baseurl, parserflags, listxpath, urlxpath) + VALUES('file searching mock', 'file://$SCRIPTDIR/fixtures/filesearching/%s.html', 97, '//pre[@class=''list'']/a[@class=''lf'']', 'string(@href)') ; diff --git a/service/models.ice b/service/models.ice index cb48459..971e7b9 100644 --- a/service/models.ice +++ b/service/models.ice @@ -9,6 +9,7 @@ module MirrorSearch { string baseurl; string listxpath; string urlxpath; + int parserflags; }; class SearchHit { ["slicer:db:pkey"] diff --git a/service/schema.sql b/service/schema.sql index b529172..93a87d3 100644 --- a/service/schema.sql +++ b/service/schema.sql @@ -2,6 +2,7 @@ CREATE TABLE searchservices( id int GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, name text not null, baseurl text not null, + parserflags int not null, listxpath text not null, urlxpath text not null ); diff --git a/service/sql/getServices.sql b/service/sql/getServices.sql index 40382f2..be2fba6 100644 --- a/service/sql/getServices.sql +++ b/service/sql/getServices.sql @@ -1,3 +1,3 @@ -SELECT id, name, baseurl, listxpath, urlxpath +SELECT id, name, baseurl, parserflags, listxpath, urlxpath FROM searchservices ORDER BY id diff --git a/service/test.cpp b/service/test.cpp index 8b93cf8..979d43d 100644 --- a/service/test.cpp +++ b/service/test.cpp @@ -43,6 +43,7 @@ BOOST_AUTO_TEST_CASE(getServices) BOOST_CHECK_EQUAL(ss.front()->name, "file searching mock"); BOOST_CHECK_NE(ss.front()->baseurl, "file://$SCRIPTDIR/fixtures/filesearching/%s.html"); BOOST_CHECK_EQUAL(ss.front()->baseurl.substr(0, 8), "file:///"); + BOOST_CHECK_EQUAL(ss.front()->parserflags, 97); BOOST_CHECK(!ss.front()->listxpath.empty()); BOOST_CHECK(!ss.front()->urlxpath.empty()); } -- cgit v1.2.3