From d3a9ed2552416b3caef079649a920057f8c48f27 Mon Sep 17 00:00:00 2001 From: randomdan Date: Mon, 9 Jan 2012 21:03:19 +0000 Subject: Add some configurability to the Html2Text transform Add option for repeating a testCgi run for benchmarking --- project2/cgi/testCgi.cpp | 10 ++++++- project2/xml/transformText.cpp | 59 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/project2/cgi/testCgi.cpp b/project2/cgi/testCgi.cpp index 624f2d7..6250cb0 100644 --- a/project2/cgi/testCgi.cpp +++ b/project2/cgi/testCgi.cpp @@ -56,6 +56,7 @@ class TestInput : public cgicc::CgiInput { TESTOPT("HTTP_REFERER", "", "Referrer") TESTOPT("HTTP_COOKIE", "", "Cookie") TESTOPT("HTTPS", "No", "HTTPS?") + ("runCount", Options::value(&runCount, 1), "Repeat run this many times") ; FileOptions fo(".testCgi.settings"); opts.reset(); @@ -71,15 +72,22 @@ class TestInput : public cgicc::CgiInput { StrPtr def(new std::string()); return *defaultMapFind(optStore, varName, def); } + void run() + { + for (int run = 0; run < runCount; run += 1) { + cgiServe(this, std::cout); + } + } private: Options opts; + int runCount; }; int main(int, char **) { TestInput ti; - cgiServe(&ti, std::cout); + ti.run(); } diff --git a/project2/xml/transformText.cpp b/project2/xml/transformText.cpp index 2002734..efe0c0b 100644 --- a/project2/xml/transformText.cpp +++ b/project2/xml/transformText.cpp @@ -34,19 +34,53 @@ TextDocument::getContentClass() const { return ClassPlain; } +class TransformHtmlToText; +class TransformHtmlToTextLoader : public TransformLoaderImpl { + public: + TransformHtmlToTextLoader() : + opts("Transform HTML to text options") + { + opts + ("tx.html2txt.width", Options::value(&defaultWidth, 105), + "Default width in the resulting text document") + ("tx.html2txt.links", Options::value(&defaultLinks, false), + "Default flag for whether the document should contain link references"); + } + const Options * options() const + { + return &opts; + } + + static int defaultWidth; + static bool defaultLinks; + + private: + Options opts; +}; + class TransformHtmlToText : public TransformImpl { public: + TransformHtmlToText() : + width(TransformHtmlToTextLoader::defaultWidth), + links(TransformHtmlToTextLoader::defaultLinks) { + } void transform(const HtmlDocument * cdoc, TextDocument * str) const { xmlDoc * doc = const_cast(cdoc->doc); str->doc.clear(); int fds[2]; - const char * callLynx[] = { -#ifdef STRACE_LYNX - "/usr/bin/strace", "-o", "/tmp/lynx", -#endif - "/usr/bin/lynx", "-dump", "-stdin", "-width=105", "-nonumbers", "-nolist", NULL }; - popenrw(callLynx, fds); + std::vector callLynx; + callLynx.push_back("/usr/bin/lynx"); + callLynx.push_back("-dump"); + callLynx.push_back("-stdin"); + std::string widthArg = "-width=" + width.as(); + callLynx.push_back(widthArg.c_str()); + if (!links) { + callLynx.push_back("-nonumbers"); + callLynx.push_back("-nolist"); + } + callLynx.push_back(NULL); + popenrw(&callLynx.front(), fds); FILE * lynxIn = fdopen(fds[0], "w"); // Fixed encoding as we want the result to go back into a ustring htmlNodeDumpFileFormat(lynxIn, doc, xmlDocGetRootElement(doc), "utf-8", 0); @@ -64,8 +98,19 @@ class TransformHtmlToText : public TransformImpl { throw std::runtime_error("Lynx failed"); } } + void configure(ScriptNodePtr s) + { + s->applyValue("width", width); + s->applyValue("links", links); + } + private: + VariableType width; + VariableType links; }; -DECLARE_TRANSFORM(TransformHtmlToText); +int TransformHtmlToTextLoader::defaultWidth; +bool TransformHtmlToTextLoader::defaultLinks; + +DECLARE_CUSTOM_COMPONENT_LOADER("TransformHtmlToText", TransformHtmlToText, TransformHtmlToTextLoader, TransformLoader); DECLARE_TRANSFORMTARGET("textdocument", TextDocument) -- cgit v1.2.3