diff options
author | randomdan <randomdan@localhost> | 2012-01-09 21:03:19 +0000 |
---|---|---|
committer | randomdan <randomdan@localhost> | 2012-01-09 21:03:19 +0000 |
commit | d3a9ed2552416b3caef079649a920057f8c48f27 (patch) | |
tree | dd0528be91331a27c1c592c7745c2702b6995b28 | |
parent | Don't implicitly follow symlinks (diff) | |
download | project2-d3a9ed2552416b3caef079649a920057f8c48f27.tar.bz2 project2-d3a9ed2552416b3caef079649a920057f8c48f27.tar.xz project2-d3a9ed2552416b3caef079649a920057f8c48f27.zip |
Add some configurability to the Html2Text transform
Add option for repeating a testCgi run for benchmarking
-rw-r--r-- | project2/cgi/testCgi.cpp | 10 | ||||
-rw-r--r-- | project2/xml/transformText.cpp | 59 |
2 files changed, 61 insertions, 8 deletions
diff --git a/project2/cgi/testCgi.cpp b/project2/cgi/testCgi.cpp index 624f2d7..6250cb0 100644 --- a/project2/cgi/testCgi.cpp +++ b/project2/cgi/testCgi.cpp @@ -56,6 +56,7 @@ class TestInput : public cgicc::CgiInput { TESTOPT("HTTP_REFERER", "", "Referrer") TESTOPT("HTTP_COOKIE", "", "Cookie") TESTOPT("HTTPS", "No", "HTTPS?") + ("runCount", Options::value(&runCount, 1), "Repeat run this many times") ; FileOptions fo(".testCgi.settings"); opts.reset(); @@ -71,15 +72,22 @@ class TestInput : public cgicc::CgiInput { StrPtr def(new std::string()); return *defaultMapFind(optStore, varName, def); } + void run() + { + for (int run = 0; run < runCount; run += 1) { + cgiServe(this, std::cout); + } + } private: Options opts; + int runCount; }; int main(int, char **) { TestInput ti; - cgiServe(&ti, std::cout); + ti.run(); } diff --git a/project2/xml/transformText.cpp b/project2/xml/transformText.cpp index 2002734..efe0c0b 100644 --- a/project2/xml/transformText.cpp +++ b/project2/xml/transformText.cpp @@ -34,19 +34,53 @@ TextDocument::getContentClass() const { return ClassPlain; } +class TransformHtmlToText; +class TransformHtmlToTextLoader : public TransformLoaderImpl<TransformHtmlToText> { + public: + TransformHtmlToTextLoader() : + opts("Transform HTML to text options") + { + opts + ("tx.html2txt.width", Options::value(&defaultWidth, 105), + "Default width in the resulting text document") + ("tx.html2txt.links", Options::value(&defaultLinks, false), + "Default flag for whether the document should contain link references"); + } + const Options * options() const + { + return &opts; + } + + static int defaultWidth; + static bool defaultLinks; + + private: + Options opts; +}; + class TransformHtmlToText : public TransformImpl<HtmlDocument, TextDocument> { public: + TransformHtmlToText() : + width(TransformHtmlToTextLoader::defaultWidth), + links(TransformHtmlToTextLoader::defaultLinks) { + } void transform(const HtmlDocument * cdoc, TextDocument * str) const { xmlDoc * doc = 
const_cast<xmlDoc *>(cdoc->doc); str->doc.clear(); int fds[2]; - const char * callLynx[] = { -#ifdef STRACE_LYNX - "/usr/bin/strace", "-o", "/tmp/lynx", -#endif - "/usr/bin/lynx", "-dump", "-stdin", "-width=105", "-nonumbers", "-nolist", NULL }; - popenrw(callLynx, fds); + std::vector<const char *> callLynx; + callLynx.push_back("/usr/bin/lynx"); + callLynx.push_back("-dump"); + callLynx.push_back("-stdin"); + std::string widthArg = "-width=" + width.as<std::string>(); + callLynx.push_back(widthArg.c_str()); + if (!links) { + callLynx.push_back("-nonumbers"); + callLynx.push_back("-nolist"); + } + callLynx.push_back(NULL); + popenrw(&callLynx.front(), fds); FILE * lynxIn = fdopen(fds[0], "w"); // Fixed encoding as we want the result to go back into a ustring htmlNodeDumpFileFormat(lynxIn, doc, xmlDocGetRootElement(doc), "utf-8", 0); @@ -64,8 +98,19 @@ class TransformHtmlToText : public TransformImpl<HtmlDocument, TextDocument> { throw std::runtime_error("Lynx failed"); } } + void configure(ScriptNodePtr s) + { + s->applyValue("width", width); + s->applyValue("links", links); + } + private: + VariableType width; + VariableType links; }; -DECLARE_TRANSFORM(TransformHtmlToText); +int TransformHtmlToTextLoader::defaultWidth; +bool TransformHtmlToTextLoader::defaultLinks; + +DECLARE_CUSTOM_COMPONENT_LOADER("TransformHtmlToText", TransformHtmlToText, TransformHtmlToTextLoader, TransformLoader); DECLARE_TRANSFORMTARGET("textdocument", TextDocument) |