summary refs log tree commit diff
diff options
context:
space:
mode:
author randomdan <randomdan@localhost> 2012-01-09 21:03:19 +0000
committer randomdan <randomdan@localhost> 2012-01-09 21:03:19 +0000
commit d3a9ed2552416b3caef079649a920057f8c48f27 (patch)
tree dd0528be91331a27c1c592c7745c2702b6995b28
parent Don't implicitally follow symlinks (diff)
download project2-d3a9ed2552416b3caef079649a920057f8c48f27.tar.bz2
project2-d3a9ed2552416b3caef079649a920057f8c48f27.tar.xz
project2-d3a9ed2552416b3caef079649a920057f8c48f27.zip
Add some configurability to the Html2Text transform
Add option for repeating a testCgi run for benchmarking
-rw-r--r-- project2/cgi/testCgi.cpp 10
-rw-r--r-- project2/xml/transformText.cpp 59
2 files changed, 61 insertions, 8 deletions
diff --git a/project2/cgi/testCgi.cpp b/project2/cgi/testCgi.cpp
index 624f2d7..6250cb0 100644
--- a/project2/cgi/testCgi.cpp
+++ b/project2/cgi/testCgi.cpp
@@ -56,6 +56,7 @@ class TestInput : public cgicc::CgiInput {
TESTOPT("HTTP_REFERER", "", "Referrer")
TESTOPT("HTTP_COOKIE", "", "Cookie")
TESTOPT("HTTPS", "No", "HTTPS?")
+ ("runCount", Options::value(&runCount, 1), "Repeat run this many times")
;
FileOptions fo(".testCgi.settings");
opts.reset();
@@ -71,15 +72,22 @@ class TestInput : public cgicc::CgiInput {
StrPtr def(new std::string());
return *defaultMapFind(optStore, varName, def);
}
+ void run()
+ {
+ for (int run = 0; run < runCount; run += 1) {
+ cgiServe(this, std::cout);
+ }
+ }
private:
Options opts;
+ int runCount;
};
int
main(int, char **)
{
TestInput ti;
- cgiServe(&ti, std::cout);
+ ti.run();
}
diff --git a/project2/xml/transformText.cpp b/project2/xml/transformText.cpp
index 2002734..efe0c0b 100644
--- a/project2/xml/transformText.cpp
+++ b/project2/xml/transformText.cpp
@@ -34,19 +34,53 @@ TextDocument::getContentClass() const {
return ClassPlain;
}
+class TransformHtmlToText;
+class TransformHtmlToTextLoader : public TransformLoaderImpl<TransformHtmlToText> {
+ public:
+ TransformHtmlToTextLoader() :
+ opts("Transform HTML to text options")
+ {
+ opts
+ ("tx.html2txt.width", Options::value(&defaultWidth, 105),
+ "Default width in the resulting text document")
+ ("tx.html2txt.links", Options::value(&defaultLinks, false),
+ "Default flag for whether the document should contain link references");
+ }
+ const Options * options() const
+ {
+ return &opts;
+ }
+
+ static int defaultWidth;
+ static bool defaultLinks;
+
+ private:
+ Options opts;
+};
+
class TransformHtmlToText : public TransformImpl<HtmlDocument, TextDocument> {
public:
+ TransformHtmlToText() :
+ width(TransformHtmlToTextLoader::defaultWidth),
+ links(TransformHtmlToTextLoader::defaultLinks) {
+ }
void transform(const HtmlDocument * cdoc, TextDocument * str) const
{
xmlDoc * doc = const_cast<xmlDoc *>(cdoc->doc);
str->doc.clear();
int fds[2];
- const char * callLynx[] = {
-#ifdef STRACE_LYNX
- "/usr/bin/strace", "-o", "/tmp/lynx",
-#endif
- "/usr/bin/lynx", "-dump", "-stdin", "-width=105", "-nonumbers", "-nolist", NULL };
- popenrw(callLynx, fds);
+ std::vector<const char *> callLynx;
+ callLynx.push_back("/usr/bin/lynx");
+ callLynx.push_back("-dump");
+ callLynx.push_back("-stdin");
+ std::string widthArg = "-width=" + width.as<std::string>();
+ callLynx.push_back(widthArg.c_str());
+ if (!links) {
+ callLynx.push_back("-nonumbers");
+ callLynx.push_back("-nolist");
+ }
+ callLynx.push_back(NULL);
+ popenrw(&callLynx.front(), fds);
FILE * lynxIn = fdopen(fds[0], "w");
// Fixed encoding as we want the result to go back into a ustring
htmlNodeDumpFileFormat(lynxIn, doc, xmlDocGetRootElement(doc), "utf-8", 0);
@@ -64,8 +98,19 @@ class TransformHtmlToText : public TransformImpl<HtmlDocument, TextDocument> {
throw std::runtime_error("Lynx failed");
}
}
+ void configure(ScriptNodePtr s)
+ {
+ s->applyValue("width", width);
+ s->applyValue("links", links);
+ }
+ private:
+ VariableType width;
+ VariableType links;
};
-DECLARE_TRANSFORM(TransformHtmlToText);
+int TransformHtmlToTextLoader::defaultWidth;
+bool TransformHtmlToTextLoader::defaultLinks;
+
+DECLARE_CUSTOM_COMPONENT_LOADER("TransformHtmlToText", TransformHtmlToText, TransformHtmlToTextLoader, TransformLoader);
DECLARE_TRANSFORMTARGET("textdocument", TextDocument)