From d3a9ed2552416b3caef079649a920057f8c48f27 Mon Sep 17 00:00:00 2001
From: randomdan <randomdan@localhost>
Date: Mon, 9 Jan 2012 21:03:19 +0000
Subject: Add some configurability to the Html2Text transform; add option for
 repeating a testCgi run for benchmarking

---
 project2/cgi/testCgi.cpp       | 10 ++++++-
 project2/xml/transformText.cpp | 59 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/project2/cgi/testCgi.cpp b/project2/cgi/testCgi.cpp
index 624f2d7..6250cb0 100644
--- a/project2/cgi/testCgi.cpp
+++ b/project2/cgi/testCgi.cpp
@@ -56,6 +56,7 @@ class TestInput : public cgicc::CgiInput {
 				TESTOPT("HTTP_REFERER", "", "Referrer")
 				TESTOPT("HTTP_COOKIE", "", "Cookie")
 				TESTOPT("HTTPS", "No", "HTTPS?")
+				("runCount", Options::value(&runCount, 1), "Repeat run this many times")
 				;
 			FileOptions fo(".testCgi.settings");
 			opts.reset();
@@ -71,15 +72,22 @@ class TestInput : public cgicc::CgiInput {
 			StrPtr def(new std::string());
 			return *defaultMapFind(optStore, varName, def);
 		}
+		void run() // Serve this request through cgiServe runCount times, writing to std::cout.
+		{
+			for (int iteration = 0; runCount > iteration; ++iteration) { // runCount is bound to the "runCount" option
+				cgiServe(this, std::cout);
+			}
+		}
 
 	private:
 		Options opts;
+		int runCount;
 };
 
 int
 main(int, char **)
 {
 	TestInput ti;
-	cgiServe(&ti, std::cout);
+	ti.run(); // TestInput::run() calls cgiServe once per configured run
 }
 
diff --git a/project2/xml/transformText.cpp b/project2/xml/transformText.cpp
index 2002734..efe0c0b 100644
--- a/project2/xml/transformText.cpp
+++ b/project2/xml/transformText.cpp
@@ -34,19 +34,53 @@ TextDocument::getContentClass() const {
 	return ClassPlain;
 }
 
+class TransformHtmlToText; // forward declaration: named as the loader's template argument before it is defined
+class TransformHtmlToTextLoader : public TransformLoaderImpl<TransformHtmlToText> { // loader that also owns the tx.html2txt.* options
+	public:
+		TransformHtmlToTextLoader() : // registers both options against the static members below
+			opts("Transform HTML to text options")
+		{
+			opts
+				("tx.html2txt.width", Options::value(&defaultWidth, 105), // target: defaultWidth; 105 presumably the fallback - confirm against Options::value
+				 "Default width in the resulting text document")
+				("tx.html2txt.links", Options::value(&defaultLinks, false), // target: defaultLinks; false presumably the fallback - confirm
+				 "Default flag for whether the document should contain link references");
+		}
+		const Options * options() const // returns the option set built in the constructor
+		{
+			return &opts;
+		}
+
+		static int defaultWidth; // read by TransformHtmlToText's constructor to seed its width
+		static bool defaultLinks; // read by TransformHtmlToText's constructor to seed its links flag
+
+	private:
+		Options opts; // option group titled "Transform HTML to text options"
+};
+
 class TransformHtmlToText : public TransformImpl<HtmlDocument, TextDocument> {
 	public:
+		TransformHtmlToText() : // seed per-instance settings from the loader's static defaults
+			width(TransformHtmlToTextLoader::defaultWidth), // later used to build lynx's "-width=" argument
+			links(TransformHtmlToTextLoader::defaultLinks) { // when false, transform() adds -nonumbers and -nolist
+		}
 		void transform(const HtmlDocument * cdoc, TextDocument * str) const
 		{
 			xmlDoc * doc = const_cast<xmlDoc *>(cdoc->doc);
 			str->doc.clear();
 			int fds[2];
-			const char * callLynx[] = {
-#ifdef STRACE_LYNX
-				"/usr/bin/strace", "-o", "/tmp/lynx",
-#endif
-				"/usr/bin/lynx", "-dump", "-stdin", "-width=105", "-nonumbers", "-nolist", NULL };
-			popenrw(callLynx, fds);
+			std::vector<const char *> callLynx;
+			callLynx.push_back("/usr/bin/lynx");
+			callLynx.push_back("-dump");
+			callLynx.push_back("-stdin");
+			std::string widthArg = "-width=" + width.as<std::string>();
+			callLynx.push_back(widthArg.c_str());
+			if (!links) {
+				callLynx.push_back("-nonumbers");
+				callLynx.push_back("-nolist");
+			}
+			callLynx.push_back(NULL);
+			popenrw(&callLynx.front(), fds);
 			FILE * lynxIn = fdopen(fds[0], "w");
 			// Fixed encoding as we want the result to go back into a ustring
 			htmlNodeDumpFileFormat(lynxIn, doc, xmlDocGetRootElement(doc), "utf-8", 0);
@@ -64,8 +98,19 @@ class TransformHtmlToText : public TransformImpl<HtmlDocument, TextDocument> {
 				throw std::runtime_error("Lynx failed");
 			}
 		}
+		void configure(ScriptNodePtr s) // apply the "width" and "links" values from script node s to this instance
+		{
+			s->applyValue("width", width); // presumably leaves width untouched when s has no "width" - confirm against applyValue
+			s->applyValue("links", links); // same assumption for "links"
+		}
+	private:
+		VariableType width;
+		VariableType links;
 };
 
-DECLARE_TRANSFORM(TransformHtmlToText);
+int TransformHtmlToTextLoader::defaultWidth;
+bool TransformHtmlToTextLoader::defaultLinks;
+
+DECLARE_CUSTOM_COMPONENT_LOADER("TransformHtmlToText", TransformHtmlToText, TransformHtmlToTextLoader, TransformLoader);
 DECLARE_TRANSFORMTARGET("textdocument", TextDocument)
 
-- 
cgit v1.2.3