diff options
author | Michi Henning <michi@zeroc.com> | 2007-01-24 06:42:18 +0000 |
---|---|---|
committer | Michi Henning <michi@zeroc.com> | 2007-01-24 06:42:18 +0000 |
commit | 0a8b6ce2325aff3ecb50386b74e7aa8227b50efe (patch) | |
tree | f38f38afba31e9c7cbe835a9907f34a1962b90e1 /cpp | |
parent | *** empty log message *** (diff) | |
download | ice-0a8b6ce2325aff3ecb50386b74e7aa8227b50efe.tar.bz2 ice-0a8b6ce2325aff3ecb50386b74e7aa8227b50efe.tar.xz ice-0a8b6ce2325aff3ecb50386b74e7aa8227b50efe.zip |
*** empty log message ***
Diffstat (limited to 'cpp')
-rw-r--r-- | cpp/doc/Makefile | 4 | ||||
-rw-r--r-- | cpp/doc/htmlHeader | 61 | ||||
-rw-r--r-- | cpp/doc/indexFooter | 15 | ||||
-rwxr-xr-x | cpp/doc/swish/TemplateSlice.pm | 52 | ||||
-rw-r--r-- | cpp/doc/swish/swish.cgi | 3328 | ||||
-rw-r--r-- | cpp/doc/swish/swish.conf | 1 | ||||
-rw-r--r-- | cpp/src/slice2html/Gen.cpp | 44 | ||||
-rw-r--r-- | cpp/src/slice2html/Gen.h | 7 | ||||
-rw-r--r-- | cpp/src/slice2html/Main.cpp | 6 |
9 files changed, 3451 insertions, 67 deletions
diff --git a/cpp/doc/Makefile b/cpp/doc/Makefile index bef7ea5ec0f..1deba72da83 100644 --- a/cpp/doc/Makefile +++ b/cpp/doc/Makefile @@ -29,8 +29,8 @@ reference/index.html: $(SLICEFILES) $(MAKE) clean $(bindir)/slice2html --ice -I../slice --hdr=htmlHeader --ftr=htmlFooter \ --indexhdr=indexHeader --indexftr=indexFooter \ - --image-dir=images --logo-url="http://www.zeroc.com" --output-dir=reference --index=3 \ - --summary=120 $(SLICEFILES) + --image-dir=images --logo-url="http://www.zeroc.com" --search="/cgi-bin/swish.cgi" \ + --output-dir=reference --index=3 --summary=120 $(SLICEFILES) mkdir reference/$(IMAGES) cp $(IMAGES)/*.gif reference/$(IMAGES) cp $(JAVASCRIPT) reference diff --git a/cpp/doc/htmlHeader b/cpp/doc/htmlHeader index f945662fb98..d1bd99e135a 100644 --- a/cpp/doc/htmlHeader +++ b/cpp/doc/htmlHeader @@ -5,18 +5,55 @@ TITLE </title> <style type="text/css"> - body { font-family: Arial, Helvetica, sans-serif; } - .Page { width: 850px; margin-left: auto; margin-right: auto; } - .Symbol { font-family: "Courier New", Courier, mono; } - .Synopsis { font-family: "Courier New", Courier, mono; font-weight: bold; } - .Deprecated { font-style: italic; } - .Warning { font-style: italic; } - .Note { font-style: italic; } - .HeaderFooter { position: relative; width: 100%; } - .LogoTable { position: absolute; right: 0; top: 0; } - .Logo { border-style: none; } - .Button { border-style: none; } - .ButtonGrey { border-style: none; cursor: default; } + body { + font-family: Arial, Helvetica, sans-serif; + } + .Page { + width: 850px; + margin-left: auto; + margin-right: auto; + } + .Symbol { + font-family: "Courier New", Courier, mono; + } + .Synopsis { + font-family: "Courier New", Courier, mono; + font-weight: bold; + } + .Deprecated { + font-style: italic; + } + .Warning { + font-style: italic; + } + .Note { + font-style: italic; + } + .HeaderFooter { + position: relative; + width: 100%; + } + .LogoTable { + position: absolute; + right: 0; + top: 0; + } + .Logo { + 
border-style: none; + } + .Button { + border-style: none; + } + .ButtonGrey { + border-style: none; + cursor: default; + } + .SearchTable { + position: absolute; + top: 0px; + margin-left: auto; + margin-right: auto; + } </style> </head> <body> diff --git a/cpp/doc/indexFooter b/cpp/doc/indexFooter index da436303410..ef5cc5bcb68 100644 --- a/cpp/doc/indexFooter +++ b/cpp/doc/indexFooter @@ -9,6 +9,21 @@ </td> </tr> </table> + <div style="text-align: center;"> + <table class="SearchTable"> + <tr> + <td> + <form method="get" action="/cgi-bin/swish.cgi" + enctype="application/x-www-form-urlencoded" class="form"> + <div> + <input maxlength="100" value="" type="text" name="query"> + <input type="submit" value="Search" name="submit"> + </div> + </form> + </td> + </tr> + </table> + </div> <table class="LogoTable"> <tr> <td> diff --git a/cpp/doc/swish/TemplateSlice.pm b/cpp/doc/swish/TemplateSlice.pm index a1ae993ce3c..0a9a72510f5 100755 --- a/cpp/doc/swish/TemplateSlice.pm +++ b/cpp/doc/swish/TemplateSlice.pm @@ -71,31 +71,9 @@ sub page_header { </title> </head> <body> -<div style="width: 850px; margin-left: auto; margin-right: auto;"> - <div style="position: relative; width: 100%;"> - <table class="ButtonTable"> - <tr> - <td> - <a href="../doc/3.2/reference/index.html"> - <img class="HomeButton" src="../doc/images/home.gif" alt="Home" style="border-style: none"/> - </a> - </td> - <td> - <a href="../doc/3.2/reference/_sindex.html"> - <img class="IndexButton" src="../doc/images/index.gif" alt="Index" style="border-style: none"/> - </a> - </td> - </tr> - </table> - <table style="position: absolute; top: 0; right: 0;"> - <tr> - <td> - <a href="http://www.zeroc.com"><img class="Logo" src="../doc/images/logo.gif" alt="Logo" style="border-style: none;"/></a> - </td> - </tr> - </table> - </div> - <hr> + <div style="width: 850px; margin-left: auto; margin-right: auto;"> + <a href="http://www.zeroc.com"><img class="Logo" src="../doc/images/logo.gif" alt="Logo" style="border-style: 
none;"/></a> + <hr> EOF } @@ -336,30 +314,6 @@ EOF sub footer { return <<EOF; - <hr> - <div style="position: relative; width: 100%;"> - <table class="ButtonTable"> - <tr> - <td> - <a href="../doc/3.2/reference/index.html"> - <img class="HomeButton" src="../doc/images/home.gif" alt="Home" style="border-style: none"/> - </a> - </td> - <td> - <a href="../doc/3.2/reference/_sindex.html"> - <img class="IndexButton" src="../doc/images/index.gif" alt="Index" style="border-style: none"/> - </a> - </td> - </tr> - </table> - <table style="position: absolute; top: 0; right: 0;"> - <tr> - <td> - <a href="http://www.zeroc.com"><img class="Logo" src="../doc/images/logo.gif" alt="Logo" style="border-style: none;"/></a> - </td> - </tr> - </table> - </div> </div> </body> </html> diff --git a/cpp/doc/swish/swish.cgi b/cpp/doc/swish/swish.cgi new file mode 100644 index 00000000000..75e8e85bc15 --- /dev/null +++ b/cpp/doc/swish/swish.cgi @@ -0,0 +1,3328 @@ +#!/usr/bin/perl -w +package SwishSearch; +use strict; + +# This is set to where Swish-e's "make install" installed the helper modules. +use lib ( '/usr/local/lib/swish-e/perl' ); + + +my $DEFAULT_CONFIG_FILE = '.swishcgi.conf'; + +################################################################################### +# +# If this text is displayed on your browser then your web server +# is not configured to run .cgi programs. Contact your web server administrator. +# +# To display documentation for this program type "perldoc swish.cgi" +# +# swish.cgi $Revision$ Copyright (C) 2001 Bill Moseley swishscript@hank.org +# Example CGI program for searching with SWISH-E +# +# This example program will only run under an OS that supports fork(). +# Under windows it uses a piped open which MAY NOT BE SECURE. 
+# +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# The above lines must remain at the top of this program +# +# $Id$ +# +#################################################################################### + +# This is written this way so the script can be used as a CGI script or a mod_perl +# module without any code changes. + +# use CGI (); # might not be needed if using Apache::Request + + +#================================================================================= +# CGI entry point +# +#================================================================================= + + +use vars '$speedy_config'; # Global for caching in persistent environment such as SpeedyCGI + +# Run the script -- entry point if running as a CGI script + + unless ( $ENV{MOD_PERL} ) { + if ( !$speedy_config ) { + $speedy_config = default_config(); + + # Merge with disk config file. + $speedy_config = merge_read_config( $speedy_config ); + } + + process_request( $speedy_config ); + } + + + + +#================================================================================== +# This sets the default configuration parameters +# +# Any configuration read from disk is merged with these settings. +# +# Only a few settings are actually required. Some reasonable defaults are used +# for most. 
In fact, you can probably create a complete config as: +# +# return { +# swish_binary => '/usr/local/bin/swish-e', +# swish_index => '/usr/local/share/swish/index.swish-e', +# title_property => 'swishtitle', # Not required, but recommended +# }; +# +# But, that doesn't really show all the options. +# +# You can modify the options below, or you can use a config file. The config file +# is .swishcgi.conf by default (read from the current directory) that must return +# a hash reference. For example, to create a config file that changes the default +# title and index file name, plus uses Template::Toolkit to generate output +# create a config file as: +# +# # Example config file -- returns a hash reference +# return { +# title => 'Search Our Site', +# swish_index => 'index.web', +# +# template => { +# package => 'SWISH::TemplateToolkit', +# file => 'swish.tt', +# options => { +# INCLUDE_PATH => '/home/user/swish-e/example', +# }, +# }, +# }; +# +# +#----------------------------------------------------------------------------------- + +sub default_config { + + + + ##### Configuration Parameters ######### + + #---- This lists all the options, with many commented out --- + # By default, this config is used -- see the process_request() call below. + + # You should adjust for your site, and how your swish index was created. + + ##>> + ##>> Please don't post this entire section on the swish-e list if looking for help! + ##>> + ##>> Send a small example, without all the comments. + + #====================================================================== + # *** NOTES **** + # Items beginning with an "x" or "#" are commented out + # the "x" form simply renames (hides) that setting. It's used + # to make it easy to disable a multi-line configuration setting. + # + # If you do not understand a setting then it is best to leave the default. + # + # Please follow the documentation (perldoc swish.cgi) and set up + # a test using the defaults before making changes.
It's much easier + # to modify a working example than to try to get a modified example to work... + # + # Again, this is a Perl hash structure. Commas are important. + #====================================================================== + + + return { + title => 'Search our site', # Title of your choice. Displays on the search page + swish_binary => '/usr/local/bin/swish-e', # Location of swish-e binary + + + # By default, this script tries to read a config file. You should probably + # comment this out if not used save a disk stat + config_file => $DEFAULT_CONFIG_FILE, # Default config file + + + # The location of your index file. Typically, this would not be in + # your web tree. + # If you have more than one index to search then specify an array + # reference. e.g. swish_index =>[ qw( index1 index2 index3 )], + + swish_index => 'index.swish-e', # Location of your index file + # See "select_indexes" below for how to + # select more than one index. + + page_size => 15, # Number of results per page - default 15 + + + + + # prepend this path to the filename (swishdocpath) returned by swish. This is used to + # make the href link back to the original document. Comment out to disable. + + #prepend_path => 'http://localhost/mydocs', + + + + # This is the property that is used for the href link back to the original + # document. It's "swishdocpath" by default + + #link_property => 'swishdocpath', + + + ## Display properties ## + + # Everything swish records about a file is called a "property". These + # next three settings tell the swish.cgi script which properties should be passed + # to the templating coded for output generation. + + + # First is the property name to use as the main link text to the indexed document. + # Typically, this will be 'swishtitle' if have indexed html documents, + # but you can specify any PropertyName defined in your document. + # By default, swish will display the pathname for documents that do not + # have a title. 
+ # In other words, this is used for the text of the links of the search results. + # <a href="prepend_path/swishdocpath">title_property</a> + + title_property => 'swishtitle', + + + + # Swish has a configuration directive "StoreDescription" that will save part or + # all of a document's contents in the index file. This can then be displayed + # along with results. If you are indexing a lot of files this can use a lot of disk + # space, so test carefully before indexing your entire site. + # Building swish with zlib can greatly reduce the space used by StoreDescription. + # + # This settings tells this script to display this property as the description. + # Normally, this should be 'swishdescription', but you can specify another property name. + # There is no default. + + description_prop => 'swishdescription', + + + + # Property names listed here will be displayed in a table below each result + # You may wish to modify this list if you are using document properties (PropertyNames) + # in your swish-e index configuration + # There is no default. + + display_props => [qw/swishlastmodified swishdocsize swishdocpath/], + + + + + + # Results can be be sorted by any of the properties listed here + # They will be displayed in a drop-down list on the form. + # You may modify this list if you are using document properties of your own creation + # Swish uses the rank as the default sort + + sorts => [qw/swishrank swishlastmodified swishtitle swishdocpath/], + + + # Secondary_sort is used to sort within a sort + # You may enter a property name followed by a direction (asc|desc) + + secondary_sort => [qw/swishlastmodified desc/], + + + + + # You can limit by MetaNames here. Names listed here will be displayed in + # a line of radio buttons. + # The default is to not allow any metaname selection. + # To use this feature you must define MetaNames while indexing. + + # The special "swishdefault" says to search any text that was not indexed + # as a specific metaname (e.g. 
typically the body of a HTML document and its title). + + # To see how this might work, add to your *swish-e* config file: + # MetaNames swishtitle swishdocpath + # reindex and try: + + metanames => [qw/ swishdefault swishtitle swishdocpath /], + + # Add "all" to this list to test the meta_groups feature described below + + + + # Another example: if you indexed an email archive + # that defined the metanames subject name email (as in the swish-e discussion archive) + # you might use: + #metanames => [qw/body subject name email/], + + + # Searching multiple meta names: + + # You can also group metanames into "meta-metanames". + # Example: Say you defined metanames "author", "comment" and "keywords" + # You want to allow searching "author", "comment" and the document body ("swishdefault") + # But you would also like an "all" search that searches all metanames, including "keywords": + # + # metanames => [qw/swishdefault author comment all/], + # + # Now, the "all" metaname is not a real metaname. It must be expanded into its + # individual metanames using meta_groups: + # + # "meta_groups" maps a fake metaname to a list of real metanames + # + # meta_groups => { + # all => [qw/swishdefault author comment keywords / ], + # }, + # + # swish.cgi will then take a query like + # + # all=(query words) + # + # and create the query + # + # swishdefault=(query words) OR author=(query words) OR comment=(query words) OR keywords=(query words) + # + # This is not ideal, but should work for most cases + # (might fail under windows since the query is passed through the shell). + + # To enable this group add "all" to the list of metanames above + + meta_groups => { + all => [qw/swishdefault swishtitle swishdocpath/], + }, + + # Note that you can use other words than "all". The script just checks if a given metaname is + # listed in "meta_groups" and expands as needed. + + + # "name_labels" is used to map MetaNames and PropertyNames to user-friendly names + # on the CGI form. 
+ + name_labels => { + swishdefault => 'Title & Body', + swishtitle => 'Title', + swishrank => 'Rank', + swishlastmodified => 'Last Modified Date', + swishdocpath => 'Document Path', + swishdocsize => 'Document Size', + all => 'All', # group of metanames + subject => 'Message Subject', # other examples + name => "Poster's Name", + email => "Poster's Email", + sent => 'Message Date', + }, + + + timeout => 10, # limit time used by swish when fetching results - DoS protection. + # does not work under Windows + + max_query_length => 100, # limit length of query string. Swish also has a limit (default is 40) + # You might want to set swish-e's limit higher, and use this to get a + # somewhat more friendly message. + + + + + + max_chars => 500, # Limits the size of the description_prop if it is not highlighted + + # This structure defines term highlighting, and what type of highlighting to use + # If you are using metanames in your searches and they map to properties that you + # will display, you may need to adjust the "meta_to_prop_map". + + highlight => { + + # Pick highlighting module -- you must make sure the module can be found + # The highlighting modules are in the example/modules directory by default + + # Ok speed, but doesn't handle phrases or stopwords + # Deals with stemming, and shows words in context + # Takes into consideration WordCharacters, IgnoreFirstChars and IgnoreLastChars. + #package => 'SWISH::DefaultHighlight', + + # Somewhat slow, but deals with phases, stopwords, and stemming. + # Takes into consideration WordCharacters, IgnoreFirstChars and IgnoreLastChars. + package => 'SWISH::PhraseHighlight', + + # Faster: phrases without regard to wordcharacter settings + # doesn't do context display, so must match in first X words, so may not even highlight + # doesn't handle stemming or stopwords. 
+ #package => 'SWISH::SimpleHighlight', + + show_words => 10, # Number of "swish words" words to show around highlighted word + max_words => 100, # If no words are found to highlighted then show this many words + occurrences => 6, # Limit number of occurrences of highlighted words + highlight_on => '<b>', # HTML highlighting codes + highlight_off => '</b>', + #highlight_on => '<font style="background:#FFFF99">', + #highlight_off => '</font>', + + # This maps (real) search metatags to display properties. + # e.g. if searching in "swishdefault" then highlight in the + # swishtitle and swishdescription properties + # Do not include "fake" metanames defined with meta_groups, just + # list the real metanames used in your index, and the properties they + # relate to. + + meta_to_prop_map => { + swishdefault => [ qw/swishtitle swishdescription/ ], + swishtitle => [ qw/swishtitle/ ], + swishdocpath => [ qw/swishdocpath/ ], + }, + }, + + + + # If you specify more than one index file (as an array reference) you + # can set this allow selection of which indexes to search. + # The default is to search all indexes specified if this is not used. + # When used, the first index is the default index. + + # You need to specify your indexes as an array reference: + #swish_index => [ qw/ index.swish-e index.other index2.other index3.other index4.other / ], + + Xselect_indexes => { + # pick radio_group, popup_menu, or checkbox_group + method => 'checkbox_group', + #method => 'radio_group', + #method => 'popup_menu', + + columns => 3, + # labels must match up one-to-one with elements in "swish_index" + labels => [ 'Main Index', 'Other Index', qw/ two three four/ ], + description => 'Select Site: ', + + # Optional - Set the default index if none is selected + # This needs to be an index file name listed in swish_index + # above, not a label + + default_index => '', + }, + + + # Similar to select_indexes, this adds a metaname search + # based on a metaname. 
You can use any metaname, and this will + # add an "AND" search to limit results to a subset of your records. + # i.e. it adds something like 'site=(foo or bar or baz)' if foo, bar, and baz were selected. + + # This really just allows you to limit existing searches by a metaname, instead of + # selecting a metaname (with metanames option above). + + # Swish-e's ExtractPath would work well with this. For example, + # to allow limiting searches to specific sections of the apache docs use this + # in your swish-e config file: + # ExtractPath site regex !^/usr/local/apache/htdocs/manual/([^/]+)/.+$!$1! + # ExtractPathDefault site other + # which extracts the segment of the path after /manual/ and indexes that name + # under the metaname "site". Then searches can be limited to files with that + # path (e.g. query would be swishdefault=foo AND site=vhosts to limit searches + # to the virtual host section. + + + Xselect_by_meta => { + #method => 'radio_group', # pick: radio_group, popup_menu, or checkbox_group + method => 'checkbox_group', + #method => 'popup_menu', + columns => 3, + metaname => 'site', # Can't be a metaname used elsewhere! + values => [qw/misc mod vhosts other/], + labels => { + misc => 'General Apache docs', + mod => 'Apache Modules', + vhosts => 'Virtual hosts', + }, + description => 'Limit search to these areas: ', + }, + + + + # The 'template' setting defines what generates the output + # The default is "TemplateDefault" which is reasonably ugly, + # but does not require installation of a separate templating system. + + # Note that some of the above options may not be available + # for templating, as it's up to you to layout the form + # and swish-e results in your template. 
+ + # TemplateDefault is the default + + xtemplate => { + package => 'SWISH::TemplateDefault', + }, + + xtemplate => { + package => 'SWISH::TemplateDumper', + }, + + xtemplate => { + package => 'SWISH::TemplateToolkit', + file => 'swish.tt', + options => { + INCLUDE_PATH => '/usr/local/share/swish-e', + #PRE_PROCESS => 'config', + }, + }, + + xtemplate => { + package => 'SWISH::TemplateHTMLTemplate', + options => { + filename => 'swish.tmpl', + path => '/usr/local/share/swish-e', + die_on_bad_params => 0, + loop_context_vars => 1, + cache => 1, + }, + }, + + + + # The "on_intranet" setting is just a flag that can be used to say you do + # not have an external internet connection. It's here because the default + # page generation includes links to images on swish-e.org and on www.w3.org. + # If this is set to one then those images will not be shown. + # (This only affects the default output module SWISH::TemplateDefault) + + on_intranet => 0, + + + + # Here you can hard-code debugging options. These will help you find + # where you made your mistake ;) + # Using all at once will generate a lot of messages to STDERR + # Please see the documentation before using these. + # Typically, you will set these from the command line instead of in the configuration. + + # debug_options => 'basic, command, headers, output, summary, dump', + + + + # This defines the package object for reading CGI parameters + # Defaults to CGI. Might be useful with mod_perl. + # request_package => 'CGI', + # request_package => 'Apache::Request', + + + # use_library => 1, # set true and will use the SWISH::API module + # will cache based on index files when running under mod_perl + + + # Minor adjustment to page display. The page navigation normally looks like: + # Page: 1 5 6 7 8 9 24 + # where the first page and last page are always displayed.
These can be disabled + # by setting to true values ( 1 ) + + no_first_page_navigation => 0, + no_last_page_navigation => 0, + num_pages_to_show => 12, # number of pages to offer + + + + + # Limit to date ranges + + + + # This adds in the date_range limiting options + # You will need the DateRanges.pm module from the author to use that feature + + # Normally, you will want to limit by the last modified date, so specify + # "swishlastmodified" as the property_name. If indexing a mail archive, and, for + # example, you store the date (a unix timestamp) as "date" then specify + # "date" as the property_name. + + date_ranges => { + property_name => 'swishlastmodified', # property name to limit by + + # what you specify here depends on the DateRanges.pm module. + time_periods => [ + 'All', + 'Today', + 'Yesterday', + #'Yesterday onward', + 'This Week', + 'Last Week', + 'Last 90 Days', + 'This Month', + 'Last Month', + #'Past', + #'Future', + #'Next 30 Days', + ], + + line_break => 0, + default => 'All', + date_range => 1, + }, + + + # This is supposed to reduce the load on systems if hit with a large number + # of requests. Although this will limit the number of swish-e processes run + # it will not limit the number of CGI requests. I feel like a better solution + # is to use mod_perl (with the SWISH::API module). + # I also think that running /bin/ps for every request is not ideal. + + # This only works on unix-based systems when running the swish-e binary.
+ # It greps /swish-e/ from the output of ps and aborts if the count is > limit_procs + + # Set max number of swish-e binaries and ps command to run + limit_procs => 0, # max number of swish process to run (zero to not limit) + ps_prog => '/bin/ps -Unobody -ocommand', # command to list number of swish binaries + + }; + +} + +#^^^^^^^^^^^^^^^^^^^^^^^^^ end of user config ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#======================================================================================== + + + +#================================================================================= +# mod_perl entry point +# +# As an example, you might use a PerlSetVar to point to paths to different +# config files, and then cache the different configurations by path. +# +# handler( $r ) reads the 'Swish_Conf_File' dir_config value from the request +# object $r. When it is set, the default config is merged with that file and +# the result is cached in %cached_configs under that path. When it is not set, +# the default config is merged with the default config file on every call. +# In both cases the merged config is passed to process_request() and +# Apache::Constants::OK() is returned. +# +#================================================================================= + +my %cached_configs; + +sub handler { + my $r = shift; + + if ( my $config_path = $r->dir_config( 'Swish_Conf_File' ) ) { + + # Already cached? + # Note that this is cached for the life of the server -- must restart if want to change config + + if ( $cached_configs{ $config_path } ) { + process_request( $cached_configs{ $config_path } ); + return Apache::Constants::OK(); + } + + + # Else, load config + my $config = default_config(); + $config->{config_file} = $config_path; + + # Merge with disk config file. + $cached_configs{ $config_path } = merge_read_config( $config ); + + process_request( $cached_configs{ $config_path } ); + return Apache::Constants::OK(); + } + + + # Otherwise, use hard-coded config + my $config = default_config(); + + # Merge with disk config file. + $config = merge_read_config( $config ); + + # Pass the merged config; calling default_config() again here would + # throw away the settings just read from the disk config file. + process_request( $config ); + + return Apache::Constants::OK(); + +} + + +#============================================================================ +# Read config settings from disk, and merge +# Note, only a missing default config file is ignored (by default this script +# looks for a config file); any other failure to load the file is fatal.
+# +#============================================================================ +sub merge_read_config { + my $config = shift; + + + set_default_debug_flags(); + + set_debug($config); # get from config or from %ENV + + + return $config unless $config->{config_file}; + + my $return = do $config->{config_file}; # load the config file + + unless ( ref $return eq 'HASH' ) { + + # First, let's check for file not found for the default config, which we can ignore + + my $error = $@ || $!; + + if ( $config->{config_file} eq $DEFAULT_CONFIG_FILE && !-e $config->{config_file} ) { + warn "Config file '$config->{config_file}': $!" if $config->{debug}; + return $config; + } + + die "Config file '$config->{config_file}': $error"; + } + + + + if ( $config->{debug} || $return->{debug} ) { + require Data::Dumper; + print STDERR "\n---------- Read config parameters from '$config->{config_file}' ------\n", + Data::Dumper::Dumper($return), + "-------------------------\n"; + } + + set_debug( $return ); + + + # Merge settings + return { %$config, %$return }; +} + +#-------------------------------------------------------------------------------------------------- +sub set_default_debug_flags { + # Debug flags defined + + $SwishSearch::DEBUG_BASIC = 1; # Show command used to run swish + $SwishSearch::DEBUG_COMMAND = 2; # Show command used to run swish + $SwishSearch::DEBUG_HEADERS = 4; # Swish output headers + $SwishSearch::DEBUG_OUTPUT = 8; # Swish output besides headers + $SwishSearch::DEBUG_SUMMARY = 16; # Summary of results parsed + $SwishSearch::DEBUG_RESULTS = 32; # Detail of results parsed + $SwishSearch::DEBUG_DUMP_DATA = 64; # dump data that is sent to templating modules +} + + + + +#--------------------------------------------------------------------------------------------------- +sub set_debug { + my $conf = shift; + + $conf->{debug} = 0; + + my $debug_string = $ENV{SWISH_DEBUG} ||$conf->{debug_options}; + return unless $debug_string; + + + my %debug = ( + basic => 
[$SwishSearch::DEBUG_BASIC, 'Basic debugging'], + command => [$SwishSearch::DEBUG_COMMAND, 'Show command used to run swish'], + headers => [$SwishSearch::DEBUG_HEADERS, 'Show headers returned from swish'], + output => [$SwishSearch::DEBUG_OUTPUT, 'Show output from swish'], + summary => [$SwishSearch::DEBUG_SUMMARY, 'Show summary of results'], + results => [$SwishSearch::DEBUG_RESULTS, 'Show detail of results'], + dump => [$SwishSearch::DEBUG_DUMP_DATA, 'Show all data available to templates'], + ); + + + $conf->{debug} = 1; + + my @debug_str; + + for ( split /\s*,\s*/, $debug_string ) { + if ( exists $debug{ lc $_ } ) { + push @debug_str, lc $_; + $conf->{debug} |= $debug{ lc $_ }->[0]; + next; + } + + print STDERR "Unknown debug option '$_'. Must be one of:\n", + join( "\n", map { sprintf(' %10s: %10s', $_, $debug{$_}->[1]) } sort { $debug{$a}->[0] <=> $debug{$b}->[0] }keys %debug), + "\n\n"; + exit; + } + + print STDERR "Debug level set to: $conf->{debug} [", join( ', ', @debug_str), "]\n"; +} + + +#============================================================================ +# +# This is the main controller (entry point), where a config hash is passed in. +# +# Loads the request module (e.g. 
CGI.pm), and the output module +# Also sets up debugging +# +#============================================================================ + +sub process_request { + my $conf = shift; # configuration parameters + + + + # Limit number of requests - questionable value + limit_swish( $conf->{limit_procs}, $conf->{ps_prog} ) + if !$conf->{use_library} + && $conf->{limit_procs} && $conf->{limit_procs} =~ /^\d+$/ + && $conf->{ps_prog}; + + + + # Set default property used or the href link to the document + $conf->{link_property} ||= 'swishdocpath'; + + # Use CGI.pm by default + my $request_package = $conf->{request_package} || 'CGI'; + + load_module( $request_package ); + my $request_object = $request_package->new; + + + # load the templating module + my $template = $conf->{template} || { package => 'SWISH::TemplateDefault' }; + load_module( $template->{package} ); + + + # Allow fixup within the config file + if ( $conf->{request_fixup} && ref $conf->{request_fixup} eq 'CODE' ) { + &{$conf->{request_fixup}}( $request_object, $conf ); + } + + + set_debug_input( $conf, $request_object ) + if $conf->{debug} && !$ENV{GATEWAY_INTERFACE}; + + + # Create search object and build a query based on CGI parameters + my $search = SwishQuery->new( + config => $conf, + request => $request_object, + ); + + + + + # run the query (run if there's a query) + $search->run_query; # currently, results is the just the $search object + + if ( $search->hits ) { + $search->set_navigation; # sets links + } + + + + show_debug_output( $conf, $search ) + if $conf->{debug}; + + + $template->{package}->show_template( $template, $search ); +} + + +# For limiting number of swish-e binaries + +sub limit_swish { + my ( $limit_procs, $ps_prog ) = @_; + + + my $num_procs = scalar grep { /swish-e/ } `$ps_prog`; + return if $num_procs <= $limit_procs; + + warn "swish.cgi - limited due to too many currently running swish-e binaries: $num_procs running is more than $limit_procs\n"; + + ## Abort + print <<EOF; 
+Status: 503 Too many requests + +<html> +<head><title>Too Many Requests</title></head> +<body> +Too Many Requests -- Try back later +</body> +</html> +EOF + + exit; +} + + + + +#============================================================================ +# +# Loads a perl module -- and shows a pretty web page to say the obvious +# +# +#============================================================================ +sub load_module { + my $package = shift; + $package =~ s[::][/]g; + eval { require "$package.pm" }; + if ( $@ ) { + print <<EOF; +Content-Type: text/html + +<html> +<head><title>Software Error</title></head> +<body><h2>Software Error</h2><p>Please check error log</p></body> +</html> +EOF + + die "$0 $@\n"; + } +} + + + +#================================================================== +# set debugging input +# +#================================================================== + +sub set_debug_input { + my ( $conf, $request_object ) = @_; + + print STDERR 'Enter a query [all]: '; + my $query = <STDIN>; + $query =~ tr/\r//d; + chomp $query; + unless ( $query ) { + print STDERR "Using 'not asdfghjklzxcv' to match all records\n"; + $query = 'not asdfghjklzxcv'; + } + + $request_object->param('query', $query ); + + print STDERR 'Enter max results to display [1]: '; + my $max = <STDIN>; + chomp $max; + $max = 1 unless $max && $max =~/^\d+$/; + + $conf->{page_size} = $max; +} + +#================================================================== +# show debugging output +# +#================================================================== +sub show_debug_output { + my ( $conf, $results ) = @_; + + require Data::Dumper; + + + if ( $results->hits ) { + print STDERR "swish.cgi: returned a page of $results->{navigation}{showing} results of $results->{navigation}{hits} total hits\n"; + } else { + print STDERR "swish.cgi: no results\n"; + } + + if ($conf->{debug} & $SwishSearch::DEBUG_HEADERS ) { + print STDERR "\n------------- Index Headers ------------\n"; + 
if ( $results->{_headers} ) { + print STDERR Data::Dumper::Dumper( $results->{_headers} ); + } else { + print STDERR "No headers\n"; + } + + print STDERR "--------------------------\n"; + } + + + + if ( $conf->{debug} & $SwishSearch::DEBUG_DUMP_DATA ) { + print STDERR "\n------------- Results structure passed to template ------------\n", + Data::Dumper::Dumper( $results ), + "--------------------------\n"; + + } elsif ( $conf->{debug} & $SwishSearch::DEBUG_SUMMARY ) { + print STDERR "\n------------- Results summary ------------\n"; + if ( $results->{hits} ) { + print STDERR "$_->{swishrank} $_->{swishdocpath}\n" for @{ $results->{_results}}; + + } else { + print STDERR "** NO RESULTS **\n"; + } + + } elsif ( $conf->{debug} & $SwishSearch::DEBUG_RESULTS ) { + print STDERR "\n------------- Results detail ------------\n"; + if ( $results->{hits} ) { + print STDERR Data::Dumper::Dumper( $results->{_results} ); + } else { + print STDERR "** NO RESULTS **\n"; + } + + print STDERR "--------------------------\n"; + } +} + + + + + + + +#================================================================================================== +package SwishQuery; +#================================================================================================== + +use Carp; +# Or use this instead -- PLEASE see perldoc CGI::Carp for details +# <opinion>CGI::Carp doesn't help that much</opinion> +#use CGI::Carp; # qw(fatalsToBrowser); + +use SWISH::ParseQuery; + + + + +#-------------------------------------------------------------------------------- +# new() doesn't do much, just create the object +#-------------------------------------------------------------------------------- +sub new { + my $class = shift; + my %options = @_; + + my $conf = $options{config}; + + croak "Failed to set the swish index files in config setting 'swish_index'" unless $conf->{swish_index}; + croak "Failed to specify 'swish_binary' in configuration" unless $conf->{swish_binary}; + + # initialize the 
request search hash + my $sh = { + prog => $conf->{swish_binary}, + config => $conf, + q => $options{request}, + hits => 0, + MOD_PERL => $ENV{MOD_PERL}, + }; + + my $self = bless $sh, $class; + + + # load highlight module, if requsted + + if ( my $highlight = $self->config('highlight') ) { + $highlight->{package} ||= 'SWISH::DefaultHighlight'; + SwishSearch::load_module( $highlight->{package} ); + } + + + # Fetch the swish-e query from the CGI parameters + $self->set_query; + + return $self; +} + + +sub hits { shift->{hits} } + +sub config { + my ($self, $setting, $value ) = @_; + + confess "Failed to pass 'config' a setting" unless $setting; + + my $cur = $self->{config}{$setting} if exists $self->{config}{$setting}; + + $self->{config}{$setting} = $value if $value; + + return $cur; +} + +# Returns false if all of @values are not valid options - for checking +# $config is what $self->config returns + +sub is_valid_config_option { + my ( $self, $config, $err_msg, @values ) = @_; + + unless ( $config ) { + $self->errstr( "No config option set: $err_msg" ); + return; + } + + # Allow multiple values. + my @options = ref $config eq 'ARRAY' ? @$config : ( $config ); + + my %lookup = map { $_ => 1 } @options; + + for ( @values ) { + unless ( exists $lookup{ $_ } ) { + $self->errstr( $err_msg ); + return; + } + } + + return 1; +} + + +sub header { + my $self = shift; + return unless ref $self->{_headers} eq 'HASH'; + + return $self->{_headers}{$_[0]} || ''; +} + + +# return a ref to an array +sub results { + my $self = shift; + return $self->{_results} || undef; +} + +sub navigation { + my $self = shift; + return unless ref $self->{navigation} eq 'HASH'; + + return exists $self->{navigation}{$_[0]} ? 
$self->{navigation}{$_[0]} : ''; +} + +sub CGI { $_[0]->{q} }; + + + + +sub swish_command { + + my ($self, $param_name, $value ) = @_; + + return $self->{swish_command} || {} unless $param_name; + return $self->{swish_command}{$param_name} || '' unless $value; + + $self->{swish_command}{$param_name} = $value; +} + +# For use when forking + +sub swish_command_array { + + my ($self ) = @_; + + my @params; + my $swish_command = $self->swish_command; + + for ( keys %$swish_command ) { + + my $value = $swish_command->{$_}; + + if ( /^-/ ) { + push @params, $_; + push @params, ref $value eq 'ARRAY' ? @$value : $value; + next; + } + + # special cases + if ( $_ eq 'limits' ) { + push @params, '-L', $value->{prop}, $value->{low}, $value->{high}; + next; + } + + die "Unknown swish_command '$_' = '$value'"; + } + + return @params; + +} + + + +sub errstr { + my ($self, $value ) = @_; + + + $self->{_errstr} = $value if $value; + + return $self->{_errstr} || ''; +} + + +#============================================================================== +# Set query from the CGI parameters +#------------------------------------------------------------------------------ + +sub set_query { + my $self = shift; + my $q = $self->{q}; + + # Sets the query string, and any -L limits. 
+ return unless $self->build_query; + + # Set the starting position (which is offset by one) + + my $start = $q->param('start') || 0; + $start = 0 unless $start =~ /^\d+$/ && $start >= 0; + + $self->swish_command( '-b', $start+1 ); + + + + # Set the max hits + + my $page_size = $self->config('page_size') || 15; + $self->swish_command( '-m', $page_size ); + + + return unless $self->set_index_file; + + + # Set the sort option, if any + return unless $self->set_sort_order; + + + return 1; + +} + + + + + +#============================================ +# This returns "$self" just in case we want to seperate out into two objects later + + +sub run_query { + + my $self = shift; + + my $q = $self->{q}; + my $conf = $self->{config}; + + return $self unless $self->swish_command('-w'); + + my $time_out_str = 'Timed out'; + + + my $timeout = $self->config('timeout') || 0; + + eval { + local $SIG{ALRM} = sub { + kill 'KILL', $self->{pid} if $self->{pid}; + die $time_out_str . "\n"; + }; + + alarm $timeout if $timeout && $^O !~ /Win32/i; + $self->run_swish; + alarm 0 unless $^O =~ /Win32/i; + + # catch zombies + waitpid $self->{pid}, 0 if $self->{pid}; # for IPC::Open2 + }; + + if ( $@ ) { + warn "$0 aborted: $@"; # if $conf->{debug}; + + $self->errstr( + $@ =~ /$time_out_str/ + ? "Search timed out after $timeout seconds." + : "Service currently unavailable" + ); + return $self; + } +} + + +# Build href for repeated search via GET (forward, backward links) + +sub set_navigation { + my $self = shift; + my $q = $self->{q}; + + + # Single string + + # default fields + my @std_fields = qw/query metaname sort reverse/; + + # Extra fields could be added in the config file + if ( my $extra = $self->config('extra_fields') ) { + push @std_fields, @$extra; + } + + my @query_string = + map { "$_=" . 
$q->escape( $q->param($_) ) } + grep { $q->param($_) } @std_fields; + + + + # Perhaps arrays + + for my $p ( qw/si sbm/ ) { + my @settings = $q->param($p); + next unless @settings; + push @query_string, "$p=" . $q->escape( $_ ) for @settings; + } + + + + + if ( $self->config('date_ranges' ) ) { + my $dr = SWISH::DateRanges::GetDateRangeArgs( $q ); + push @query_string, $dr, if $dr; + } + + + $self->{query_href} = $q->script_name . '?' . join '&', @query_string; + $self->{my_url} = $q->script_name; + + + my $hits = $self->hits; + + my $start = $self->swish_command('-b') || 1; + $start--; + + $self->{navigation} = { + showing => $hits, + from => $start + 1, + to => $start + $hits, + hits => $self->header('number of hits') || 0, + run_time => $self->header('run time') || 'unknown', + search_time => $self->header('search time') || 'unknown', + }; + + + + $self->set_page ( $self->swish_command( '-m' ) ); + + return $self; + +} + + +#============================================================ +# Build a query string from swish +# Just builds the -w string +#------------------------------------------------------------ + +sub build_query { + my $self = shift; + + my $q = $self->{q}; + + + # set up the query string to pass to swish. + my $query = $q->param('query') || ''; + + for ( $query ) { # trim the query string + s/\s+$//; + s/^\s+//; + } + + $self->{query_simple} = $query; # without metaname + $q->param('query', $query ); # clean up the query, if needed. + + + # Read in the date limits, if any. 
This can create a new query, which is why it is here + return unless $self->get_date_limits( \$query ); + + + unless ( $query ) { + $self->errstr('Please enter a query string') if $q->param('submit'); + return; + } + + + if ( length( $query ) > $self->{config}{max_query_length} ) { + $self->errstr('Please enter a shorter query'); + return; + } + + + + # Adjust the query string for metaname search + # *Everything* is a metaname search + # Might also like to allow searching more than one metaname at the same time + + my $metaname = $q->param('metaname') || 'swishdefault'; + + return unless $self->is_valid_config_option( $self->config('metanames') || 'swishdefault', 'Bad MetaName provided', $metaname ); + + # save the metaname so we know what field to highlight + # Note that this might be a fake metaname + $self->{metaname} = $metaname; + + + # prepend metaname to query + + # expand query when using meta_groups + + my $meta_groups = $self->config('meta_groups'); + + if ( $meta_groups && $meta_groups->{$metaname} ) { + $query = join ' OR ', map { "$_=($query)" } @{$meta_groups->{$metaname}}; + + # This is used to create a fake entry in the parsed query so highlighting + # can find the query words + $self->{real_metaname} = $meta_groups->{$metaname}[0]; + } else { + $query = $metaname . "=($query)"; + } + + + + + ## Look for a "limit" metaname -- perhaps used with ExtractPath + # Here we don't worry about user supplied data + + my $limits = $self->config('select_by_meta'); + my @limits = $q->param('sbm'); # Select By Metaname + + + # Note that this could be messed up by ending the query in a NOT or OR + # Should look into doing: + # $query = "( $query ) AND " . $limits->{metaname} . '=(' . join( ' OR ', @limits ) . ')'; + + if ( @limits && ref $limits eq 'HASH' && $limits->{metaname} ) { + $query .= ' and ' . $limits->{metaname} . '=(' . join( ' or ', @limits ) . 
')'; + } + + + $self->swish_command('-w', $query ); + + return 1; +} + +#======================================================================== +# Get the index files from the form, or from the config settings +# Uses index numbers to hide path names +#------------------------------------------------------------------------ + +sub set_index_file { + my $self = shift; + + my $q = $self->CGI; + + # Set the index file - first check for options + + my $si = $self->config('select_indexes'); + if ( $si && ref $self->config('swish_index') eq 'ARRAY' ) { + + my @choices = $q->param('si'); + + if ( !@choices ) { + + if ( $si->{default_index} ) { + $self->swish_command('-f', $si->{'default_index'}); + return 1; + + } else { + $self->errstr('Please select a source to search'); + return; + } + } + + my @indexes = @{$self->config('swish_index')}; + + + my @selected_indexes = grep {/^\d+$/ && $_ >= 0 && $_ < @indexes } @choices; + + if ( !@selected_indexes ) { + $self->errstr('Invalid source selected'); + return $self; + } + my %dups; + my @idx = grep { !$dups{$_}++ } map { ref($_) ? @$_ : $_ } @indexes[ @selected_indexes ]; + $self->swish_command( '-f', \@idx ); + + + } else { + $self->swish_command( '-f', $self->config('swish_index') ); + } + + return 1; +} + +#================================================================================ +# Parse out the date limits from the form or from GET request +# +#--------------------------------------------------------------------------------- + +sub get_date_limits { + + my ( $self, $query_ref ) = @_; # reference to query since may be modified + + my $conf = $self->{config}; + + # Are date ranges enabled? 
+ return 1 unless $conf->{date_ranges}; + + + eval { require SWISH::DateRanges }; + if ( $@ ) { + print STDERR "\n------ Can't use DateRanges feature ------------\n", + "\nScript will run, but you can't use the date range feature\n", + $@, + "\n--------------\n" if $conf->{debug}; + + delete $conf->{date_ranges}; + return 1; + } + + my $q = $self->{q}; + + my %limits; + + unless ( SWISH::DateRanges::DateRangeParse( $q, \%limits ) ) { + $self->errstr( $limits{dr_error} || 'Bad date range selection' ); + return; + } + + # Store the values for later (for display on templates) + + $self->{DateRanges_time_low} = $limits{dr_time_low}; + $self->{DateRanges_time_high} = $limits{dr_time_high}; + + + # Allow searchs just be date if not "All dates" search + # $$$ should place some limits here, and provide a switch to disable + # as it can bring up a lot of results. + + $$query_ref ||= 'not skaiqwdsikdeekk' + if $limits{dr_time_high}; + + + # Now specify limits, if a range was specified + + my $limit_prop = $conf->{date_ranges}{property_name} || 'swishlastmodified'; + + + if ( $limits{dr_time_low} && $limits{dr_time_high} ) { + + my %limits = ( + prop => $limit_prop, + low => $limits{dr_time_low}, + high => $limits{dr_time_high}, + ); + + $self->swish_command( 'limits', \%limits ); + } + + return 1; +} + + + +#================================================================ +# Set the sort order +# Just builds the -s string +#---------------------------------------------------------------- + +sub set_sort_order { + my $self = shift; + + my $q = $self->{q}; + + my $sorts_array = $self->config('sorts'); + my $sortby = $q->param('sort') || ''; + + return 1 unless $sorts_array && $sortby; + return unless $self->is_valid_config_option( $sorts_array, 'Invalid Sort Option Selected', $sortby ); + + + my $conf = $self->{config}; + + + # Now set sort option - if a valid option submitted (or you could let swish-e return the error). + my $direction = $sortby eq 'swishrank' + ? 
$q->param('reverse') ? 'asc' : 'desc' + : $q->param('reverse') ? 'desc' : 'asc'; + + my @sort_params = ( $sortby, $direction ); + + if ( $conf->{secondary_sort} ) { + my @secondary = ref $conf->{secondary_sort} ? @{ $conf->{secondary_sort} } : $conf->{secondary_sort}; + + push @sort_params, @secondary + if $sortby ne $secondary[0]; + } + + + $self->swish_command( '-s', \@sort_params ); + + + return 1; +} + + + +#======================================================== +# Sets prev and next page links. +# Feel free to clean this code up! +# +# Pass: +# $results - reference to a hash (for access to the headers returned by swish) +# $q - CGI object +# +# Returns: +# Sets entries in the $results hash +# + +sub set_page { + + my ( $self, $Page_Size ) = @_; + + my $q = $self->{q}; + my $config = $self->{config}; + + my $navigation = $self->{navigation}; + + + my $start = $navigation->{from} - 1; # Current starting record index + + + # Set start number for "prev page" and the number of hits on the prev page + + my $prev = $start - $Page_Size; + $prev = 0 if $prev < 0; + + if ( $prev < $start ) { + $navigation->{prev} = $prev; + $navigation->{prev_count} = $start - $prev; + } + + + my $last = $navigation->{hits} - 1; + + + # Set start number for "next page" and number of hits on the next page + + my $next = $start + $Page_Size; + $next = $last if $next > $last; + my $cur_end = $start + $self->{hits} - 1; + if ( $next > $cur_end ) { + $navigation->{next} = $next; + $navigation->{next_count} = $next + $Page_Size > $last + ? $last - $next + 1 + : $Page_Size; + } + + + # Calculate pages ( is this -1 correct here? ) + # Build an array of a range of page numbers. + + my $total_pages = int (($navigation->{hits} -1) / $Page_Size); # total pages for all results. 
+ + if ( $total_pages ) { + + my @pages = 0..$total_pages; + + my $show_pages = $config->{num_pages_to_show} || 12; + + # To make the number always work + $show_pages-- unless $config->{no_first_page_navigation}; + $show_pages-- unless $config->{no_last_page_navigation}; + + + # If too many pages then limit + + if ( @pages > $show_pages ) { + + my $start_page = int ( $start / $Page_Size - $show_pages/2) ; + $start_page = 0 if $start_page < 0; + + # if close to the end then move of center + $start_page = $total_pages - $show_pages + if $start_page + $show_pages - 1 > $total_pages; + + @pages = $start_page..$start_page + $show_pages - 1; + + + # Add first and last pages, unless config says otherwise + unshift @pages, 0 + unless $start_page == 0 || $config->{no_first_page_navigation}; + + push @pages, $total_pages + unless $start_page + $show_pages - 1 == $total_pages || $config->{no_last_page_navigation} + } + + + # Build "canned" pages HTML + + $navigation->{pages} = + join ' ', map { + my $page_start = $_ * $Page_Size; + my $page = $_ + 1; + $page_start == $start + ? $page + : qq[<a href="$self->{query_href}&start=$page_start">$page</a>]; + } @pages; + + + # Build just the raw data - an array of hashes + # for custom page display with templates + + $navigation->{page_array} = [ + map { + { + page_number => $_ + 1, # page number to display + page_start => $_ * $Page_Size, + cur_page => $_ * $Page_Size == $start, # flag + } + } @pages + ]; + + + } + +} + +#================================================== +# Format and return the date range options in HTML +# +#-------------------------------------------------- +sub get_date_ranges { + + my $self = shift; + + my $q = $self->{q}; + my $conf = $self->{config}; + + return '' unless $conf->{date_ranges}; + + # pass parametes, and a hash to store the returned values. + + my %fields; + + SWISH::DateRanges::DateRangeForm( $q, $conf->{date_ranges}, \%fields ); + + + # Set the layout: + + my $string = '<br>Limit to: ' + . 
( $fields{buttons} ? "$fields{buttons}<br>" : '' ) + . ( $fields{date_range_button} || '' ) + . ( $fields{date_range_low} + ? " $fields{date_range_low} through $fields{date_range_high}" + : '' ); + + return $string; +} + + + +#============================================ +# Run swish-e and gathers headers and results +# Currently requires fork() to run. +# +# Pass: +# $sh - an array with search parameters +# +# Returns: +# a reference to a hash that contains the headers and results +# or possibly a scalar with an error message. +# + + +sub run_swish { + + + my $self = shift; + + my $results = $self->{results}; + my $conf = $self->{config}; + my $q = $self->{q}; + + + my @properties; + my %seen; + + # Gather up the properties we need in results + + for ( qw/ title_property description_prop display_props link_property/ ) { + push @properties, ref $conf->{$_} ? @{$conf->{$_}} : $conf->{$_} + if $conf->{$_} && !$seen{$_}++; + } + + # Add in the default props that should be seen. + for ( qw/swishrank/ ) { + push @properties, $_ unless $seen{$_}; + } + + + # add in the default prop - a number must be first (this might be a duplicate in -x, oh well) + unshift @properties, 'swishreccount'; + + + $self->swish_command( -x => join( '\t', map { "<$_>" } @properties ) . '\n' ); + $self->swish_command( -H => 9 ); + + + if ( $conf->{debug} & $SwishSearch::DEBUG_COMMAND ) { + require Data::Dumper; + print STDERR "---- Swish parameters ----\n"; + print STDERR Data::Dumper::Dumper($self->swish_command); + print STDERR "\n-----------------------------------------------\n"; + } + + + + + + + # Use the swish-e library? + + return $self->run_library( @properties ) + if $self->config('use_library'); + + + my $fh = $^O =~ /Win32/i + ? 
windows_fork( $conf, $self ) + : real_fork( $conf, $self ); + + + # read in from child + + my %stops_removed; + + my $unknown_output = ''; + + + while (<$fh>) { + + chomp; + + print STDERR "$_\n" if $conf->{debug} & $SwishSearch::DEBUG_OUTPUT; + + + tr/\r//d; + + # This will not work correctly with multiple indexes when different values are used. + if ( /^# ([^:]+):\s+(.+)$/ ) { + + my $h = lc $1; + my $value = $2; + $self->{_headers}{$h} = $value; + + push @{$self->{_headers}{'removed stopwords'}}, $value if $h eq 'removed stopword' && !$stops_removed{$value}++; + + next; + } + + + # return swish errors as a mesage to the script + $self->errstr($1), return if /^err:\s*(.+)/; + + # Or, if you want to log the errors and just say "Service Unavailable" use this: + #die "$1\n" if /^err:\s*(.+)/; + + + # Found a result + if ( /^\d/ ) { + + my %h; + @h{@properties} = split /\t/; + $self->add_result_to_list( \%h ); + next; + + } elsif ( /^\.$/ ) { + last; + + } else { + next if /^#/; + } + + $unknown_output .= "'$_'\n"; + + } + + die "Swish returned unknown output: $unknown_output\n" if $unknown_output; + + $self->{hits} = $self->{_results} ? @{$self->{_results}} : 0; + +} + +# Filters in place +sub html_escape { + $_[0] = '' unless defined $_[0]; + for ($_[0]) { + s/&/&/g; + s/</</g; + s/>/>/g; + s/"/"/g; + } +} + + +#============================================================================ +# Adds a result to the result list and highlight the search words + +# This is a common source of bugs! The problem is that highlighting is done in this code. +# This is good, especially for the description because it is trimmed down as processing each +# result. Otherwise, would use a lot of memory. It's bad because the highlighting is +# creating html which really should be done in the template output code. +# What that means is the properties that are "searched" are run through the highlighting +# code (and thus HTML escaped) but other properties are not. 
+# If highlighting (and trimming) is to be kept here then either we need to +# html escape all display properties, or flag which ones are escaped. +# Since we know the ultimate output is HTML, the current method will be to escape here. + + +sub add_result_to_list { + my ( $self, $props ) = @_; + + + # Push the result onto the list + + push @{$self->{_results}}, $props; + + + # We need to save the text of the link prop (almost always swishdocpath) + # because all properties are escaped. + + my $link_property = $self->config('link_property') || 'swishdocpath'; + my $link_href = ( $self->config('prepend_path') || '' ) + . $props->{$link_property}; + + # Replace spaces ***argh this is the wrong place to do this! *** + # This doesn't really work -- file names could still have chars that need to be escaped. + $link_href =~ s/\s/%20/g; + + + # Returns hash of the properties that were highlighted + my $highlighted = $self->highlight_props( $props ) || {}; + + my $trim_prop = $self->config('description_prop') || ''; + $props->{$trim_prop} ||= '' + if $trim_prop; + + # HTML escape all properties that were not highlighted + for my $prop (keys %$props) { + next if $highlighted->{$prop}; + + # not highlighted, so escape + html_escape( $props->{$prop} ); + + if ( $prop eq $trim_prop ) { + my $max = $self->config('max_chars') || 500; + + $props->{$trim_prop} = substr( $props->{$trim_prop}, 0, $max) . ' <b>...</b>' + if length $props->{$trim_prop} > $max; + } + } + + $props->{swishdocpath_href} = $link_href; # backwards compatible + $props->{link_property} = $link_href; # backwards compatible + + +} + + +#======================================================================================= + + +# This will call the highlighting module as needed. +# The highlighting module MUST html escape the property. +# returns a hash of properties highlighted + + +sub highlight_props { + my ( $self, $props ) = @_; + + # make sure we have the config we need. 
+ my $highlight_settings = $self->config('highlight') || return; + my $meta_to_prop = $highlight_settings->{meta_to_prop_map} || return; + + + + # Initialize highlight module ( could probably do this once per instance ) + # pass in the config highlight settings, and the swish-e headers as a hash. + + $self->{_highlight_object} ||= $highlight_settings->{package}->new( $highlight_settings, $self->{_headers} ); + my $highlight_object = $self->{_highlight_object} || return; + + + + + # parse the query on first result + + my $parsed_words = $self->header( 'parsed words' ) || die "Failed to find 'Parsed Words' in swish headers"; + + $self->{parsed_query} ||= ( parse_query( $parsed_words ) || return ); + + + my %highlighted; # track which were highlighted to detect if need to trim the description + + + # this is probably backwards -- might be better to loop through the %$props + + while ( my( $meta, $phrases ) = each %{$self->{parsed_query}} ) { + next unless $meta_to_prop->{$meta}; # is it a prop defined to highlight? + + # loop through the properties for the metaname + + for ( @{ $meta_to_prop->{$meta} } ) { + if ( $props->{$_} ) { + $highlighted{$_}++ if $highlight_object->highlight( \$props->{$_}, $phrases, $_ ); + } + } + } + + return \%highlighted; +} + + + + + + +#================================================================== +# Run swish-e by using the SWISH::API module +# + +my %cached_handles; + +sub run_library { + my ( $self, @props ) = @_; + + SwishSearch::load_module( 'SWISH::API' ); + + my $indexes = $self->swish_command('-f'); + + + print STDERR "swish.cgi: running library thus no 'output' available -- try 'summary'\n" + if ($self->{config}{debug} || 0) & $SwishSearch::DEBUG_OUTPUT; + + eval { require Time::HiRes }; + my $start_time = [Time::HiRes::gettimeofday()] unless $@; + + + + unless ( $cached_handles{$indexes} ) { + + my $swish = SWISH::API->new( ref $indexes ? 
join(' ', @$indexes) : $indexes ); + if ( $swish->Error ) { + $self->errstr( join ': ', $swish->ErrorString, $swish->LastErrorMsg ); + delete $cached_handles{$indexes} if $swish->CriticalError; + return; + } + + # read headers (currently only reads one set) + my %headers; + my $index = ($swish->IndexNames)[0]; + + for ( $swish->HeaderNames ) { + my @value = $swish->HeaderValue( $index, $_ ); + my $x = @value; + next unless @value; + $headers{ lc($_) } = join ' ', @value; + } + + + $cached_handles{$indexes} = { + swish => $swish, + headers => \%headers, + }; + } + + my $swish = $cached_handles{$indexes}{swish}; + + my $headers = $cached_handles{$indexes}{headers}; + + $self->{_headers} = $headers; + + + my $search = $swish->New_Search_Object; # probably could cache this, too + + if ( my $limits = $self->swish_command( 'limits' ) ) { + $search->SetSearchLimit( @{$limits}{ qw/prop low high/ } ); + } + + if ( $swish->Error ) { + $self->errstr( join ': ', $swish->ErrorString, $swish->LastErrorMsg ); + delete $cached_handles{$indexes} if $swish->CriticalError; + return; + } + + + if ( my $sort = $self->swish_command('-s') ) { + $search->SetSort( ref $sort ? join( ' ', @$sort) : $sort ); + } + + my $search_time = [Time::HiRes::gettimeofday()] if $start_time; + + my $results = $search->Execute( $self->swish_command('-w') ); + + + $headers->{'search time'} = sprintf('%0.3f seconds', Time::HiRes::tv_interval( $search_time, [Time::HiRes::gettimeofday()] )) + if $start_time; + + + if ( $swish->Error ) { + $self->errstr( join ': ', $swish->ErrorString, $swish->LastErrorMsg ); + delete $cached_handles{$indexes} if $swish->CriticalError; + return; + } + + # Add in results-related headers + $headers->{'parsed words'} = join ' ', $results->ParsedWords( ($swish->IndexNames)[0] ); + + if ( ! 
$results->Hits ) { + $self->errstr('no results'); + return; + } + $headers->{'number of hits'} = $results->Hits; + + # Get stopwords removed from each index (really need to track headers per index to be correct) + + for my $index ( $swish->IndexNames ) { + my @stopwords = $results->RemovedStopwords( $index ); + + push @{$headers->{'removed stopwords'}}, @stopwords + if @stopwords; + } + + + + # Now fetch properties + + $results->SeekResult( $self->swish_command( '-b' ) - 1 ); + + my $page_size = $self->swish_command( '-m' ); + + if ( $swish->Error ) { + $self->errstr( join ': ', $swish->ErrorString, $swish->LastErrorMsg ); + delete $cached_handles{$indexes} if $swish->CriticalError; + return; + } + + my $hit_count; + + while ( my $result = $results->NextResult ) { + my %props; + + + for my $prop ( @props ) { + # Note, we use ResultPropertyStr instead since this is a general purpose + # script (it converts dates to a string, for example). + # $result->Property is a faster method and does not convert dates and numbers to strings. + #my $value = $result->Property( $prop ); + my $value = $result->ResultPropertyStr( $prop ); + next unless $value; # ?? + + $props{$prop} = $value; + } + + $hit_count++; + + $self->add_result_to_list( \%props ); + + last unless --$page_size; + } + + + $headers->{'run time'} = sprintf('%0.3f seconds', Time::HiRes::tv_interval( $start_time, [Time::HiRes::gettimeofday()] )) + if $start_time; + + + $self->{hits} = $hit_count; + + +} + + + +#================================================================== +# Run swish-e by forking +# + +use Symbol; + +sub real_fork { + my ( $conf, $self ) = @_; + + + # Run swish + my $fh = gensym; + my $pid = open( $fh, '-|' ); + + die "Failed to fork: $!\n" unless defined $pid; + + + if ( !$pid ) { # in child + unless ( exec $self->{prog}, $self->swish_command_array ) { + warn "Child process Failed to exec '$self->{prog}' Error: $!"; + print "Failed to exec Swish"; # send this message to parent. 
+ exit; + } + } else { + $self->{pid} = $pid; + } + + return $fh; +} + + +#===================================================================================== +# Windows work around +# from perldoc perlfok -- na, that doesn't work. Try IPC::Open2 +# +sub windows_fork { + my ( $conf, $self ) = @_; + + + require IPC::Open2; + my ( $rdrfh, $wtrfh ); + + # Ok, I'll say it. Windows sucks. + my @command = map { s/"/\\"/g; qq["$_"] } $self->{prog}, $self->swish_command_array; + my $pid = IPC::Open2::open2($rdrfh, $wtrfh, @command ); + + + $self->{pid} = $pid; + + return $rdrfh; +} + + + +1; + + +__END__ + +=head1 NAME + +swish.cgi -- Example Perl script for searching with the SWISH-E search engine. + +=head1 DESCRIPTION + +C<swish.cgi> is a CGI script for searching with the SWISH-E search +engine version 2.1-dev and above. It returns results a page at a +time, with matching words from the source document highlighted, +showing a few words of content on either side of the highlighted word. + +The script is highly configurable. Features include searching +multiple (or selectable) indexes, limiting searches to a subset of documents, +sorting by a number of different properties, and limiting results to +a date range. + +On unix type systems the swish.cgi script is installed in the directory +$prefix/lib/swish-e, which is typically /usr/local/lib/swish-e. This +can be overridden by the configure options --prefix or --libexecdir. + +The standard configuration (i.e. not using a config file) should work +with most swish index files. Customization of the parameters will be +needed if you are indexing special meta data and want to search and/or +display the meta data. The configuration can be modified by editing +this script directly, or by using a configuration file (.swishcgi.conf +by default). The script's configuration file is described below. + +You are strongly encouraged to get the default configuration working +before making changes. 
Most problems using this script are the result +of configuration modifications. + +The script is modular in design. Both the highlighting code and +output generation are handled by modules, which are included in the +F<example/modules> distribution directory and installed in the +$libexecdir/perl directory. +This allows for easy customization of +the output without changing the main CGI script. + +Included with the Swish-e distribution is a module to generate standard HTML +output. There are also modules and template examples to use with the popular +Perl templating systems HTML::Template and Template-Toolkit. This is very +useful if your site already uses one of these templating systems. The +HTML::Template and Template-Toolkit packages are not distributed with +Swish-e. They are available from the CPAN (http://search.cpan.org). + +This script can also run basically unmodified as a mod_perl handler, +providing much better performance than running as a CGI script. Usage +under mod_perl is described below. + +Please read the rest of the documentation. There's a C<DEBUGGING> +section, and a C<FAQ> section. + +This script should work on Windows, but security may be an issue. + +=head1 REQUIREMENTS + +A reasonably current version of Perl. 5.00503 +or above is recommended (anything older will not be supported). + +The Date::Calc module is required to use the date range feature of the script. +The Date::Calc module is also available from CPAN. + + +=head1 INSTALLATION + +Here's an example installation session under Linux. It should be +similar for other operating systems. + +For the sake of simplicity in this installation example all files are +placed in web server space, including files such as swish-e index and +configuration files that would normally not be made available via the +web server. Access to these files should be limited once the script +is running. 
Either move the files to other locations (and adjust the +script's configuration) or use features of the web server to limit +access (such as with F<.htaccess>). + +Please get a simple installation working before modifying the +configuration file. Most problems reported for using this script have +been due to improper configuration. + +The script's default settings are set up for initial testing. By default the settings expect +to find most files and the swish-e binary in the same directory as the script. + +For I<security> reasons, once you have tested the script you will want to change settings to +limit access to some of these files by the web server (either by moving them out of web +space, or using access control such as F<.htaccess>). An example of using F<.htaccess> on +Apache is given below. + +It's expected that swish-e has already been unpacked and the swish-e binary has been compiled +from source and "make install" has been run. If swish-e was installed from a vendor package +(such as from a RPM or Debian package) see that package's documentation for where files are +installed. + +Example Installation: + +=over 4 + +=item 1 Symlink or copy the swish.cgi. + +Symlink (or copy if your platform or webserver does not allow symlinks) +the swish.cgi script from the installation directory to a local directory. +Typically, this would be the cgi-bin directory or a location where CGI scripts +are located. In this example a new directory is created and the script is +symlinked. + + ~$ mkdir swishdir + ~$ cd swishdir + ~/swishdir$ ln -s /usr/local/lib/swish-e/swish.cgi + +The installation directory is set at configure time with the --prefix or +--libexecdir options, but by default is in /usr/local/lib/swish-e. + +=item 2 Create an index + +Use an editor and create a simple configuration file for indexing your +files. In this example the Apache documentation is indexed. Last we +run a simple query to test that the index works correctly. 
+ + ~/swishdir$ cat swish.conf + IndexDir /usr/local/apache/htdocs + IndexOnly .html .htm + DefaultContents HTML* + StoreDescription HTML* <body> 200000 + MetaNames swishdocpath swishtitle + ReplaceRules remove /usr/local/apache/ + +If you do not have the Apache docs installed then pick another directory to index +such as /usr/share/doc. + +Create the index. + + ~/swishdir$ swish-e -c swish.conf + Indexing Data Source: "File-System" + Indexing "/usr/local/apache/htdocs" + Removing very common words... + no words removed. + Writing main index... + Sorting words ... + Sorting 7005 words alphabetically + Writing header ... + Writing index entries ... + Writing word text: Complete + Writing word hash: Complete + Writing word data: Complete + 7005 unique words indexed. + 5 properties sorted. + 124 files indexed. 1485844 total bytes. 171704 total words. + Elapsed time: 00:00:02 CPU time: 00:00:02 + Indexing done! + +Now, verify that the index can be searched: + + ~/swishdir$ swish-e -w install -m 1 + # SWISH format: 2.1-dev-25 + # Search words: install + # Number of hits: 14 + # Search time: 0.001 seconds + # Run time: 0.040 seconds + 1000 htdocs/manual/dso.html "Apache 1.3 Dynamic Shared Object (DSO) support" 17341 + . + +Let's see what files we have in our directory now: + + ~/swishdir$ ls -1 + index.swish-e + index.swish-e.prop + swish.cgi + swish.conf + +=item 3 Test the CGI script + +This is a simple step, but often overlooked. You should test from the command line instead of jumping +ahead and testing with the web server. See the C<DEBUGGING> section below for more information. + + ~/swishdir$ ./swish.cgi | head + Content-Type: text/html; charset=ISO-8859-1 + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + <html> + <head> + <title> + Search our site + </title> + </head> + <body> + +The above shows that the script can be run directly, and generates a correct HTTP header and HTML. 
+ +If you run the above and see something like this: + + ~/swishdir >./swish.cgi + bash: ./swish.cgi: No such file or directory + +then you probably need to edit the script to point to the correct location of your perl program. +Here's one way to find out where perl is located (again, on unix): + + ~/swishdir$ which perl + /usr/local/bin/perl + + ~/swishdir$ /usr/local/bin/perl -v + This is perl, v5.6.0 built for i586-linux + ... + +Good! We are using a reasonably current version of perl. + +Now that we know perl is at F</usr/local/bin/perl> we can adjust the "shebang" line +in the perl script (i.e. the first line of the script): + + ~/swishdir$ pico swish.cgi + (edit the #! line) + ~/swishdir$ head -1 swish.cgi + #!/usr/local/bin/perl -w + +=item 4 Test with the web server + +How you do this is completely dependent on your web server, and you may need to talk to your web +server admin to get this working. Often files with the .cgi extension are automatically set up to +run as CGI scripts, but not always. In other words, this step is really up to you to figure out! + +This example shows creating a I<symlink> from the web server space to the directory used above. +This will only work if the web server is configured to follow symbolic links (the default for Apache). + +This operation requires root access: + + ~/swishdir$ su -c "ln -s $HOME/swishdir /usr/local/apache/htdocs/swishdir" + Password: ********* + +If your account is on an ISP and your web directory is F<~/public_html> then you might just move the entire +directory: + + mv ~/swishdir ~/public_html + +Now, let's make a real HTTP request: + + ~/swishdir$ GET http://localhost/swishdir/swish.cgi | head -3 + #!/usr/local/bin/perl -w + package SwishSearch; + use strict; + +Oh, darn. It looks like Apache is not running the script and instead returning it as a +static page. Apache needs to be told that swish.cgi is a CGI script. 
+ +F<.htaccess> comes to the rescue: + + ~/swishdir$ cat .htaccess + + # Deny everything by default + Deny From All + + # But allow just CGI script + <files swish.cgi> + Options ExecCGI + Allow From All + SetHandler cgi-script + </files> + +That "Deny From All" prevents access to all files (such as config and index files), and only access is allowed to the +F<swish.cgi> script. + +Let's try the request one more time: + + ~/swishdir >GET http://localhost/swishdir/swish.cgi | head + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + <html> + <head> + <title> + Search our site + </title> + </head> + <body> + <h2> + <a href="http://swish-e.org"> + +That looks better! Now use your web browser to test. + +Now, you may note that the links are not valid on the search results page. The swish config file +contained the line: + + ReplaceRules remove /usr/local/apache/ + +To make those links work (and assuming your web server will follow symbolic links): + + ~/swishtest$ ln -s /usr/local/apache/htdocs + + +BTW - "GET" used above is a program included with Perl's LWP library. If you do not have this you might +try something like: + + wget -O - http://localhost/swishdir/swish.cgi | head + +and if nothing else, you can always telnet to the web server and make a basic request. + + ~/swishtest$ telnet localhost 80 + Trying 127.0.0.1... + Connected to localhost. + Escape character is '^]'. + GET /swishtest/swish.cgi http/1.0 + + HTTP/1.1 200 OK + Date: Wed, 13 Feb 2002 20:14:31 GMT + Server: Apache/1.3.20 (Unix) mod_perl/1.25_01 + Connection: close + Content-Type: text/html; charset=ISO-8859-1 + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + <html> + <head> + <title> + Search our site + </title> + </head> + <body> + +This may seem like a lot of work compared to using a browser, but browsers +are a poor tool for basic CGI debugging. + + +=back + +If you have problems check the C<DEBUGGING> section below. 
+ +=head1 CONFIGURATION + +If you want to change the location of the swish-e binary or the index +file, use multiple indexes, add additional metanames and properties, +change the default highlighting behavior, etc., you will need to +adjust the script's configuration settings. + +Again, please get a test setup working with the default parameters before +making changes to any configuration settings. Better to debug one +thing at a time... + +In general, you will need to adjust the script's settings to match the +index file you are searching. For example, if you are indexing a +hypermail list archive you may want to make the script use +metanames/properties of Subject, Author, and, Email address. Or you +may wish to provide a way to limit searches to subsets of documents +(e.g. parts of your directory tree). + +To make things somewhat "simple", the configuration parameters are +included near the top of the swish.cgi program. That is the only +place that the individual parameters are defined and explained, so you +will need to open up the swish.cgi script in an editor to view the +options. Further questions about individual settings should be +referred to the swish-e discussion list. + +The parameters are all part of a perl C<hash> structure, and the +comments at the top of the program should get you going. The perl +hash structure may seem a bit confusing, but it makes it easy to +create nested and complex parameters. Syntax is important, so +cut-n-paste should be your best defense if you are not a perl +programmer. + +By the way, Perl has a number of quote operators. For example, to +quote a string you might write: + + title => 'Search My Site', + +Some options take more than one parameter, where each parameter must +be quoted. For example: + + metanames => [ 'swishdefault', 'swishtitle', 'swishdocpath' ], + +which assigns an array ( [...] ) of three strings to the "metanames" +variable. 
Lists of quoted strings are so common in perl that there's +a special operator called "qw" (quote word) to save typing all those quotes: + + metanames => [ qw/ swishdefault swishtitle swishdocpath / ], + +or to use the parenthesis as the quote character (you can pick any): + + metanames => [ qw( swishdefault swishtitle swishdocpath ) ], + + +There are two options for changing the configuration settings from +their default values: One way is to edit the script directly, or the +other way is to use a separate configuration file. In either case, +the configuration settings are a basic perl hash reference. + +Using a configuration file is described below, but contains the same hash structure. + +There are many configuration settings, and some of them are commented out either by using +a "#" symbol, or by simply renaming the configuration directive (e.g. by adding an "x" to the parameter +name). + +A very basic configuration setup might look like: + + return { + title => 'Search the Swish-e list', # Title of your choice. + swish_binary => 'swish-e', # Location of swish-e binary + swish_index => 'index.swish-e', # Location of your index file + }; + +Or if searching more than one index: + + return { + title => 'Search the Swish-e list', + swish_binary => 'swish-e', + swish_index => ['index.swish-e', 'index2'], + }; + +Both of these examples return a reference to a perl hash ( C<return +{...}> ). In the second example, the multiple index files are set as +an array reference. + +Note that in the example above the swish-e binary file is relative to +the current directory. If running under mod_perl you will need to use +absolute paths. + +The script can also use the SWISH::API perl module (included with the +swish-e distribution in the F<perl> directory) to access the swish-e +index. 
The C<use_library> option is used to enable the use of the +SWISH::API module: + + return { + title => 'Search the Swish-e list', + swish_index => ['index.swish-e', 'index2'], + use_library => 1, # enable use of the SWISH::API module + }; + +The module must be available via the @INC array, like all Perl modules. + +Using the SWISH::API module avoids the need to fork and execute +the swish-e program. Under mod_perl you may see a significant +performance improvement when using the SWISH::API module. Under +normal CGI usage you will probably not see any speed improvements. + + +B<Using A Configuration File> + +As mentioned above, configuration settings can be either set in the +F<swish.cgi> script, or set in a separate configuration file. +Settings in a configuration file will override the settings in the +script. + +By default, the F<swish.cgi> script will attempt to read settings from the file F<.swishcgi.conf>. +For example, you might only wish to change the title used +in the script. Simply create a file called F<.swishcgi.conf> in the same directory as the CGI script: + + > cat .swishcgi.conf + # Example swish.cgi configuration script. + return { + title => 'Search Our Mailing List Archive', + }; + +The settings you use will depend on the index you create with swish: + + return { + title => 'Search the Apache documentation', + swish_binary => 'swish-e', + swish_index => 'index.swish-e', + metanames => [qw/swishdefault swishdocpath swishtitle/], + display_props => [qw/swishtitle swishlastmodified swishdocsize swishdocpath/], + title_property => 'swishdocpath', + prepend_path => 'http://myhost/apachedocs', + + name_labels => { + swishdefault => 'Search All', + swishtitle => 'Title', + swishrank => 'Rank', + swishlastmodified => 'Last Modified Date', + swishdocpath => 'Document Path', + swishdocsize => 'Document Size', + }, + + }; + +The above configuration defines metanames to use on the form. +Searches can be limited to these metanames. 
+ +"display_props" tells the script to display the property "swishlastmodified" (the last modified +date of the file), the document size, and path with the search results. + +The parameter "name_labels" is a hash (reference) +that is used to give friendly names to the metanames. + +Here's another example. Say you want to search either (or both) the Apache 1.3 documentation and the +Apache 2.0 documentation indexed separately. + + return { + title => 'Search the Apache Documentation', + date_ranges => 0, + swish_index => [ qw/ index.apache index.apache2 / ], + select_indexes => { + method => 'checkbox_group', + labels => [ '1.3.23 docs', '2.0 docs' ], # Must match up one-to-one to swish_index + description => 'Select: ', + }, + + }; + +Now you can select either or both sets of documentation while searching. + +All the possible settings are included in the default configuration located near the top of the F<swish.cgi> +script. Open the F<swish.cgi> script with an editor to look at the various settings. Contact the Swish-e Discussion list +for help in configuring the script. + + +=head1 DEBUGGING + +Most problems with using this script have been a result of improper configuration. Please +get the script working with default settings before adjusting the configuration settings. + +The key to debugging CGI scripts is to run them from the command line, not with a browser. + +First, make sure the program compiles correctly: + + $ perl -c swish.cgi + swish.cgi syntax OK + +Next, simply try running the program: + + $ ./swish.cgi | head + Content-Type: text/html; charset=ISO-8859-1 + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + <html> + <head> + <title> + Search our site + </title> + </head> + <body> + +Under Windows you will need to run the script as: + + C:\wwwroot\swishtest> perl swish.cgi + + +Now, you know that the program compiles and will run from the command line. +Next, try accessing the script from a web browser. 
+ +If you see the contents of the CGI script instead of its output then +your web server is not configured to run the script. With Apache look +at settings like ScriptAlias, SetHandler, and Options. + +If an error is reported (such as Internal Server Error or Forbidden) +you need to locate your web server's error_log file and carefully read +what the problem is. Contact your web administrator for help locating +the web server's error log. + +If you don't have access to the web server's error_log file, you can +modify the script to report errors to the browser screen. Open the +script and search for "CGI::Carp". (Author's suggestion is to debug +from the command line -- adding the browser and web server into the +equation only complicates debugging.) + +The script does offer some basic debugging options that allow +debugging from the command line. The debugging options are enabled by +setting an environment variable "SWISH_DEBUG". How that is set +depends on your operating system and the shell you are using. These +examples are using the "bash" shell syntax. + +Note: You can also use the "debug_options" configuration setting, but +the recommended method is to set the environment variable. + +You can list the available debugging options like this: + + $ SWISH_DEBUG=help ./swish.cgi >outfile + Unknown debug option 'help'. Must be one of: + basic: Basic debugging + command: Show command used to run swish + headers: Show headers returned from swish + output: Show output from swish + summary: Show summary of results + dump: Show all data available to templates + +Debugging options may be combined: + + $ SWISH_DEBUG=command,headers,summary ./swish.cgi >outfile + +You will be asked for an input query and the max number of results to return. You can use the defaults +in most cases. It's a good idea to redirect output to a file. Any error messages are sent to stderr, so +those will still be displayed (unless you redirect stderr, too). 
+ +Here are some examples: + + ~/swishtest$ SWISH_DEBUG=basic ./swish.cgi >outfile + Debug level set to: 1 + Enter a query [all]: + Using 'not asdfghjklzxcv' to match all records + Enter max results to display [1]: + + ------ Can't use DateRanges feature ------------ + + Script will run, but you can't use the date range feature + Can't locate Date/Calc.pm in @INC (@INC contains: modules /usr/local/lib/perl5/5.6.0/i586-linux /usr/local/lib/perl5/5.6.0 /usr/local/lib/perl5/site_perl/5.6.0/i586-linux /usr/local/lib/perl5/site_perl/5.6.0 /usr/local/lib/perl5/site_perl/5.005/i586-linux /usr/local/lib/perl5/site_perl/5.005 /usr/local/lib/perl5/site_perl .) at modules/DateRanges.pm line 107, <STDIN> line 2. + BEGIN failed--compilation aborted at modules/DateRanges.pm line 107, <STDIN> line 2. + Compilation failed in require at ./swish.cgi line 971, <STDIN> line 2. + + -------------- + Can't exec "./swish-e": No such file or directory at ./swish.cgi line 1245, <STDIN> line 2. + Child process Failed to exec './swish-e' Error: No such file or directory at ./swish.cgi line 1246, <STDIN> line 2. + Failed to find any results + +The above indicates two problems. First problem is that the Date::Calc module is not installed. +The Date::Calc module is needed to use the date limiting feature of the script. + +The second problem is a bit more serious. It's saying that the script can't find the +swish-e binary file. In this example it's specified as being in the current directory. +Either correct the path to the swish-e binary, or make a local copy or symlink to the +swish-e binary. 
+ + ~/swishtest$ cat .swishcgi.conf + return { + title => 'Search the Apache Documentation', + swish_binary => '/usr/local/bin/swish-e', + date_ranges => 0, + }; + +Now, let's try again: + + ~/swishtest$ SWISH_DEBUG=basic ./swish.cgi >outfile + Debug level set to: 1 + + ---------- Read config parameters from '.swishcgi.conf' ------ + $VAR1 = { + 'date_ranges' => 0, + 'title' => 'Search the Apache Documentation' + }; + ------------------------- + Enter a query [all]: + Using 'not asdfghjklzxcv' to match all records + Enter max results to display [1]: + Found 1 results + + Can't locate SWISH::TemplateDefault.pm in @INC (@INC contains: modules /usr/local/lib/perl5/5.6.0/i586-linux /usr/local/lib/perl5/5.6.0 /usr/local/lib/perl5/site_perl/5.6.0/i586-linux /usr/local/lib/perl5/site_perl/5.6.0 /usr/local/lib/perl5/site_perl/5.005/i586-linux /usr/local/lib/perl5/site_perl/5.005 /usr/local/lib/perl5/site_perl .) at ./swish.cgi line 608. + +This means that the swish.cgi script could not locate a required module. To correct this +locate where the SWISH::Template module is installed and add a "use lib" line to +your configuration file (or to the swish.cgi script): + + + ~/swishtest$ cat .swishcgi.conf + use lib '/home/bill/local/lib/perl'; + + return { + title => 'Search the Apache Documentation', + date_ranges => 0, + }; + + ~/swishtest$ SWISH_DEBUG=basic ./swish.cgi >outfile + Debug level set to: 1 + + ---------- Read config parameters from '.swishcgi.conf' ------ + $VAR1 = { + 'date_ranges' => 0, + 'title' => 'Search the Apache Documentation' + }; + ------------------------- + Enter a query [all]: + Using 'not asdfghjklzxcv' to match all records + Enter max results to display [1]: + Found 1 results + +That is much better! + +The "use lib" statement tells Perl where to look for modules by adding the path supplied +to an array called @INC. + +Note that most modules are in the SWISH namespace. For example, the default output +module is called SWISH::TemplateDefault. 
When Perl is looking for that module it is looking for +the file F<SWISH/TemplateDefault.pm>. If the "use lib" statement is set as: + + use lib '/home/bill/local/lib/perl'; + +then Perl will look (among other places) for the file + + /home/bill/local/lib/perl/SWISH/TemplateDefault.pm + +when attempting to load the SWISH::TemplateDefault module. Relative paths may also be used. + + use lib 'modules'; + +will cause Perl to look for the file: + + ./modules/SWISH/TemplateDefault.pm + +relative to where the swish.cgi script is running. (This is not true when running under mod_perl). + +Here's another common problem. Everything checks out, but when you run the script you see +the message: + + Swish returned unknown output + +Ok, let's find out what output it is returning: + + ~/swishtest$ SWISH_DEBUG=headers,output ./swish.cgi >outfile + Debug level set to: 13 + + ---------- Read config parameters from '.swishcgi.conf' ------ + $VAR1 = { + 'swish_binary' => '/usr/local/bin/swish-e', + 'date_ranges' => 0, + 'title' => 'Search the Apache Documentation' + }; + ------------------------- + Enter a query [all]: + Using 'not asdfghjklzxcv' to match all records + Enter max results to display [1]: + usage: swish [-i dir file ... ] [-S system] [-c file] [-f file] [-l] [-v (num)] + ... + version: 2.0 + docs: http://sunsite.berkeley.edu/SWISH-E/ + + *** 9872 Failed to run swish: 'Swish returned unknown output' *** + Failed to find any results + +Oh, looks like /usr/local/bin/swish-e is version 2.0 of swish. We need 2.1-dev and above! + +=head1 Frequently Asked Questions + +Here's some common questions and answers. + +=head2 How do I change the way the output looks? + +The script uses a module to generate output. By default it uses the +SWISH::TemplateDefault.pm module. The module used is selected in the +swish.cgi configuration file. Modules are located in the +example/modules/SWISH directory in the distribution, but are installed in +the $prefix/lib/swish-e/perl/SWISH/ directory. 
+ +To make simple changes you can edit the installed SWISH::TemplateDefault +module directly, otherwise make a copy of the module and modify its package +name. For example, change directories to the location of the installed +module and copy the module to a new name: + + $ cp TemplateDefault.pm MyTemplateDefault.pm + +Then at the top of the module adjust the "package" line to: + + package SWISH::MyTemplateDefault; + +To use this module you need to adjust the configuration settings (either at +the top of F<swish.cgi> or in a configuration file): + + + template => { + package => 'SWISH::MyTemplateDefault', + }, + +The module does not need to be in the SWISH namespace, and can be stored in +any location as long as the module can be found via the @INC array (i.e. +modify the "use lib" statement in swish.cgi if needed). + + +=head2 How do I use a templating system with swish.cgi? + +In addition to the TemplateDefault.pm module, the swish-e distribution includes two other Perl modules for +generating output using the templating systems HTML::Template and Template-Toolkit. + +Templating systems use template files to generate the HTML, and make maintaining the look of a large (or small) site +much easier. HTML::Template and Template-Toolkit are separate packages and can be downloaded from the CPAN. +See http://search.cpan.org. + +Two basic templates are provided as examples for generating output using these templating systems. +The example templates are located in the F<example> directory. +The module F<SWISH::TemplateHTMLTemplate> uses the file F<swish.tmpl> to generate its output, while the +module F<SWISH::TemplateToolkit> uses the F<swish.tt> file. +(Note: swish.tt was renamed from search.tt Jun 03, 2004.) + +To use either of these modules you will need to adjust the "template" configuration setting. Examples for +both templating systems are provided in the configuration settings near the top of the F<swish.cgi> program. 
+ +Use of these modules is an advanced usage of F<swish.cgi>, and they are provided as examples only. + +All of the output generation modules are passed a hash with the results from the search, plus other data used to create the +output page. You can see this hash by using the debugging option "dump" or by using the included SWISH::TemplateDumper +module: + + ~/swishtest >cat .swishcgi.conf + return { + title => 'Search the Apache Documentation', + template => { + package => 'SWISH::TemplateDumper', + }, + }; + +And run a query. For example: + + http://localhost/swishtest/swish.cgi?query=install + +=head2 Why are there three different highlighting modules? + +There are three highlighting modules included with the swish-e distribution. +Each is a trade-off of speed vs. accuracy: + + SWISH::DefaultHighlight - reasonably fast, but does not highlight phrases + SWISH::PhraseHighlight - reasonably slow, but is reasonably accurate + SWISH::SimpleHighlight - fast, some phrases, but least accurate + +Eh, the default is actually "PhraseHighlight". Oh well. + +All of the highlighting modules slow down the script. Optimizations to these modules are welcome! + +=head2 My ISP doesn't provide access to the web server logs + +There are a number of options. One way is to use the CGI::Carp module. Search in the +swish.cgi script for: + + use Carp; + # Or use this instead -- PLEASE see perldoc CGI::Carp for details + # use CGI::Carp qw(fatalsToBrowser warningsToBrowser); + +And change it to look like: + + #use Carp; + # Or use this instead -- PLEASE see perldoc CGI::Carp for details + use CGI::Carp qw(fatalsToBrowser warningsToBrowser); + +This should be only for debugging purposes, as if used in production you may end up sending +quite ugly and confusing messages to your browsers. + +=head2 Why does the output show (NULL)? + +Swish-e displays (NULL) when attempting to display a property that does not exist in the index. 
+ +The most common reason for this message is that you did not use StoreDescription in your config file while indexing. + + StoreDescription HTML* <body> 200000 + +That tells swish to store the first 200,000 characters of text extracted from the body of each document parsed +by the HTML parser. The text is stored as property "swishdescription". + +The index must be recreated after changing the swish-e configuration. + +Running: + + ~/swishtest > ./swish-e -T index_metanames + +will display the properties defined in your index file. + +This can happen with other properties, too. +For example, this will happen when you are asking for a property to display that is not defined in swish. + + ~/swishtest > ./swish-e -w install -m 1 -p foo + # SWISH format: 2.1-dev-25 + # Search words: install + err: Unknown Display property name "foo" + . + + ~/swishtest > ./swish-e -w install -m 1 -x 'Property foo=<foo>\n' + # SWISH format: 2.1-dev-25 + # Search words: install + # Number of hits: 14 + # Search time: 0.000 seconds + # Run time: 0.038 seconds + Property foo=(NULL) + . + +To check that a property exists in your index you can run: + + ~/swishtest > ./swish-e -w not dkdk -T index_metanames | grep foo + foo : id=10 type=70 META_PROP:STRING(case:ignore) *presorted* + +Ok, in this case we see that "foo" is really defined as a property. 
Now let's make sure F<swish.cgi> +is asking for "foo" (sorry for the long lines): + + ~/swishtest > SWISH_DEBUG=command ./swish.cgi > /dev/null + Debug level set to: 3 + Enter a query [all]: + Using 'not asdfghjklzxcv' to match all records + Enter max results to display [1]: + ---- Running swish with the following command and parameters ---- + ./swish-e \ + -w \ + 'swishdefault=(not asdfghjklzxcv)' \ + -b \ + 1 \ + -m \ + 1 \ + -f \ + index.swish-e \ + -s \ + swishrank \ + desc \ + swishlastmodified \ + desc \ + -x \ + '<swishreccount>\t<swishtitle>\t<swishdescription>\t<swishlastmodified>\t<swishdocsize>\t<swishdocpath>\t<fos>\t<swishrank>\t<swishdocpath>\n' \ + -H \ + 9 + +If you look carefully you will see that the -x parameter has "fos" instead of "foo", so there's our problem. + +=head2 How do I use the SWISH::API perl module with swish.cgi? + +Use the C<use_library> configuration directive: + + use_library => 1, + +This will only provide improved performance when running under mod_perl or other persistent +environments. + +=head2 Why does the "Run time" differ when using the SWISH::API module + +When using the SWISH::API module the run (and search) times are calculated +within the script. When using the swish-e binary the swish-e program reports the +times. The "Run time" may include the time required to load and compile the SWISH::API +module. + +=head1 MOD_PERL + +This script can be run under mod_perl (see http://perl.apache.org). +This will improve the response time of the script compared to running under CGI by loading the +swish.cgi script into the Apache web server. + +You must have a mod_perl enabled Apache server to run this script under mod_perl. + +Configuration is simple. In your httpd.conf or your startup.pl file you need to +load the script. 
For example, in httpd.conf you can use a perl section: + + <perl> + use lib '/usr/local/apache/cgi-bin'; # location of the swish.cgi file + use lib '/home/yourname/swish-e/example/modules'; # modules required by swish.cgi + require "swish.cgi"; + </perl> + +Again, note that the paths used will depend on where you installed the script and the modules. +When running under mod_perl the swish.cgi script becomes a perl module, and therefore the script +does not need to be installed in the cgi-bin directory. (But, you can actually use the same script as +both a CGI script and a mod_perl module at the same time, read from the same location.) + +The above loads the script into mod_perl. Then to configure the script to run add this to your httpd.conf +configuration file: + + <location /search> + PerlSetVar Swish_Conf_File /home/yourname/swish-e/myconfig.pl + allow from all + SetHandler perl-script + PerlHandler SwishSearch + </location> + +Note that you use the "Swish_Conf_File" setting in httpd.conf to tell the script +which config file to use. This means you can use the same script (and loaded modules) +for different search sites (running on the same Apache server). You can just specify +different config files for each Location and they can search different indexes and +have a completely different look for each site, but all share the same code. + +B<Note> that the config files are cached in the swish.cgi script. Changes to the config file +will require restarting the Apache server before they will be reloaded into the swish.cgi +script. This avoids calling stat() for every request. + +Unlike CGI, mod_perl does not change the current directory to the location of the script, so +your settings for the swish binary and the path to your index files must be absolute +paths (or relative to the server root). + +Using the SWISH::API module with mod_perl will provide the most performance improvements. 
+Use of the SWISH::API module can be enabled by the configuration setting C<use_library>: + + use_library => 1, + +Without highlighting code enabled, using the SWISH::API module resulted in about 20 requests +per second, where running the swish-e binary slowed the script down to about 8 requests per second. + +Note that the highlighting code is slow. For the best search performance turn off highlighting. +In your config file you can add: + + highlighting => 0, # disable highlighting + +and the script will show the first 500 chars of the description (or whatever you set for "max_chars"). +Without highlighting, one test was processing about 20 requests per second. +With the "PhraseHighlight" module that dropped to a little better than two requests per second, +"DefaultHighlight" was about 2.3 requests per second, and "SimpleHighlight" was about 6 requests per second. + +Experiment with different highlighting options when testing performance. + +Please post to the swish-e discussion list if you have any questions about running this +script under mod_perl. + +Here are some general requests/second figures on an Athlon XP 1800+ with 1/2GB RAM, Linux 2.4.20. + + Highlighting Mode + + None Phrase Default Simple + Using SWISH::API 45 1.5 2 12 + ---------------------------------------------------------------------------- + Using swish-e 12 1.3 1.8 7.5 + binary + +As you can see the highlighting code is a limiting factor. + +=head1 SpeedyCGI + +SpeedyCGI (also called PersistentPerl) is another way to run Perl scripts persistently. +SpeedyCGI is good if you do not have mod_perl available or do not have root access. +SpeedyCGI works on Unix systems by loading the script into a "back end" process and keeping +it in memory between requests. New requests are passed to the back end processes which avoids +the startup time required by a Perl CGI script. + +Install SpeedyCGI from http://daemoninc.com/ (your OS may provide a packaged version of +SpeedyCGI) and then change the first line of swish.cgi. 
For example, if the speedy binary is +installed in /usr/bin/speedy, use the line: + + #! /usr/bin/speedy -w -- -t60 + +The -w option is passed to Perl, and all options following the double-dash are SpeedyCGI options. + +Note that when using SpeedyCGI configuration data is cached in memory. If you change the swish.cgi +configuration file (.swishcgi.conf) then touch the main swish.cgi script to force reloading of +configuration data. + +=head1 Spidering + +There are two ways to spider with swish-e. One uses the "http" input method that uses code that's +part of swish. The other way is to use the new "prog" method along with a perl helper program called +C<spider.pl>. + +Here's an example of a configuration file for spidering with the "http" input method. +You can see that the configuration is not much different than the file system input method. +(But, don't use the http input method -- use the -S prog method shown below.) + + # Define what to index + IndexDir http://www.myserver.name/index.html + IndexOnly .html .htm + + IndexContents HTML* .html .htm + DefaultContents HTML* + StoreDescription HTML* <body> 200000 + MetaNames swishdocpath swishtitle + + # Define http method specific settings -- see swish-e documentation + SpiderDirectory ../swish-e/src/ + Delay 0 + +You index with the command: + + swish-e -S http -c spider.conf + +Note that this does take longer. For example, spidering the Apache documentation on +a local web server with this method took over a minute, where indexing with the +file system took less than two seconds. Using the "prog" method can speed this up. + +Here's an example configuration file for using the "prog" input method: + + # Define the location of the spider helper program + IndexDir ../swish-e/prog-bin/spider.pl + + # Tell the spider what to index. 
+ SwishProgParameters default http://www.myserver.name/index.html + + IndexContents HTML* .html .htm + DefaultContents HTML* + StoreDescription HTML* <body> 200000 + MetaNames swishdocpath swishtitle + +Then to index you use the command: + + swish-e -c prog.conf -S prog -v 0 + +Spidering with this method took nine seconds. + + +=head1 Stemmed Indexes + +Many people enable a feature of swish called word stemming to provide +"fuzzy" search options to their users. The stemming code does not +actually find the "stem" of a word; rather, it removes and/or replaces +common endings on words. Stemming is far from perfect, and many words +do not stem as you might expect. Plus, currently only English is +supported. But, it can be a helpful tool for searching your site. +You may wish to create both a stemmed and non-stemmed index, and +provide a checkbox for selecting the index file. + +To enable a stemmed index you simply add to your configuration file: + + UseStemming yes + +If you want to use a stemmed index with this program and continue to highlight search terms you will need +to install a perl module that will stem words. This section explains how to do this. + +The perl module is included with the swish-e distribution. It can be found in the examples directory (where +you found this file) and is called something like: + + SWISH-Stemmer-0.05.tar.gz + +The module should also be available on CPAN (http://search.cpan.org/). + +Here's an example session for installing the module. (There will be quite a bit of output +when running make.) + + + % gzip -dc SWISH-Stemmer-0.05.tar.gz |tar xof - + % cd SWISH-Stemmer-0.05 + % perl Makefile.PL + or + % perl Makefile.PL PREFIX=$HOME/perl_lib + % make + % make test + + (perhaps su root at this point if you did not use a PREFIX) + % make install + % cd .. + +Use the B<PREFIX> if you do not have root access or you want to install the modules +in a local library. 
If you do use a PREFIX setting, add a C<use lib> statement to the top of this +swish.cgi program. + +For example: + + use lib qw( + /home/bmoseley/perl_lib/lib/site_perl/5.6.0 + /home/bmoseley/perl_lib/lib/site_perl/5.6.0/i386-linux/ + ); + +Once the stemmer module is installed, and you are using a stemmed index, the C<swish.cgi> script will automatically +detect this and use the stemmer module. + +=head1 DISCLAIMER + +Please use this CGI script at your own risk. + +This script has been tested and used without problem, but you should still be aware that +any code running on your server represents a risk. If you have any concerns please carefully +review the code. + +See http://www.w3.org/Security/Faq/www-security-faq.html + +Security on Windows is questionable. + +=head1 SUPPORT + +The SWISH-E discussion list is the place to ask for any help regarding SWISH-E or this example +script. See http://swish-e.org. + +Before posting please review: + + http://swish-e.org/2.2/docs/INSTALL.html#When_posting_please_provide_the_ + +Please do not contact the author or any of the swish-e developers directly. + +=head1 LICENSE + +swish.cgi $Revision$ Copyright (C) 2001 Bill Moseley search@hank.org +Example CGI program for searching with SWISH-E + + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version +2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ + +=head1 AUTHOR + +Bill Moseley + +=cut + + diff --git a/cpp/doc/swish/swish.conf b/cpp/doc/swish/swish.conf index f0dc79bb8ba..4bb474c8a00 100644 --- a/cpp/doc/swish/swish.conf +++ b/cpp/doc/swish/swish.conf @@ -1,5 +1,6 @@ IndexDir /var/www/html/doc/3.2/reference IndexOnly .html +FileRules filename is _sindex\.html DefaultContents HTML* StoreDescription HTML* 200000 Replacerules remove /var/www/html/ diff --git a/cpp/src/slice2html/Gen.cpp b/cpp/src/slice2html/Gen.cpp index 9182d73e3f4..3c694561ec7 100644 --- a/cpp/src/slice2html/Gen.cpp +++ b/cpp/src/slice2html/Gen.cpp @@ -35,7 +35,7 @@ void generate(const UnitPtr& unit, const string& dir, const string& header, const string& footer, const string& indexHeader, const string& indexFooter, - const string& imageDir, const string& logoURL, unsigned indexCount, unsigned warnSummary) + const string& imageDir, const string& logoURL, const string& searchAction, unsigned indexCount, unsigned warnSummary) { unit->mergeModules(); @@ -50,6 +50,7 @@ generate(const UnitPtr& unit, const string& dir, GeneratorBase::setFooter(footer); GeneratorBase::setImageDir(imageDir); GeneratorBase::setLogoURL(logoURL); + GeneratorBase::setSearchAction(searchAction); GeneratorBase::setIndexCount(indexCount); GeneratorBase::warnSummary(warnSummary); @@ -95,6 +96,7 @@ string Slice::GeneratorBase::_header2; string Slice::GeneratorBase::_footer; string Slice::GeneratorBase::_imageDir; string Slice::GeneratorBase::_logoURL; +string Slice::GeneratorBase::_searchAction; unsigned Slice::GeneratorBase::_indexCount = 0; unsigned Slice::GeneratorBase::_warnSummary = 0; ContainedList Slice::GeneratorBase::_symbols; @@ -155,6 +157,15 @@ Slice::GeneratorBase::setLogoURL(const string& logoURL) } // +// Set search action, if any. +// +void +Slice::GeneratorBase::setSearchAction(const string& searchAction) +{ + _searchAction = searchAction; +} + +// // Set the threshold at which we start generating sub-indexes. 
// If a page has fewer entries than this, we don't generate a // sub-index. (For example, with "ic" set to 3, we generate @@ -728,12 +739,41 @@ Slice::GeneratorBase::printHeaderFooter(const ContainedPtr& c) end(); end(); + printSearch(); + printLogo(c, container, onEnumPage); end(); } void +Slice::GeneratorBase::printSearch() +{ + if(!_searchAction.empty()) + { + _out << nl << "<div style=\"text-align: center;\">"; + _out.inc(); + start("table", "SearchTable"); + start("tr"); + start("td"); + _out << nl << "<form method=\"get\" action=\"" << _searchAction << "\"" + << " enctype=\"application/x-www-form-urlencoded\" class=\"form\">"; + _out.inc(); + start("div"); + _out << nl << "<input maxlength=\"100\" value=\"\" type=\"text\" name=\"query\">"; + _out << nl << "<input type=\"submit\" value=\"Search\" name=\"submit\">"; + end(); + _out.dec(); + _out << nl << "</form>"; + end(); + end(); + end(); + _out.dec(); + _out << nl << "</div>"; + } +} + +void Slice::GeneratorBase::printLogo(const ContainedPtr& c, const ContainerPtr& container, bool forEnum) { string imageDir = getImageDir(); @@ -1544,6 +1584,8 @@ Slice::StartPageGenerator::printHeaderFooter() end(); end(); + printSearch(); + if(!imageDir.empty()) { start("table", "LogoTable"); diff --git a/cpp/src/slice2html/Gen.h b/cpp/src/slice2html/Gen.h index d48405e71b5..408ee5772ed 100644 --- a/cpp/src/slice2html/Gen.h +++ b/cpp/src/slice2html/Gen.h @@ -17,7 +17,8 @@ namespace Slice { void generate(const UnitPtr&, const ::std::string&, const ::std::string&, const ::std::string&, const std::string&, - const ::std::string&, const ::std::string&, const ::std::string&, unsigned, unsigned); + const ::std::string&, const ::std::string&, const ::std::string&, const ::std::string&, + unsigned, unsigned); typedef ::std::set< ::std::string> Files; @@ -30,6 +31,7 @@ public: static void setFooter(const ::std::string&); static void setImageDir(const ::std::string&); static void setLogoURL(const ::std::string&); + static void 
setSearchAction(const ::std::string&); static void setIndexCount(int); static void warnSummary(int); static void setSymbols(const ContainedList&); @@ -51,6 +53,7 @@ protected: void printSummary(const ContainedPtr&, const ContainerPtr&, bool); void printHeaderFooter(const ContainedPtr&); + void printSearch(); void printLogo(const ContainedPtr&, const ContainerPtr&, bool); ::std::string toString(const SyntaxTreeBasePtr&, const ContainerPtr&, bool = true, bool = false, @@ -70,7 +73,6 @@ protected: static unsigned _indexCount; static unsigned _warnSummary; - private: void openStream(const ::std::string&); @@ -96,6 +98,7 @@ private: static ::std::string _footer; static ::std::string _imageDir; static ::std::string _logoURL; + static ::std::string _searchAction; static ContainedList _symbols; }; diff --git a/cpp/src/slice2html/Main.cpp b/cpp/src/slice2html/Main.cpp index d7480475873..4befc462ca3 100644 --- a/cpp/src/slice2html/Main.cpp +++ b/cpp/src/slice2html/Main.cpp @@ -36,6 +36,7 @@ usage(const char* n) "--indexftr FILE Use the contents of FILE as the footer of the index/toc page (default=--ftr).\n" "--image-dir DIR Directory containing images for style sheets.\n" "--logo-url URL Link to URL from logo image (requires --image-dir).\n" + "--search ACTION Generate search box with specified ACTION.\n" "--index NUM Generate subindex if it has at least NUM entries (0 for no index, default=1).\n" "--summary NUM Print a warning if a summary sentence exceeds NUM characters.\n" "-d, --debug Print debug messages.\n" @@ -61,6 +62,7 @@ main(int argc, char* argv[]) opts.addOpt("", "index", IceUtil::Options::NeedArg, "1"); opts.addOpt("", "image-dir", IceUtil::Options::NeedArg); opts.addOpt("", "logo-url", IceUtil::Options::NeedArg); + opts.addOpt("", "search", IceUtil::Options::NeedArg); opts.addOpt("", "summary", IceUtil::Options::NeedArg, "0"); opts.addOpt("d", "debug"); opts.addOpt("", "ice"); @@ -139,6 +141,8 @@ main(int argc, char* argv[]) string logoURL = 
opts.optArg("logo-url"); + string searchAction = opts.optArg("search"); + string warnSummary = opts.optArg("summary"); unsigned summaryCount; if(!warnSummary.empty()) @@ -207,7 +211,7 @@ main(int argc, char* argv[]) try { Slice::generate(p, output, header, footer, indexHeader, indexFooter, imageDir, logoURL, - indexCount, summaryCount); + searchAction, indexCount, summaryCount); } catch(const string& err) { |