PERL 10
Conf.pl Guest on 16th July 2020 08:45:20 AM
  1. # Perlfect Search configuration file
  2. #$rcs = ' $Id: conf.pl,v 1.74 2007/03/30 22:55:03 gzervas Exp $' ;
  3.  
  4. # NOTE: Whenever you change one of the options that's marked with [re-index]
  5. # you need to run indexer.pl again to make the change take effect.
  6.  
  7. ###########################################################################
  8. ### basic configuration
  9. ### You'll have to adapt these values if you didn't use setup.pl
  10.  
  11. # Where do you want the indexer to start on your disk?
  12. # ** Note ** : If your files are generated dynamically (e.g. via PHP)
  13. # you should set $HTTP_START_URL (see below), otherwise users
  14. # will be able to see your pages' source code using the
  15. # "highlight matches" link.
  16. # [re-index]
  17. $DOCUMENT_ROOT = '/home/priya/public_html/';
  18.  
  19. # The base url of your site (normally that's the URL which
  20. # corresponds to $DOCUMENT_ROOT).
  21. $BASE_URL = 'http://jsoc.stanford.edu/~priya/index.html';
  22.  
  23. # The url in which Perlfect Search is located (usually somewhere in cgi-bin/).
  24. $CGIBIN = 'http://jsoc.stanford.edu/~priya/cgi-bin/perlfect/search/';
  25.  
  26. # The full-path of the directory where Perlfect Search is installed.
  27. $INSTALL_DIR = '/home/priya/public_html/perlfect/search/';
  28.  
  29. # Only files with these extensions should be indexed (case-sensitive).
  30. # This is only relevant for file system indexing, when you index files via
  31. # http you need to set @HTTP_CONTENT_TYPES instead. [re-index]
  32. @EXT = ("htm","html","shtml","asp","txt","doc.pdf");
  33.  
  34. # If you do not have telnet/ssh access to the server that runs the script, you
  35. # need to execute indexer.pl via CGI. Of course not everybody should be able
  36. # to do that, so set a password with this option.
  37. # ** Note ** : Only use this if absolutely necessary! Setting to "" disables
  38. # execution as a CGI, which is much more secure. Note that other people on
  39. # your server can probably read this file and look up your password.
  40. $INDEXER_CGI_PASSWORD = "";
  41.  
  42. ###########################################################################
  43. ### http configuration
  44. ### You only need this if you want to index your pages via http
  45.  
  46. # Where you want the indexer to start via http. Leave empty if
  47. # you want to index the files in the filesystem ($DOCUMENT_ROOT).
  48. # ** WARNING **: Do not use for foreign servers! It might use too many
  49. # resources on other people's servers. [re-index]
  50. # example: $HTTP_START_URL = 'http://localhost/';
  51. $HTTP_START_URL = '';
  52.  
  53. # The indexer might not notice if it runs into an endless loop. To avoid
  54. # that, set this to the maximum number of pages that will be visited
  55. # (this can be bigger than the number of pages indexed). [re-index]
  56. $HTTP_MAX_PAGES = 100;
  57.  
  58. # The web server's document root. Normally that's the same as $DOCUMENT_ROOT,
  59. # it differs if you're only using Perlfect Search on a subdirectory. [re-index]
  60. $HTTP_SERVER_ROOT = $DOCUMENT_ROOT;
  61.  
  62. # Limit crawling to these URL patterns. This is an important setting so
  63. # the script doesn't run out of control.
  64. # ** WARNING **: The default ($HTTP_START_URL) should not be changed,
  65. # otherwise you risk the script to crawl on remote servers. For example,
  66. # the robots.txt file will only be used on the $HTTP_START_URL server!
  67. # [re-index]
  68. @HTTP_LIMIT_URLS = ($HTTP_START_URL);
  69.  
  70. # Comment this out if you want to ignore robots.txt (only do that if
  71. # you really know what you are doing):
  72. $ROBOT_AGENT = 'perlfectsearch';
  73.  
  74. # Should the indexer follow links that are commented out?
  75. $HTTP_FOLLOW_COMMENT_LINKS = 1;
  76.  
  77. # Only if indexing via http: the content types to index.
  78. # Add 'application/msword' for MS-Word,
  79. # 'application/pdf' for PDF. [re-index]
  80. @HTTP_CONTENT_TYPES = ('text/html', 'text/plain');
  81.  
  82. # Set to 1 to get verbose output during indexing. [re-index]
  83. $HTTP_DEBUG = 1;
  84.  
  85. ###########################################################################
  86. ### advanced configuration
  87. ### You only need this if you want to adapt advanced features
  88.  
  89. # Programs that convert other formats to ascii text.
  90. # The name of the file to be filtered is passed as FILENAME, and the command
  91. # must print out ascii (or latin1) text.
  92. # pdftotext is part of xpdf, available at
  93. # http://www.foolabs.com/xpdf/download.html
  94. # antiword is available at http://www.winfield.demon.nl/
  95. # NOTE: You also have to set @EXT or @HTTP_CONTENT_TYPES accordingly.
  96. # If there's a problem with pdftotext, try a new version or hand over
  97. # the -raw option to pdftotext.
  98. # [re-index]
  99. %EXT_FILTER = (
  100.            "pdf" => "/usr/bin/pdftotext FILENAME -",
  101.            "doc" => "/usr/bin/antiword FILENAME"
  102. );
  103.  
  104. # How many results should be shown per page.
  105. $RESULTS_PER_PAGE = 10;
  106.  
  107. # Limit the number of results. 0 = no limit.
  108. $MAX_RESULTS = 0;
  109.  
  110. # Enable the "highlight matches" feature that displays the original
  111. # pages, but with the search terms highlighted. See the README on
  112. # restrictions of this feature.
  113. $HIGHLIGHT_MATCHES = 1;
  114.  
  115. # A "highlight matches" link only works for HTML files, so only
  116. # offer such a link for files with these suffixes.
  117. # ** Note **: If $HTTP_START_URL is not set, the highlighting
  118. # will load the file from disk so that the user might find
  119. # passwords in the highlighted file! So don't set this to include
  120. # dynamic files, unless you are using $HTTP_START_URL.
  121. @HIGHLIGHT_EXT = ("html", "htm");
  122.  
  123. # Perlfect Search can highlight the search terms in the matching
  124. # document. These are the colors that will be used for the background
  125. # of the terms (the browser must support CSS for this). If the last color
  126. # is used, the first one will be used again if there are still terms left.
  127. @HIGHLIGHT_COLORS = ('#4fafea', '#e5b547', '#aaaaaa', '#ee77ee');
  128.  
  129. # Show the ranking in percent, with the first document = 100%.
  130. $PERCENTAGE_RANKING = 1;
  131.  
  132. # Do you want to index numbers? If so set $INDEX_NUMBERS to 1. [re-index]
  133. $INDEX_NUMBERS = 0;
  134.  
  135. # If you don't have enough memory, set this to 1. This will slow down
  136. # indexer.pl by a factor of about 2. Searching is not affected.
  137. $LOW_MEMORY_INDEX = 1;
  138.  
  139. # How much of the document should be put in the index? With this option,
  140. # the context of the match is shown on the results page. This only works
  141. # if the match was in the first $CONTEXT_SIZE bytes of the document.
  142. # Warning: Using this option will generate a very big index file.
  143. # Set to 0 to disable, set to -1 for no limit. [re-index]
  144. $CONTEXT_SIZE = 0;
  145.  
  146. # If $CONTEXT_SIZE is enabled, how many occurrences of every term should be shown
  147. # on the results page?
  148. $CONTEXT_EXAMPLES = 2;
  149.  
  150. # If $CONTEXT_SIZE is enabled, how many words should be used to show the context
  151. # of a term?
  152. $CONTEXT_DESC_WORDS = 12;
  153.  
  154. # How many words should be used from the <BODY> of an html document as a
  155. # description for the document in case there is no <META description> tag
  156. # available and $CONTEXT_SIZE is 0. [re-index]
  157. $DESC_WORDS = 25;
  158.  
  159. # The minimum length of a word. Any word of smaller size is not indexed.
  160. # [re-index]
  161. $MINLENGTH = 3;
  162.  
  163. # If you have umlauts or accents etc. in your text, enable this.
  164. # With this option accented characters will be indexed as the characters
  165. # they are based on (e.g. è -> e, ü -> u), without this option they will
  166. # be filtered out completely (you don't want that). [re-index]
  167. $SPECIAL_CHARACTERS = 1;
  168.  
  169. # The largest acceptable word size. Reducing this saves space but decreases
  170. # result accuracy. Setting the variable to 0 ignores stemming altogether.
  171. # [re-index]
  172. $STEMCHARS = 0;
  173.  
  174. # Add URLs to the index, so one can search for them? Note that special
  175. # characters will be ignored, just as in normal text. [re-index]
  176. $INDEX_URLS = 0;
  177.  
  178. # You can completely ignore certain parts of your documents if you put these
  179. # HTML comments around them. [re-index]
  180. $IGNORE_TEXT_START = '<!--ignore_perlfect_search-->';
  181. $IGNORE_TEXT_END = '<!--/ignore_perlfect_search-->';
  182.  
  183. # The maximum length of <title> elements, everything longer than this
  184. # will be cut off. [re-index]
  185. $MAX_TITLE_LENGTH = 80;
  186.  
  187. # How much more important are words found in the title, in the meta values
  188. # (author, description, keywords), and in the headlines compared to normal
  189. # text in the body? This influences the ranking of the results.
  190. # Use any integer (0 = ignore that text completely) [re-index]
  191. $TITLE_WEIGHT = 5;
  192. $META_WEIGHT = 5;
  193. $H_WEIGHT{'1'} = 5;     # headline <h1>...</h1>
  194. $H_WEIGHT{'2'} = 4;
  195. $H_WEIGHT{'3'} = 3;
  196. $H_WEIGHT{'4'} = 1;
  197. $H_WEIGHT{'5'} = 1;
  198. $H_WEIGHT{'6'} = 1;     # headline <h6>...</h6>
  199.  
  200. # If you want to log the queries to an extra file, set this to 1.
  201. # Every use of search.pl will then be logged to data/log.txt. That file
  202. # has to exist and must be writable for the webserver. The line format is:
  203. # REMOTE_HOST;date;terms;matches;current page;(time to search in seconds);
  204. # NOTE: You'll have to comment in two lines at the top of search.pl to get the
  205. # time value (see the comment there).
  206. # NOTE: if you have many queries, this file will grow quite fast.
  207. $LOG = 0;
  208.  
  209. # This will increase the score of results that contain more than one of
  210. # the searched terms. Queries with only one term will not be affected.
  211. # The number given here is a factor that multiplies the score (even
  212. # several times, if there are more than two terms). 0 turns it off.
  213. $MULTIPLE_MATCH_BOOST = 0;
  214.  
  215. # Date format for the result page. %Y = year, %m = month, %d = day,
  216. # %H = hour, %M = minute, %S = second. On a Unix system use
  217. # 'man strftime' to get a list of all possible options.
  218. $DATE_FORMAT = "%Y-%m-%d";
  219.  
  220. # Date format for the "Latest Index update" information on the result page.
  221. $INDEX_DATE_FORMAT = "%Y-%m-%d %H:%M";
  222.  
  223. # Directory with templates (normally you don't have to modify this).
  224. $TEMPLATE_DIR = $INSTALL_DIR.'templates/';
  225.  
  226. # What's the default language. This is the language that's used if no lang
  227. # parameter is passed to the script or if the parameter is invalid.
  228. $DEFAULT_LANG = 'en';
  229.  
  230. # The result templates for several languages.
  231. $SEARCH_TEMPLATE{'en'} = $TEMPLATE_DIR.'search.html';
  232. $SEARCH_TEMPLATE{'de'} = $TEMPLATE_DIR.'search_de.html';
  233. $SEARCH_TEMPLATE{'fr'} = $TEMPLATE_DIR.'search_fr.html';
  234. $SEARCH_TEMPLATE{'it'} = $TEMPLATE_DIR.'search_it.html';
  235. $NO_MATCH_TEMPLATE{'en'} = $TEMPLATE_DIR.'no_match.html';
  236. $NO_MATCH_TEMPLATE{'de'} = $TEMPLATE_DIR.'no_match_de.html';
  237. $NO_MATCH_TEMPLATE{'fr'} = $TEMPLATE_DIR.'no_match_fr.html';
  238. $NO_MATCH_TEMPLATE{'it'} = $TEMPLATE_DIR.'no_match_it.html';
  239. # This is the template for using search.pl via command line:
  240. $SEARCH_TEMPLATE{'text'} = $TEMPLATE_DIR.'search.txt';
  241. $NO_MATCH_TEMPLATE{'text'} = $TEMPLATE_DIR.'no_match.txt';
  242. # This is the template for using the test cases (development only):
  243. $SEARCH_TEMPLATE{'qa'} = $INSTALL_DIR.'qa/search_qa.txt';
  244. $NO_MATCH_TEMPLATE{'qa'} = $INSTALL_DIR.'qa/no_match_qa.txt';
  245.  
  246. # The text for the "Next Page" link in several languages.
  247. $NEXT_PAGE{'en'} = 'Next';
  248. $NEXT_PAGE{'de'} = 'n&auml;chste Seite';
  249. $NEXT_PAGE{'fr'} = 'Suivant';
  250. $NEXT_PAGE{'it'} = 'Successiva';
  251.  
  252. # The text for the "Previous Page" link in several languages.
  253. $PREV_PAGE{'en'} = 'Previous';
  254. $PREV_PAGE{'de'} = 'vorige Seite';
  255. $PREV_PAGE{'fr'} = 'Précédent';
  256. $PREV_PAGE{'it'} = 'Precedente';
  257.  
  258. # Text of the link that shows a colored backround for matched terms:
  259. $HIGHLIGHT_TERMS{'en'} = 'highlight matches';
  260. $HIGHLIGHT_TERMS{'de'} = 'Treffer hervorheben';
  261.  
  262. # The text for the "too common" warning. <WORDS> will be replaced with
  263. # a list of the ignored words. If there are no ignored words, this text
  264. # will not appear.
  265. $IGNORED_WORDS{'en'} = '<p>The following words are either too short or very common and were
  266.         not included in your search: <strong><WORDS></strong></p>';
  267. $IGNORED_WORDS{'de'} = '<p>Folgende Wörter sind zu kurz oder kommen sehr häufig vor und wurden
  268.         daher in Ihrer Suchanfrage ignoriert: <strong><WORDS></strong></p>';
  269. $IGNORED_WORDS{'fr'} = '<p>Les mots suivants sont trop courts ou très courants et n\'ont
  270.         pas été inclus dans votre recherche: <strong><WORDS></strong></p>';
  271. # fixme: "too short" missing:
  272. $IGNORED_WORDS{'it'} = '<p>Le seguenti parole sono molto comuni e non
  273.    saranno incluse nella vostra ricerca: <strong><WORDS></strong></p>';
  274.  
  275. ###########################################################################
  276. ### You shouldn't have to edit anything below this line.
  277.  
  278. # Various paths (do NOT use system-wide /tmp for security reasons!)
  279. $TMP_DIR  = $INSTALL_DIR.'temp/';
  280. $DATA_DIR = $INSTALL_DIR.'data/';
  281. $CONF_DIR = $INSTALL_DIR."conf/";
  282. $STOPWORDS_FILE = $CONF_DIR.'stopwords.txt';
  283. $NO_INDEX_FILE = $CONF_DIR.'no_index.txt';
  284. $LOGFILE = $DATA_DIR.'log.txt';
  285. $SEARCH = 'search.pl';
  286. $SEARCH_URL = $CGIBIN.$SEARCH;
  287. $UPDATE_FILE = $DATA_DIR.'update';
  288.  
  289. # Paths to the database files.
  290. $INV_INDEX_DB_FILE = $DATA_DIR.'inv_index';
  291. $DOCS_DB_FILE      = $DATA_DIR.'docs';
  292. $URLS_DB_FILE      = $DATA_DIR.'urls';
  293. $SIZES_DB_FILE     = $DATA_DIR.'sizes';
  294. $TERMS_DB_FILE     = $DATA_DIR.'terms';
  295. $DF_DB_FILE        = $DATA_DIR.'df';
  296. $TF_DB_FILE        = $DATA_DIR.'tf';
  297. $CONTENT_DB_FILE   = $DATA_DIR.'content';
  298. $DESC_DB_FILE      = $DATA_DIR.'desc';
  299. $TITLES_DB_FILE    = $DATA_DIR.'titles';
  300. $DATES_DB_FILE     = $DATA_DIR.'dates';
  301.  
  302. # Paths to the temporary database files.
  303. $INV_INDEX_TMP_DB_FILE = $DATA_DIR.'inv_index_tmp';
  304. $DOCS_TMP_DB_FILE      = $DATA_DIR.'docs_tmp';
  305. $URLS_TMP_DB_FILE      = $DATA_DIR.'urls_tmp';
  306. $SIZES_TMP_DB_FILE     = $DATA_DIR.'sizes_tmp';
  307. $TERMS_TMP_DB_FILE     = $DATA_DIR.'terms_tmp';
  308. $CONTENT_TMP_DB_FILE   = $DATA_DIR.'content_tmp';
  309. $DESC_TMP_DB_FILE      = $DATA_DIR.'desc_tmp';
  310. $TITLES_TMP_DB_FILE    = $DATA_DIR.'titles_tmp';
  311. $DATES_TMP_DB_FILE     = $DATA_DIR.'dates_tmp';
  312.  
  313. # Official version number.
  314. $VERSION = "3.37";
  315. 1;

Paste is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Raw Paste

Login or Register to edit or fork this paste. It's free.