Rem
Rem $Header: wk0init.sql 16-sep-2002.15:55:14 syang Exp $
Rem
Rem wk0init.sql
Rem
Rem Copyright (c) 1999, 2002, Oracle Corporation.  All rights reserved.  
Rem
Rem    NAME
Rem      wk0init.sql - WK init crawler configuration
Rem
Rem    DESCRIPTION
Rem      set default crawler configuration parameters
Rem
Rem    NOTES
Rem      default max cache size = 5M
Rem              Inso filter thread = 1
Rem              crawling threads = 20
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    syang       09/16/02 - bug 2569663: unsupported Inso output charset JA16_EUC
Rem    syang       02/28/02 - bug 2246355: bind launch instance
Rem    syang       02/25/02 - use upper case for launch from any inst
Rem    syang       02/14/02 - add default jdbc driver type CC_JDBC_DRIVER
Rem    syang       02/18/02 - add launch_any
Rem    syang       02/01/02 - initialize default language with territory
Rem    syang       10/15/01 - bug2054828: utf8 not supported by ctxhx
Rem    ymatsuda    07/23/01 - change EUCJIS to EUC_JP
Rem    syang       07/13/01 - add CC_DS_AGENT_PATH
Rem    echee       03/08/01 - Editing rmipolicy path to comply with installer
Rem    syang       02/28/01 - optimize_index moved to ctxsys
Rem    dczhang     01/05/01 - remove old obj defs first
Rem    syang       09/26/00 - increase java max heap to 256m
Rem    syang       09/22/00 - add more db charset to inso charset mapping
Rem    syang       09/14/00 - use define variables to avoid sqlplus bug
Rem    echee       08/28/00 - renaming rmi policy file used by db machine
Rem    echee       08/20/00 - multiple crawler support
Rem    jechow      08/01/00 - cut the classpath
Rem    syang       07/31/00 - add default no file protocol crawling
Rem    syang       06/12/00 - set default cache to 5M
Rem    chsin       05/30/00 - change data source id from 0 to -1
Rem    syang       05/23/00 - remove invalid comment
Rem    rpandian    05/22/00 - change default web source id
Rem    rpandian    05/15/00 - Add Default value for MIME TYPE
Rem    rpandian    05/05/00 - Add default charset
Rem    syang       04/18/00 - set java start heap size and max heap size
Rem    ymatsuda    03/28/00 - pipe config info
Rem    syang       03/21/00 - add auto language detection
Rem    syang       03/08/00 - correct default language to en, ja
Rem    syang       03/07/00 - add exception handler in callback statement
Rem    syang       02/25/00 - LANGUAGE mapped to LANG
Rem    syang       02/22/00 - add optimization job configuration
Rem    syang       02/14/00 - LAST_MODIFIED should not mapped to CRAWLED_DATE
Rem    ymatsuda    02/16/00 - multi instance
Rem    syang       02/07/00 - add default language
Rem    syang       01/17/00 - add CC_LOG_DIRECTORY
Rem    dyu         01/13/00 - Fix classpath too long for sqlplus argument
Rem    ymatsuda    12/08/99 - add US7ASCII
Rem    syang       12/03/99 - set thread number and filter number
Rem    syang       12/02/99 - description should be mapped from body
Rem    swkuo       11/24/99 - Remove checking crawler configuration
Rem    syang       11/23/99 - commit after insert
Rem    swkuo       11/23/99 - Initialize cache full action
Rem    ymatsuda    11/23/99 - cache char set
Rem    swkuo       11/22/99 - created
Rem    swkuo       11/22/99 - Created
Rem

Rem Removing old object definitions...
-- Remove every existing row from the defaults table so that the PL/SQL block
-- below can repopulate it from scratch. The delete is committed on its own,
-- ahead of the inserts that follow
delete from WK$CRAWLER_CONFIG_DEFAULT;
commit;

Rem Initialize the WK$CRAWLER_CONFIG_DEFAULT table
declare
  -- Populates WK$CRAWLER_CONFIG_DEFAULT with the default crawler settings and
  -- then binds the launch instance through wk_adm.set_launch_instance.
  --
  -- Inputs are SQL*Plus substitution variables that must be defined before
  -- this script runs: cthx_location, data_location, java_location,
  -- agent_location, jdbc_location and launch_any.
  --
  -- The database character set (taken from userenv('language')) selects the
  -- cache encoding and the default language stored in the table.
  crawler_id   number := -1;
  no_copy      number := -2; -- value initialized should not be copied
  lang         varchar2(100);
  dft_language varchar2(100);
  charset      varchar2(100);
  encoding     varchar2(100);
  training_dir varchar2(100);
  inst_name    varchar2(100);
  lookup_err   varchar2(200); -- message pushed when the name lookup finds no row

  -- Insert a single (id, parameter name, parameter value) default row.
  procedure add_default(p_cw_id in number,
                        p_name  in varchar2,
                        p_value in varchar2) is
  begin
    insert into WK$CRAWLER_CONFIG_DEFAULT(CCD_CW_ID, CCD_PNAME, CCD_PVALUE)
      values(p_cw_id, p_name, p_value);
  end add_default;
begin

  -- userenv('language') has the form language_territory.charset, keep the
  -- part after the dot
  lang := userenv('language');
  charset := upper(substr(lang,instr(lang,'.')+1));

  dft_language := 'en-US'; -- English
  -- this part needs to be consistent with the filter preference. (wk0pref.sql)
  -- default language should be ISO 639-1 language code
  if (charset = 'WE8ISO8859P1' or charset = 'US7ASCII') then
    encoding := '8859_1';
  elsif (charset = 'EE8ISO8859P2') then -- East European
    encoding := '8859_2';
  elsif (charset = 'SE8ISO8859P3') then -- South European
    encoding := '8859_3';
  elsif (charset = 'NEE8ISO8859P4') then -- North and North-East European
    encoding := '8859_4';
  elsif (charset = 'CL8ISO8859P5') then -- Latin/Cyrillic
    encoding := '8859_5';
  elsif (charset = 'AR8ISO8859P6') then -- Latin/Arabic
    encoding := '8859_6';
  elsif (charset = 'EL8ISO8859P7') then -- Latin/Greek
    encoding := '8859_7';
  elsif (charset = 'IW8ISO8859P8') then -- Latin/Hebrew
    encoding := '8859_8';
  elsif (charset = 'WE8ISO8859P9') then -- West European and Turkish
    encoding := '8859_9';
  elsif (charset = 'ZHS16CGB231280') then -- 16-bit fixed Simplified Chinese
    encoding := 'GB2312';
    dft_language := 'zh-CN'; -- Chinese
  elsif (charset = 'ZHT16BIG5') then -- Big 5 16-bit Traditional Chinese
    encoding := 'Big5';
    dft_language := 'zh-TW'; -- Chinese
  elsif (charset = 'KO16KSC5601') then -- 16-bit Korean
    encoding := 'ksc5601';
    dft_language := 'ko-KR'; -- Korean
  elsif (charset = 'JA16EUC') then
    encoding := 'Unicode'; -- EUC_JP not supported by Stellant code
    dft_language := 'ja-JP'; -- JAPANESE
  elsif (charset = 'JA16SJIS') then
    encoding := 'SJIS';
    dft_language := 'ja-JP';
  else
    -- any character set not listed above falls back to Unicode
    encoding := 'Unicode';
  end if;

  -- default char set ISO LATIN 1
  add_default(crawler_id, 'CC_CHARSET', '8859_1');

  -- default for MIME TYPE (two rows under the same parameter name)
  add_default(crawler_id, 'CC_MIMETYPE', 'text/html');
  add_default(crawler_id, 'CC_MIMETYPE', 'text/plain');

  -- cache encoding and default language derived from the database charset
  add_default(crawler_id, 'CC_CACHE_CHARSET', encoding);
  add_default(crawler_id, 'CC_LANGUAGE', dft_language);

  -- default max cache size is 5 (5M per the NOTES in the script header)
  add_default(crawler_id, 'CC_CACHE_SIZE', '5');

  add_default(crawler_id, 'CC_FILTER_PATH', '&cthx_location');

  -- default Inso filter number is 1
  add_default(crawler_id, 'CC_FILTER_NUM', '1');

  add_default(crawler_id, 'CC_EXEC_NAME', 'ImtCrawler');

  add_default(crawler_id, 'CC_TABLE_NAME', 'WK$URL');

  -- default crawling thread is 20
  add_default(crawler_id, 'CC_THREAD', '20');

  add_default(crawler_id, 'CC_CONFIG_FILE_NAME',
              '&data_location'||'config/crawler.dat');

  -- no default log directory
  add_default(crawler_id, 'CC_LOG_DIRECTORY', NULL);

  add_default(crawler_id, 'CC_LOG_FILE_NAME', 'IMTCRAWLER.LOG');

  -- java executable followed by the start heap and max heap options
  add_default(crawler_id, 'CC_JAVA_EXEC_PATH', '&java_location -ms16m -mx256m');

  add_default(crawler_id, 'CC_DS_AGENT_PATH', '&agent_location');

  add_default(crawler_id, 'CC_DATABASE', '&jdbc_location');

  -- stored under the no_copy id, value is upper-cased
  add_default(no_copy, 'CC_LAUNCH_FROM_ANY_INST', upper('&launch_any'));

  -- crawler socket read timeout in seconds
  add_default(crawler_id, 'CC_TIMEOUT', '30');

  add_default(crawler_id, 'CC_RMI_POLICY_FILE',
              '&data_location' || 'config/rmipolicy');

  -- initialize training data set
  training_dir := '&data_location' || 'training/';

  add_default(crawler_id, 'CC_TRAININGDOC_DIR', training_dir);

  -- one row per training document, value is "<language code> <file name>"
  add_default(crawler_id, 'CC_TRAINING_DOC', 'da ' || 'danish.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'nl ' || 'dutch.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'en ' || 'english.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'fr ' || 'french.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'de ' || 'german.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'it ' || 'italian.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'pt ' || 'portugue.dat');
  add_default(crawler_id, 'CC_TRAINING_DOC', 'es ' || 'spanish.dat');

  -- default is no file protocol (value is two blanks)
  add_default(crawler_id, 'CC_NO_FILE_PROTOCOL', '  ');

  -- java program that run index optimization job
  add_default(crawler_id, 'OC_EXEC_NAME', 'OptimizeIndex');

  -- OptimizeIndex log file
  add_default(crawler_id, 'OC_LOG_FILE_NAME', 'OPTINDEX.LOG');

  -- SQL statement to execute (arguments are instance id, job id)
  add_default(crawler_id, 'OC_PROCEDURE',
              'declare maxtime number; lk varchar2(128);' ||
              'begin wksys.wk$pre_optimize_index(?,?,maxtime,lk); ' ||
              '  ctx_ddl.optimize_index(''WK$DOC_PATH_IDX'',''FULL'', maxtime);'||
              '  wksys.wk$post_optimize_index(?,lk); end;');

  -- default jdbc driver is thin (0) driver
  add_default(no_copy, 'CC_JDBC_DRIVER', '0');

  -- the defaults are committed before the launch instance is bound, so a
  -- failure below does not undo them
  commit;

  -- bind launch instance to instance 1 if launch anywhere is false,
  -- otherwise bind it to the database name
  begin
    if (upper('&launch_any') = 'FALSE') then
      lookup_err := 'wk0init.sql: unable to locate instance name for instance 1';
      select INSTANCE_NAME into inst_name from gv$instance where
        INSTANCE_NUMBER = 1;
    else
      lookup_err := 'wk0init.sql: unable to locate database name';
      select NAME into inst_name from v$database;
    end if;

    wk_adm.set_launch_instance(inst_name,'&jdbc_location');
  exception
    when no_data_found then
      -- not possible, but...
      wk_err.push(WKIG.IE_INTERNAL_ERROR, lookup_err);
      wk_err.raise;
    when others then
      -- both launch modes now report unexpected errors the same way
      wk_err.text_on_stack(sqlerrm);
      wk_err.raise;
  end;
end;
/
