Rem
Rem $Header: wk0idxcheck.sql 07-jul-2004.13:05:39 syang Exp $
Rem
Rem wk0idxcheck.sql
Rem
Rem Copyright (c) 2003, 2004, Oracle. All rights reserved.  
Rem
Rem    NAME
Rem      wk0idxcheck.sql - wk instance index preference checker
Rem
Rem    DESCRIPTION
Rem      Check instance created before DB character set change to make sure
Rem      it is compatible with the new DB character set. May need to recreate
Rem      Ultra Search index
Rem
Rem    NOTES
Rem      User will need to modify the parameters of the wk_ddl.create_index call if
Rem      custom indexing preferences are used.
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    syang       07/07/04 - bug 3750885: Mac- charset mapping 
Rem    syang       10/21/03 - syang_backport_9.2.0.4.0_3066203 
Rem    syang       08/06/03 - filter select has no row for null_filter 
Rem    syang       07/23/03 - Created
Rem

set feedback off
set verify off
REM  This script checks and corrects an Ultra Search instance created before a database
REM  character set change. In general there are three areas that could go wrong:
REM  1. Cache file character set
REM  2. Index filter used (for converting cache files into the database character set)
REM  3. Index lexer used (multi lexer needed for indexing CJK multibyte documents)
REM
REM  Before 10i, an index preference can only be replaced by recreating the index, so the
REM  user has to weigh continuing with the existing index data set against recreating it.

PROMPT ==============  Ultra Search Instance Indexing Preference Checking ==============
PROMPT

PROMPT This script should be run after wk0prefcheck.sql
PROMPT This script must be run as instance owner and not WKSYS.  

set heading off

-- Any SQL error from here until the instance has been selected ends the session.
whenever sqlerror exit

-- Guard against running as WKSYS: for that user DECODE yields the string 'NOT',
-- and comparing it with the number 1 fails with a conversion error, which the
-- WHENEVER SQLERROR EXIT above turns into an exit. Any other user gets the
-- 'User is ...' line printed.
select 'User is '||USER
  from dual
 where 1 = decode(USER, 'WKSYS', 'NOT', 1);

ACCEPT inst_name PROMPT 'Ultra Search instance name:'

-- Select the instance entered above; failures are re-raised through wk_err
-- so that the script stops here.
begin
  wk_adm.use_instance('&inst_name');
exception
  when others then
    wk_err.raise;
end;
/

-- The remaining steps report errors but keep going.
whenever sqlerror continue
-- Show the database character set and the indexing settings the instance
-- currently uses. Headings are still switched off at this point, so each query
-- prints only its data line(s).
set serveroutput on;

column CC_PNAME format a10
column CC_PVALUE format a20
column ISL_LANGUAGE format a20
column ISL_OBJECT format a20
column IXV_OBJECT format a20
column IXV_CLASS format a20
column IXV_VALUE format a20


-- The character set is the part of userenv('language') after the '.'
select 'Database character set is '''||upper(substr(userenv('language'),instr(userenv('language'),'.')+1))||''''
  from dual;
PROMPT
PROMPT Existing Ultra Search index settings:
PROMPT =====================================================
select 'Cache File Character Set:   ',CC_PVALUE from wk$crawler_config where cc_pname='CC_CACHE_CHARSET';
-- Prints nothing when the index uses NULL_FILTER (no row exists for it)
select 'Index Filter used:          ',IXV_OBJECT from ctx_user_index_values where IXV_CLASS='FILTER' and IXV_INDEX_NAME='WK$DOC_PATH_IDX';
PROMPT
PROMPT Index Lexer(s) used:
-- Restricted to the Ultra Search index, like the filter query above, so that
-- sub-lexers of other Text indexes owned by this user are not listed
select '                             '||ISL_OBJECT from ctx_user_index_sub_lexers where ISL_INDEX_NAME='WK$DOC_PATH_IDX';
PROMPT =====================================================
PROMPT

-- Compare the instance's current cache character set, index filter and number
-- of sub-lexers with the values expected for the database character set.
-- Results are handed to the rest of the script through bind variables:
--   g_need_update   'Y' when at least one setting differs, otherwise 'N'
--   correct_charset expected CC_CACHE_CHARSET value
--   correct_filter  expected filter object (NULL_FILTER or CHARSET_FILTER)
--   db_charset      database character set taken from userenv('language')
-- A UTF8 or AL32UTF8 database is expected to have 5 sub-lexers
-- (DEFAULT,JAPANESE,KOREAN,SIMPLIFIED CHINESE,TRADITIONAL CHINESE).
set heading on
variable g_need_update CHAR;
variable correct_charset varchar2(100);
variable correct_filter varchar2(100);
variable db_charset varchar2(100);
declare
  lang       varchar2(100);
  charset    varchar2(100);
  encoding   varchar2(100);
  l_cache_charset varchar2(100);
  l_filter varchar2(100);
  l_lexer_cnt number;
  l_correct_lexer_cnt number;
begin
  -- retrieve the current settings
  select CC_PVALUE into l_cache_charset from wk$crawler_config where cc_pname='CC_CACHE_CHARSET';

  -- Look only at the Ultra Search index, as the report queries do. Without the
  -- index name a second Text index owned by this user could supply the wrong
  -- filter or make the SELECT INTO fail with TOO_MANY_ROWS.
  -- DISTINCT keeps the lookup single-row if the view lists several attribute
  -- rows for the same filter object.
  begin
    select distinct IXV_OBJECT into l_filter
      from ctx_user_index_values
     where IXV_CLASS='FILTER'
       and IXV_INDEX_NAME='WK$DOC_PATH_IDX';
  exception
    when no_data_found then
      -- no row is listed when the index uses the null filter
      l_filter := 'NULL_FILTER';
  end;

  select count(ISL_OBJECT) into l_lexer_cnt from ctx_user_index_sub_lexers where ISL_INDEX_NAME='WK$DOC_PATH_IDX';

  -- get the expected value: the character set follows the '.' in the language string
  lang := userenv('language');
  charset := upper(substr(lang,instr(lang,'.')+1));
  :db_charset := charset;

  -- find out the corresponding cache charset value
  if (charset = 'WE8ISO8859P1' or charset = 'US7ASCII') then
    encoding := '8859_1';
  elsif (charset = 'EE8ISO8859P2') then -- East European
    encoding := '8859_2';
  elsif (charset = 'SE8ISO8859P3') then -- South European
    encoding := '8859_3';
  elsif (charset = 'NEE8ISO8859P4') then -- North and North-East European
    encoding := '8859_4';
  elsif (charset = 'CL8ISO8859P5') then -- Latin/Cyrillic
    encoding := '8859_5';
  elsif (charset = 'AR8ISO8859P6') then -- Latin/Arabic
    encoding := '8859_6';
  elsif (charset = 'EL8ISO8859P7') then -- Latin/Greek
    encoding := '8859_7';
  elsif (charset = 'IW8ISO8859P8') then -- Latin/Hebrew
    encoding := '8859_8';
  elsif (charset = 'WE8ISO8859P9') then -- West European and Turkish
    encoding := '8859_9';
  elsif (charset = 'ZHS16CGB231280') then -- 16-bit fixed Simplified Chinese
    encoding := 'GB2312';
  elsif (charset = 'ZHT16BIG5') then -- Big 5 16-bit Traditional Chinese
    encoding := 'Big5';
  elsif (charset = 'KO16KSC5601') then -- 16-bit Korean
    encoding := 'ksc5601';
  elsif (charset = 'JA16SJIS') then -- Shift-JIS Japanese
    encoding := 'SJIS';
  else
    -- every other character set, JA16EUC included, caches documents as Unicode
    encoding := 'Unicode';
  end if;

  -- expected CC_CACHE_CHARSET value
  :correct_charset := encoding;

  -- expected filter: character sets with a dedicated cache encoding above need
  -- no conversion (null filter), all others go through the charset filter
  -- (the original note names it as charset_filter(UTF16AUTO))
  if (charset in ('JA16SJIS', 'US7ASCII', 'WE8ISO8859P1',
                  'EE8ISO8859P2','SE8ISO8859P3','NEE8ISO8859P4','CL8ISO8859P5',
                  'AR8ISO8859P6','EL8ISO8859P7', 'IW8ISO8859P8','WE8ISO8859P9',
                  'ZHS16CGB231280','ZHT16BIG5','KO16KSC5601'))
  then
    :correct_filter := 'NULL_FILTER';
  else
    :correct_filter := 'CHARSET_FILTER';
  end if;

  -- expected sub-lexer count: start with 1, then add the language specific ones
  l_correct_lexer_cnt := 1;
  -- one more for character sets that can hold Japanese
  if (charset in ('JA16EUC', 'JA16SJIS', 'UTF8', 'AL32UTF8', 'JA16EUCYEN',
                  'JA16EUCTILDE', 'JA16SJISYEN', 'JA16SJISTILDE')) then
    l_correct_lexer_cnt := l_correct_lexer_cnt+1;
  end if;
  -- same lexer for both simplified and trad. Chinese
  if (charset in ('ZHS16CGB231280', 'ZHS16GBK', 'ZHT32EUC', 'ZHT16MSWIN950',
      'ZHT16HKSCS', 'ZHS32GB18030',
      'ZHT16BIG5', 'ZHT32TRIS', 'AL24UTFFSS', 'UTF8', 'AL32UTF8')) then
    -- 2 Chinese lexers
    l_correct_lexer_cnt := l_correct_lexer_cnt+2;
  end if;
  -- one more for character sets that can hold Korean
  if (charset in ('KO16MSWIN949', 'KO16KSC5601', 'UTF8', 'AL32UTF8')) then
    l_correct_lexer_cnt := l_correct_lexer_cnt+1;
  end if;

  -- assume everything is OK
  :g_need_update := 'N';
  if (:correct_charset <> l_cache_charset) then
    dbms_output.put_line('Existing cache character set setting is incorrect');
    :g_need_update := 'Y';
  end if;

  if (:correct_filter <> l_filter) then
    dbms_output.put_line('Existing index filter setting is incorrect');
    :g_need_update := 'Y';
  end if;

  if (l_correct_lexer_cnt <> l_lexer_cnt) then
    dbms_output.put_line('Existing index lexer setting is incorrect');
    :g_need_update := 'Y';
  end if;

  if (:g_need_update = 'Y') then
    dbms_output.put_line('Index should be recreated to update indexing preferences.');
    dbms_output.put_line('If proceed with patching, the Ultra Search index will be dropped');
    dbms_output.put_line('and recreated to pick up the new index preferences from wksys.');
    dbms_output.put_line('All data sources will need to be recrawled for indexing.');
  else
    dbms_output.put_line('Existing setting is correct');
  end if;
end;
/

-- Ask for confirmation and, when the check flagged a difference, rebuild the
-- Ultra Search index with the current preferences. This is destructive:
-- wk$doc is truncated and every data source has to be recrawled afterwards.
set heading off
select decode(:g_need_update,'N','Checking done, press return to exit','Patch settings?(y/n)') from dual;
ACCEPT do_update 

begin
  if (:g_need_update = 'Y') then
    -- accept the answer regardless of case or surrounding blanks; anything
    -- other than y (including an empty answer) means no update
    if (lower(trim('&do_update')) = 'y') then
      dbms_output.put_line('update CC_CACHE_CHARSET value...');
      wk_crw.update_crawler_config(ds_id=>-1,config_name=>'CC_CACHE_CHARSET',config_value=>:correct_charset);

      -- drop and recreate index
      dbms_output.put_line('Dropping index...');
      begin
        execute immediate 'drop index wk$doc_path_idx';
      exception
        when others then
          -- Best effort: a missing index (ORA-01418) is fine, anything else is
          -- reported so that a later create_index failure can be understood.
          -- The message is shortened to stay within the dbms_output line limit.
          if (sqlcode <> -1418) then
            dbms_output.put_line('Drop index failed, continuing: '||substr(sqlerrm,1,200));
          end if;
      end;
      dbms_output.put_line('Truncate wk$doc table...');
      execute immediate 'truncate table wk$doc';
      dbms_output.put_line('Create an empty index...');
      -- null arguments are passed for lexer, stop list and data store; adjust
      -- this call when custom indexing preferences are used
      wk_ddl.create_index(lexer=>null,stop_list=>null,data_store=>null);
      dbms_output.put_line('Index creation done, please force recrawl all documents.');
    else
      dbms_output.put_line('no update');
    end if;
  end if;
end;
/

-- Show the settings again after the optional patch step.
PROMPT
PROMPT Final index settings:
PROMPT =====================================================
select 'Cache File Character Set:   ',CC_PVALUE from wk$crawler_config where cc_pname='CC_CACHE_CHARSET';
-- Prints nothing when the index uses NULL_FILTER (no row exists for it)
select 'Index Filter used:          ',IXV_OBJECT from ctx_user_index_values where IXV_CLASS='FILTER' and IXV_INDEX_NAME='WK$DOC_PATH_IDX';
PROMPT
PROMPT Index Lexer(s) used:
-- Restricted to the Ultra Search index, like the filter query above, so that
-- sub-lexers of other Text indexes owned by this user are not listed
select '                             '||ISL_OBJECT from ctx_user_index_sub_lexers where ISL_INDEX_NAME='WK$DOC_PATH_IDX';
PROMPT =====================================================

set serveroutput off;
