Rem
Rem $Header: wk0prefcheck.sql 07-jul-2004.13:05:40 syang Exp $
Rem
Rem wk0prefcheck.sql
Rem
Rem Copyright (c) 2003, 2004, Oracle. All rights reserved.  
Rem
Rem    NAME
Rem      wk0prefcheck.sql - wk index preference checker
Rem
Rem    DESCRIPTION
Rem      validate default index filter/lexer prefernce is set correctly.
Rem      Especially after a database character set change.
Rem
Rem    NOTES
Rem      Run as WKSYS. Existing instance should be checked by wk0idxcheck.sql
Rem      Implemented as bug 3066203 bug fix.
Rem
Rem    MODIFIED   (MM/DD/YY)
Rem    syang       07/07/04 - bug 3750885: Mac- charset mapping 
Rem    syang       10/21/03 - syang_backport_9.2.0.4.0_3066203 
Rem    syang       10/21/03 - correct wk0instcheck to wk0idxcheck 
Rem    syang       08/06/03 - filter select has no row for null_filter 
Rem    syang       07/23/03 - Created
Rem

set feedback off verify off
REM  This script checks and corrects Ultra Search default settings created before a database
REM  character set change. In general there are three possible areas that could go wrong:
REM  1. Cache file character set (as indicated by CC_CACHE_CHARSET)
REM  2. Index filter used (for converting cache file into datbase character set)
REM  3. Index lexer used (multi lexer needed for indexing CJK multibytes document)
REM
REM  The patch update the correct cache character set value and drop and recreate the index
REM  preferences for filter and lexer.
REM  User should also check any instance created before the character set change using wk0idxcheck.sql.


PROMPT ==============  Ultra Search Indexing Preference Checking ==============
PROMPT


PROMPT This script must be run as WKSYS.  This script will exit
PROMPT below if run as any other user.

whenever sqlerror exit;
set heading off
select 'User is '||USER from dual where 1=decode(USER,'WKSYS',1,'NOT');
whenever sqlerror continue;
set serveroutput on;

column CCD_PNAME format a20
column CCD_PVALUE format a20
column SLX_LANGUAGE format a20
column SLX_SUB_NAME format a20
column PRE_NAME format a20
column PRE_OBJECT format a20

select 'Database character set is '''||upper(substr(userenv('language'),instr(userenv('language'),'.')+1))||''''
  from dual;
PROMPT
PROMPT Existing default index preference settings:
PROMPT =====================================================
select 'Cache File Character Set:   ',CCD_PVALUE from wk$crawler_config_default where ccd_pname='CC_CACHE_CHARSET';
select 'Index Filter used:          ', PRE_OBJECT from ctx_user_preferences where PRE_NAME='WK_FILTER';
PROMPT
PROMPT Index Lexer(s) used:
select '                             '||SLX_SUB_NAME from ctx_user_sub_lexers;
PROMPT =====================================================
PROMPT

-- List lexers used for indexing; should have 5 (DEFAULT,JAPANESE,KOREAN,SIMPLIFIED CHINESE,TRADITIONAL CHINESE)
set heading on
variable g_need_update CHAR;
variable correct_charset varchar2(100);
variable correct_filter varchar2(100);
variable db_charset varchar2(100);
declare
  lang       varchar2(100);
  charset    varchar2(100);
  encoding   varchar2(100);
  dft_language varchar2(100);
  l_cache_charset varchar2(100);
  l_filter varchar2(100);
  l_lexer varchar2(100);
  l_lexer_cnt number;
  l_correct_lexer_cnt number;
begin
  -- retrieve the current settings
  select CCD_PVALUE into l_cache_charset from wk$crawler_config_default where ccd_pname='CC_CACHE_CHARSET';
  
  -- select will have no result if NULL_FILTER is used
  begin
    select PRE_OBJECT into l_filter from ctx_user_preferences where PRE_NAME='WK_FILTER';
  exception
    when no_data_found then
      l_filter := 'NULL_FILTER';
  end;

  select count(SLX_SUB_NAME) into l_lexer_cnt from ctx_user_sub_lexers;

  -- get the expected value
  lang := userenv('language');
  charset := upper(substr(lang,instr(lang,'.')+1));
  :db_charset := charset;
  dft_language := 'en-US'; -- English

  -- find out the corresponding cache charset value
  if (charset = 'WE8ISO8859P1' or charset = 'US7ASCII') then
    encoding := '8859_1';
  elsif (charset = 'EE8ISO8859P2') then -- East European
    encoding := '8859_2';
  elsif (charset = 'SE8ISO8859P3') then -- South European
    encoding := '8859_3';
  elsif (charset = 'NEE8ISO8859P4') then -- North and North-East European
    encoding := '8859_4';
  elsif (charset = 'CL8ISO8859P5') then -- Latin/Cyrillic
    encoding := '8859_5';
  elsif (charset = 'AR8ISO8859P6') then -- Latin/Arabic
    encoding := '8859_6';
  elsif (charset = 'EL8ISO8859P7') then -- Latin/Greek
    encoding := '8859_7';
  elsif (charset = 'IW8ISO8859P8') then -- Latin/Hebrew
    encoding := '8859_8';
  elsif (charset = 'WE8ISO8859P9') then -- West European and Turkish
    encoding := '8859_9';
  elsif (charset = 'ZHS16CGB231280') then -- 16-bit fixed Simplified Chinese
    encoding := 'GB2312';
    dft_language := 'zh-CN'; -- Chinese
  elsif (charset = 'ZHT16BIG5') then -- Big 5 16-bit Traditional Chinese
    encoding := 'Big5';
    dft_language := 'zh-TW'; -- Chinese
  elsif (charset = 'KO16KSC5601') then -- 16-bit Korean
    encoding := 'ksc5601';
    dft_language := 'ko-KR'; -- Korean
  elsif (charset = 'JA16EUC') then
    encoding := 'Unicode';
    dft_language := 'ja-JP'; -- JAPANESE
  elsif (charset = 'JA16SJIS') then
    encoding := 'SJIS';
    dft_language := 'ja-JP';
  else
    encoding := 'Unicode';
  end if;

  -- correct CC_CACHE_CHARSET value
  -- dbms_output.put_line('CC_CACHE_CHARSET value should be '''||encoding||'''');
  :correct_charset := encoding;

  -- correct filter value
  if (charset in ('JA16SJIS', 'US7ASCII', 'WE8ISO8859P1',
                  'EE8ISO8859P2','SE8ISO8859P3','NEE8ISO8859P4','CL8ISO8859P5',
                  'AR8ISO8859P6','EL8ISO8859P7', 'IW8ISO8859P8','WE8ISO8859P9',
                  'ZHS16CGB231280','ZHT16BIG5','KO16KSC5601'))
  then
    -- dbms_output.put_line('Index filter preference wk_filter value should be ''null_filter''');
    :correct_filter := 'NULL_FILTER';
  else
    -- dbms_output.put_line('Index filter preference wk_filter value should be ''charset_filter(UTF16AUTO)''');
    :correct_filter := 'CHARSET_FILTER';
  end if;

  -- correct lexer count value
  l_correct_lexer_cnt := 1;
  if (charset in ('JA16EUC', 'JA16SJIS', 'UTF8', 'AL32UTF8', 'JA16EUCYEN',
                  'JA16EUCTILDE', 'JA16SJISYEN', 'JA16SJISTILDE')) then
    l_correct_lexer_cnt := l_correct_lexer_cnt+1;
  end if;
  -- same lexer for both simplified and trad. Chinese
  if (charset in ('ZHS16CGB231280', 'ZHS16GBK', 'ZHT32EUC', 'ZHT16MSWIN950',
      'ZHT16HKSCS', 'ZHS32GB18030',
      'ZHT16BIG5', 'ZHT32TRIS', 'AL24UTFFSS', 'UTF8', 'AL32UTF8')) then
    -- 2 Chinese lexers
    l_correct_lexer_cnt := l_correct_lexer_cnt+2;
  end if;
  if (charset in ('KO16MSWIN949', 'KO16KSC5601', 'UTF8', 'AL32UTF8')) then
    l_correct_lexer_cnt := l_correct_lexer_cnt+1;
  end if;

  -- assume everything is OK
  :g_need_update := 'N';
  if (:correct_charset <> l_cache_charset) then
    dbms_output.put_line('Existing cache file character set setting is incorrect');
    :g_need_update := 'Y';
  end if;

  if (:correct_filter <> l_filter) then
    dbms_output.put_line('Existing index filter setting is incorrect');
    :g_need_update := 'Y';
  end if;

  if (l_correct_lexer_cnt <> l_lexer_cnt) then
    dbms_output.put_line('Existing index lexer setting is incorrect');
    :g_need_update := 'Y';
  end if;

  if (:g_need_update = 'N') then
    dbms_output.put_line('Existing setting is correct');
  end if;
end;
/

set heading off
select decode(:g_need_update,'N','Checking done, press return to exit','Patch settings?(y/n)') from dual;
ACCEPT do_update 

begin
  if (:g_need_update = 'Y') then
    if ('&do_update' = 'y') then
      dbms_output.put_line('update CC_CACHE_CHARSET value...');
      update WK$CRAWLER_CONFIG_DEFAULT set CCD_PVALUE = :correct_charset where CCD_PNAME = 'CC_CACHE_CHARSET';

      -- update filter
      dbms_output.put_line('update filter preference...');
      begin
        ctx_ddl.drop_preference('wk_filter');
      exception
      when others then
        null;
      end;
    
      if (:db_charset in ('JA16SJIS', 'US7ASCII', 'WE8ISO8859P1',
                  'EE8ISO8859P2','SE8ISO8859P3','NEE8ISO8859P4','CL8ISO8859P5',
                  'AR8ISO8859P6','EL8ISO8859P7', 'IW8ISO8859P8','WE8ISO8859P9',
                  'ZHS16CGB231280','ZHT16BIG5','KO16KSC5601'))
      then
        ctx_ddl.create_preference('wk_filter','null_filter');
      else
        ctx_ddl.create_preference('wk_filter','charset_filter');
        ctx_ddl.set_attribute('wk_filter','charset','UTF16AUTO');
      end if;

      dbms_output.put_line('update lexer preference...');
      -- update lexer
      begin
        ctx_ddl.drop_preference('wk_lexer');
      exception
        when others then
          null;
      end;

      begin
        ctx_ddl.drop_preference('wk_basic_lexer');
      exception
        when others then
          null;
      end;

      begin
        ctx_ddl.drop_preference('wk_japanese_lexer');
      exception
        when others then
          null;
      end;

      begin
        ctx_ddl.drop_preference('wk_chinese_lexer');
      exception
        when others then
          null;
      end;

      begin
        ctx_ddl.drop_preference('wk_korean_lexer');
      exception
        when others then
          null;
      end;
      ctx_ddl.create_preference('wk_basic_lexer','basic_lexer');
      ctx_ddl.set_attribute('wk_basic_lexer','base_letter','yes');
      ctx_ddl.create_preference('wk_japanese_lexer','japanese_vgram_lexer');
      ctx_ddl.create_preference('wk_chinese_lexer','chinese_vgram_lexer');
      ctx_ddl.create_preference('wk_korean_lexer','korean_morph_lexer');
      ctx_ddl.create_preference('wk_lexer','multi_lexer');
      ctx_ddl.add_sub_lexer('wk_lexer','default','wk_basic_lexer');
      if (:db_charset in ('JA16EUC', 'JA16SJIS', 'UTF8', 'AL32UTF8',
          'JA16EUCYEN', 'JA16EUCTILDE', 'JA16SJISYEN', 'JA16SJISTILDE')) then
        ctx_ddl.add_sub_lexer('wk_lexer','ja','wk_japanese_lexer');
      end if;
      -- same lexer for both simplified and trad. Chinese
      if (:db_charset in ('ZHS16CGB231280', 'ZHS16GBK', 'ZHT32EUC',
          'ZHT16MSWIN950', 'ZHT16HKSCS', 'ZHS32GB18030',
          'ZHT16BIG5', 'ZHT32TRIS', 'AL24UTFFSS', 'UTF8', 'AL32UTF8')) then
        ctx_ddl.add_sub_lexer('wk_lexer','zhs','wk_chinese_lexer','zh');
        ctx_ddl.add_sub_lexer('wk_lexer','zht','wk_chinese_lexer');
      end if;
      if (:db_charset in ('KO16MSWIN949', 'KO16KSC5601', 'UTF8', 'AL32UTF8')) then

        ctx_ddl.add_sub_lexer('wk_lexer','ko','wk_korean_lexer');
      end if;
    else
      dbms_output.put_line('no update');
    end if;
  else
    dbms_output.put_line('Indexing preference checking done');
  end if;
end;
/

PROMPT
PROMPT Final index preference settings:
PROMPT =====================================================
select 'Cache File Character Set:   ',CCD_PVALUE from wk$crawler_config_default where ccd_pname='CC_CACHE_CHARSET';
select 'Index Filter used:          ', PRE_OBJECT from ctx_user_preferences where PRE_NAME='WK_FILTER';
PROMPT
PROMPT Index Lexer(s) used:
select '                             '||SLX_SUB_NAME from ctx_user_sub_lexers;
PROMPT =====================================================

set serveroutput off;
