#module    IdxSpell	"1-001"
/*
 ***********************************************************************
 *                                                                     *
 * The software was developed at the Monsanto Company and is provided  *
 * "as-is".  Monsanto Company and the auther disclaim all warranties   *
 * on the software, including without limitation, all implied warran-  *
 * ties or merchantabilitiy and fitness.                               *
 *                                                                     *
 * This software does not contain any technical data or information    *
 * that is proprietary in nature.  It may be copied, modified, and     *
 * distributed on a non-profit basis and with the inclusion of this    *
 * notice.                                                             *
 *                                                                     *
 ***********************************************************************
 */

/*+
 * Module Name:	IdxSpell
 *
 * Author:	R L Aurbach	CR&DS MIS Group    07-May-1986
 *
 * Function:
 *	Build the Spell String for an index term, properly handling LaTeX
 *	syntax and commands.
 *
 * Modification History:
 *
 * Version     Initials	   Date		Description
 * ------------------------------------------------------------------------
 * 1-001	RLA	07-May-1986	Original Code
-*/

/*
 * Module IdxSpell - Module-Wide Data Description Section
 *
 * Include Files:
 */
#include	    descrip
#include	    ctype
#include	    "IdxDef.H"

/*
 * Module Definitions:
 */
/* Boolean values returned by the idx_process_* scanning routines.	*/
#define		    TRUE	1
#define		    FALSE	0

/*
 * str_lit(a) expands to a brace-enclosed initializer for a string
 * descriptor that refers to the string literal a.  The length field is
 * sizeof(a)-1, i.e. the literal without its terminating NUL, so the macro
 * must only be used with string literals (or char arrays), never pointers.
 */
#define		    str_lit(a)	{ sizeof(a)-1, DSC$K_DTYPE_T, DSC$K_CLASS_S, a }

/*
 * Global Declarations:
 */

/*
 * Static Declarations:
 */
    /*
     * accent_set holds the characters which may follow a backslash to form
     * a LaTeX accent command (\` \' \^ \" \~ \= \. \u \v \H \t \c \d \b).
     * idx_process_accent tests the character after each backslash against
     * this set with str$find_first_in_set.
     */
    static $DESCRIPTOR	(accent_set, "`'^\"~=.uvHtcdb");

    /*
     * emphasis_array lists the font and size commands which are deleted
     * from the spell string.  idx_process_emphasis searches for the entries
     * in array order with str$position and removes the first one found.
     * emphasis_array_count must equal the number of initializers below.
     */
#define	   emphasis_array_count	    18
    static struct dsc$descriptor   emphasis_array[emphasis_array_count] = {
					str_lit("\\rm"),
					str_lit("\\em"),
					str_lit("\\bf"),
					str_lit("\\it"),
					str_lit("\\sf"),
					str_lit("\\sl"),
					str_lit("\\sc"),
					str_lit("\\tt"),
					str_lit("\\normalsize"),
					str_lit("\\small"),
					str_lit("\\tiny"),
					str_lit("\\footnotesize"),
					str_lit("\\large"),
					str_lit("\\Large"),
					str_lit("\\LARGE"),
					str_lit("\\huge"),
					str_lit("\\Huge"),
					str_lit("\\cal")  };

/*
 * External References:
 */

/*
 * Functions Called:
 */

/*+
 * Function Idx_Build_Spell_String - Documentation Section
 *
 * Discussion:
 *	Create a string which can be used to put the token in the right place
 *	in a list of tokens.  The string will be UPPERCASE, will exclude 
 *	internal LaTeX commands which are included for emphasis, and will have
 *	whitespace collapsed into single spaces.
 *
 * Calling Synopsis:
 *	Call Idx_Build_Spell_String (token, desc)
 *
 * Inputs:
 *	token	    ->	is the token to be processed.  ASCIZ string passed by
 *			reference.
 *
 * Outputs:
 *	desc	    ->	is the "Spell String", properly formulated.  Passed by
 *			dynamic string descriptor.
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	none
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. Copy the token into a dynamic string.
 *	B. Process Accent commands in the string.
 *	C. Process \verb commands in the string.
 *	D. Process emphasis commands in the string.
 *	E. Remove grouping characters in the string.
 *	F. Remove backslashes in the string.
 *	G. Collapse the string -- i.e., remove non-essential whitespace.
 *	H. Convert the string to upper case.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Build_Spell_String - Code Section
 */

void	idx_build_spell_string (token, desc)

    char		    *token;
    struct dsc$descriptor   *desc;
{
/*
 * Local Declarations
 */
    int			    nchars;	    /* Number of characters in token */
    int			    changed;	    /* Did the last pass edit desc?  */
    void		    idx_replace_string();
    void		    idx_collapse_string();
    void		    idx_process_groups();
    void		    idx_process_backslash();
/*
 * Module Body
 */

/* Step A:  load the ASCIZ token into the caller's dynamic string	*/

nchars = strlen(token);
str$copy_r(desc, &nchars, token);

/* Step B:  strip accent commands, one per pass, until none remain	*/

do  {
    changed = idx_process_accent(desc);
    }
while (changed);

/* Step C:  strip \verb commands, one per pass, until none remain	*/

do  {
    changed = idx_process_verb(desc);
    }
while (changed);

/* Step D:  strip emphasis commands, one per pass, until none remain	*/

do  {
    changed = idx_process_emphasis(desc);
    }
while (changed);

/* Steps E and F:  drop unquoted grouping characters, then backslashes	*/

idx_process_groups(desc);
idx_process_backslash(desc);

/* Steps G and H:  squeeze whitespace and fold to upper case in place	*/

idx_collapse_string (desc);
str$upcase(desc, desc);
}

/*+
 * Function Idx_Replace_String - Documentation Section
 *
 * Discussion:
 *	This function replaces a specified substring in a dynamic string with
 *	a specified replacement string.
 *
 * Calling Synopsis:
 *	Call Idx_Replace_String (string, start, length, replace)
 *
 * Inputs:
 *	string	    ->	is the string to be updated.  The string is a dynamic
 *			string, passed by descriptor.
 *
 *	start	    ->	is the starting position in the string.  The first
 *			position in the string is position 1.  Integer passed
 *			by value.
 *
 *	length	    ->	is the number of characters in the substring to be
 *			replaced.  Integer passed by value.
 *
 *	replace	    ->	is the replacement text.  ASCIZ string passed by 
 *			reference.
 *
 * Outputs:
 *	string	    ->	is the updated string.  It is a dynamic string, passed
 *			by descriptor.
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	none
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. Copy the characters up to the first character in the substring to
 *	   be replaced into a temporary string.
 *	B. Copy the replacement string into the temporary string.
 *	C. Copy the remainder of the initial string into the temporary string.
 *	D. Update the initial string.
 *
 * Special Notes:
 *	It is assumed that the updated string will be no more than 132 
 *	characters long.
-*/

/*
 * Function Idx_Replace_String - Code Section
 */

void	idx_replace_string (string, start, length, replace)

    struct dsc$descriptor   *string;
    int			    start;
    int			    length;
    char		    *replace;
{
/*
 * Local Declarations
 */
    int			    old_length;	    /* length of original string    */
    int			    new_length;	    /* length of new string	    */
    int			    rpl_length;	    /* length of the replace string */
    int			    left;	    /* number of characters left    */
    int			    room;	    /* capacity of temp		    */
    char		    temp[133];	    /* temporary string		    */
/*
 * Module Body
 *
 * The new text is assembled in temp and only then copied back into the
 * dynamic string.  This allows replace to point into the buffer of string
 * itself (as the accent and \verb routines do).
 *
 * temp has a fixed size.  Every piece copied into it is limited to the room
 * which remains, so an over-long result is truncated instead of overrunning
 * the stack.  start and length are forced into the bounds of the current
 * string so that no piece is read from outside of it.
 */

room = sizeof (temp);
old_length = string->dsc$w_length;

/* Force the substring to lie inside the string				*/

if (start < 1)				start = 1;
if (start > old_length + 1)		start = old_length + 1;
if (length < 0)				length = 0;
if (length > old_length - (start - 1))	length = old_length - (start - 1);

/* Copy the text which precedes the substring				*/

new_length = start - 1;
if (new_length > room)	    new_length = room;
if (new_length > 0)
    {
    strncpy (temp, string->dsc$a_pointer, new_length);
    }

/* Copy the replacement text (a null pointer means "delete")		*/

if (replace != 0)
    {
    rpl_length = strlen(replace);
    if (rpl_length > room - new_length)	    rpl_length = room - new_length;
    if (rpl_length > 0)
	{
	strncpy (&temp[new_length], replace, rpl_length);
	new_length += rpl_length;
	}
    }

/* Copy the text which follows the substring				*/

left = old_length - (start - 1) - length;
if (left > room - new_length)	left = room - new_length;
if (left > 0)
    {
    strncpy (&temp[new_length], &string->dsc$a_pointer[start+length-1], left);
    new_length += left;
    }

/* Update the dynamic string						*/

str$copy_r (string, &new_length, temp);
}

/*+
 * Function Idx_Collapse_String - Documentation Section
 *
 * Discussion:
 *	Replace the string with a new string which has all extraneous whitespace
 *	removed.  That is, there is no whitespace at the beginning or end of
 *	the string and every internal occurrence of whitespace is collapsed to
 *	a single space character.
 *
 * Calling Synopsis:
 *	Call Idx_Collapse_String (string)
 *
 * Inputs:
 *	string	    ->	is the input string.  It is a dynamic string, passed by
 *			descriptor.
 *
 * Outputs:
 *	string	    ->	is the output string.  It is a dynamic string, passed by
 *			descriptor.
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	none
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. For all characters in the input string,
 *	    1. If the character is whitespace,
 *		a. If no characters have been transferred,
 *		    1. Ignore it.
 *		b. If the previous character transferred was a space,
 *		    1. Ignore it.
 *		c. Else,
 *		    1. Copy a space to the output string.
 *	    2. Else,
 *		a. Copy the character to the output string.
 *	B. If the last character in the output string is a space,
 *	    1. Remove it.
 *	C. Update the dynamic string.
 *
 * Special Notes:
 *	It is assumed that the collapsed string will be no more than 132 
 *	characters long.
-*/

/*
 * Function Idx_Collapse_String - Code Section
 */

void	idx_collapse_string (string)

    struct dsc$descriptor   *string;
{
/*
 * Local Declarations
 */
    char		    temp[133];	    /* Working output string	    */
    char		    copy;	    /* Working character	    */
    int			    i;		    /* Array index in input string  */
    int			    j;		    /* Array index in temp string   */
    int			    room;	    /* Capacity of temp		    */
/*
 * Module Body
 *
 * The collapsed text is built in temp, which has a fixed size.  Copying
 * stops as soon as temp is full, so an over-long input is truncated instead
 * of overrunning the stack.
 */

room = sizeof (temp);

for (i = 0, j = 0; (i < string->dsc$w_length) && (j < room); i++)
    {
    copy = string->dsc$a_pointer[i];

    /* The cast keeps 8-bit characters from reaching isspace as negative   */
    /* values.								    */

    if (isspace((unsigned char) copy) != 0)
	{

	/* Leading whitespace and repeated whitespace are dropped; the	    */
	/* first whitespace character of an internal run becomes a space.  */

	if ((j > 0) && (temp[j-1] != ' '))	temp[j++] = ' ';
	}
    else
	{
	temp[j++] = copy;
	}
    }

/* Check to see if the temporary string is terminated by whitespace	*/

if ((j > 0) && (temp[j-1] == ' '))	j--;

/* Now copy the result to the output string				*/

str$copy_r (string, &j, temp);
}

/*+
 * Function Idx_Process_Accent - Documentation Section
 *
 * Discussion:
 *	This routine processes the spell string to remove all LaTeX commands
 *	which generate accents, without changing the spelling.  For example,
 *	the string "se\~{n}or" is translated to "senor".  Without special
 *	accent processing, the spell-string algorithm would produce "se n or",
 *	which might not appear in the proper place in the alphabetical listing.
 *
 * Calling Synopsis:
 *	status = Idx_Process_Accent (string)
 *
 * Inputs:
 *	string	    ->	is the spell-string to be processed.  It is a dynamic
 *			string, passed by descriptor.
 *
 * Outputs:
 *	string	    ->	is the resultant spell-string.  It is a dynamic string,
 *			passed by descriptor.
 *
 * Return Value:
 *	status	    ->	indicates the result of the operation.  It is a boolean
 *			integer, passed by value.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status = TRUE	An accent string was processed.
 *	status = FALSE	No accent string was found.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. Search the string for an accent pattern:
 *	    1. The first character in the pattern is "\".
 *	    2. The next character in the pattern is one of the characters in
 *	       the accent_set.
 *	    3. The next character in the pattern is "{".
 *	B. Mark the pattern and find the terminating "}".
 *	C. Replace the pattern with the string located within the braces.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Process_Accent - Code Section
 */

int	idx_process_accent (string)

    struct dsc$descriptor   *string;
{
/*
 * Local Declarations
 */
    struct dsc$descriptor   accent_str = { 1, DSC$K_DTYPE_T, DSC$K_CLASS_S, 0 };
    int			    start;	    /* Start of string to replace   */
    int			    close;	    /* Index of the closing brace   */
    int			    len;	    /* Length of the spell string   */
    char		    *text;	    /* Text of the spell string	    */
    int			    i;		    /* Loop index		    */
/*
 * Module Body
 *
 * Find the first occurrence of  \<accent>{...}  and replace it with the
 * text between the braces.  Returns TRUE if a replacement was made and FALSE
 * if the string contains no complete accent pattern.  At most one pattern
 * is processed per call, so the caller repeats the call until it returns
 * FALSE.
 *
 * Every index is checked against the length of the string before it is
 * used:  the string is counted, not NUL-terminated, so a backslash at the
 * end of the string or a missing closing brace must not cause a read beyond
 * the end of the text.
 */

len = string->dsc$w_length;
text = string->dsc$a_pointer;

for (i = 0; i < len; i++)
    {
    if (text[i] != '\\')			continue;

    /* start is the 1-based position of the backslash; i now indexes the   */
    /* candidate accent character.					    */

    start = ++i;

    /* An accent character and a '{' must both follow the backslash.  If   */
    /* they cannot fit, no pattern can begin here or later.		    */

    if (i + 1 >= len)				break;

    accent_str.dsc$a_pointer = &text[i];
    if (str$find_first_in_set (&accent_str, &accent_set) == 0)	    continue;
    if (text[++i] != '{')			continue;

    /* Locate the terminating brace.					    */

    for (close = i + 1; close < len; close++)
	{
	if (text[close] == '}')			break;
	}

    /* No '}' anywhere in the remainder:  this pattern is malformed and no */
    /* later pattern can be complete either.  Leave the text untouched.    */

    if (close >= len)				break;

    /* Overwrite the '}' with a NUL so that the argument is an ASCIZ	    */
    /* string.  The NUL lies inside the region which is replaced.	    */

    text[close] = '\0';
    idx_replace_string (string, start, close-start+2, &text[i+1]);
    return (TRUE);
    }
return (FALSE);
}

/*+
 * Function Idx_Process_Groups - Documentation Section
 *
 * Discussion:
 *	Process the characters which delimit groups ('{', '}', and '$') in the
 *	spell string.  These characters are removed unless they are quoted
 *	(e.g., "{" is removed but "\{" is not).
 *
 * Calling Synopsis:
 *	Call Idx_Process_Groups (String)
 *
 * Inputs:
 *	string	    ->	is the spell string.  It is a dynamic string passed by
 *			descriptor.
 *
 * Outputs:
 *	string	    ->	is the spell string.  It is a dynamic string passed by
 *			descriptor.
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	returns with spell string possibly modified.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. For all characters in the spell string,
 *	    1. If the character is '{', '}', or '$' and the preceding
 *	       character is not a '\',
 *		a. Remove the character.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Process_Groups - Code Section
 */

void	idx_process_groups (string)

    struct dsc$descriptor   *string;
{
/*
 * Local Declarations
 */
    int			    i;		/* Loop index			    */
/*
 * Module Body
 */

for (i = 0; i < string->dsc$w_length; i++)
    {
    if ( (string->dsc$a_pointer[i] == '{')  ||
	 (string->dsc$a_pointer[i] == '}')  ||
	 (string->dsc$a_pointer[i] == '$') )
	{
	if ((i == 0) || (string->dsc$a_pointer[i-1] != '\\'))
	    {
	    idx_replace_string(string, i+1, 1, 0);
	    i--;
	    }
	}
    }
}

/*+
 * Function Idx_Process_Verb - Documentation Section
 *
 * Discussion:
 *	Process LaTeX \verb and \verb* commands in the spell string.  This is
 *	done by removing the \verb command from the spell string.
 *
 * Calling Synopsis:
 *	status = Idx_Process_Verb (string)
 *
 * Inputs:
 *	string	    ->	Spell string.  A dynamic string passed by descriptor.
 *
 * Outputs:
 *	string	    ->	Spell string.  A dynamic string passed by descriptor.
 *
 * Return Value:
 *	status	    ->	is a boolean integer passed by value.  It indicates
 *			whether or not a \verb command was found in the spell
 *			string.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status = TRUE	A \verb command was processed in the string.
 *	status = FALSE	No \verb command was processed.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. Search the string for a \verb command:
 *	    1. \verb [optionally followed by a *]
 *	    2. Next character is not alphabetic
 *	B. Mark the pattern and find the terminating character.
 *	C. Replace the pattern with its argument.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Process_Verb - Code Section
 */

int	idx_process_verb (string)

    struct dsc$descriptor   *string;
{
/*
 * Local Declarations
 */
    int			    i;		/* Index of candidate "\verb"	    */
    int			    j;		/* Scan index within the candidate  */
    int			    arg;	/* Index of first argument char	    */
    int			    len;	/* Length of the spell string	    */
    int			    start;	/* Start of pattern (1-based)	    */
    char		    delim;	/* Delimiter character		    */
    char		    *text;	/* Text of the spell string	    */
/*
 * Module Body
 *
 * Find the first  \verb<d>text<d>  or  \verb*<d>text<d>  in the string and
 * replace the whole command with text.  Returns TRUE if a replacement was
 * made and FALSE if no complete \verb command is present.  At most one
 * command is processed per call, so the caller repeats the call until it
 * returns FALSE.
 *
 * The shortest complete command, \verb<d><d>, is 7 characters long, so a
 * candidate must start at least 7 characters before the end of the string.
 * A candidate which turns out not to be a complete \verb command is left
 * untouched and the search resumes with the very next character.
 */

len = string->dsc$w_length;
text = string->dsc$a_pointer;

for (i = 0; i + 6 < len; i++)
    {
    if (strncmp (&text[i], "\\verb", 5) != 0)	continue;

    start = i + 1;

    /* "\verb" occupies indices i through i+4, so the optional '*' or the  */
    /* delimiter is found at index i+5.				    */

    j = i + 5;
    if (text[j] == '*')				j++;

    /* The delimiter and at least its closing partner must still fit.	    */

    if (j + 1 >= len)				continue;

    /* A letter here means a longer command name (e.g. \verbatim), not	    */
    /* \verb followed by a delimiter.					    */

    delim = text[j++];
    if (isalpha((unsigned char) delim) != 0)	continue;

    /* Locate the closing delimiter.					    */

    arg = j;
    while ((j < len) && (text[j] != delim))	j++;

    /* Unterminated command:  the argument is not an ASCIZ string, so it   */
    /* cannot be passed on.  Leave it untouched.			    */

    if (j >= len)				continue;

    /* Overwrite the closing delimiter with a NUL so that the argument is  */
    /* an ASCIZ string.  The NUL lies inside the region which is replaced. */

    text[j] = '\0';
    idx_replace_string (string, start, j-start+2, &text[arg]);
    return (TRUE);
    }
return (FALSE);
}

/*+
 * Function Idx_Process_Backslash - Documentation Section
 *
 * Discussion:
 *	Remove all '\' characters.
 *
 * Calling Synopsis:
 *	Call Idx_Process_Backslash (string)
 *
 * Inputs:
 *	string	    ->	Spell String.  Dynamic string passed by descriptor.
 *
 * Outputs:
 *	string	    ->	Spell String.  Dynamic string passed by descriptor.
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	returns
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. For all characters in the spell string,
 *	    1. If the character is a '\',
 *		a. Remove it.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Process_Backslash - Code Section
 */

void	idx_process_backslash (string)

    struct dsc$descriptor   *string;
{
/*
 * Local Declarations
 */
    int			    pos;	/* Index of character under test    */
/*
 * Module Body
 *
 * Walk the string once.  A backslash is deleted and the same index is
 * examined again, since the following character has moved into it.  Any
 * other character is stepped over.  The text pointer is fetched from the
 * descriptor on every pass because each deletion rewrites the dynamic
 * string.
 */

pos = 0;
while (pos < string->dsc$w_length)
    {
    if (string->dsc$a_pointer[pos] != '\\')
	{
	pos++;
	continue;
	}
    idx_replace_string(string, pos+1, 1, (char *) 0);
    }
}

/*+
 * Function Idx_Process_Emphasis - Documentation Section
 *
 * Discussion:
 *	Remove the normal LaTeX emphasis strings (\rm, \em, \bf, \it, \sf,
 *	\sl, \sc, \tt, \normalsize, \small, \tiny, \footnotesize, \large,
 *	\Large, \LARGE, \huge, \Huge, \cal).
 *
 * Calling Synopsis:
 *	status = Idx_Process_Emphasis (string)
 *
 * Inputs:
 *	string	    ->	Spell string.  Dynamic string passed by descriptor.
 *
 * Outputs:
 *	string	    ->	Spell string.  Dynamic string passed by descriptor.
 *
 * Return Value:
 *	status	    ->	boolean integer passed by value.  Indicates whether
 *			one of the listed LaTeX commands was found and processed
 *			in the string.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status = TRUE	A listed LaTeX command was processed.
 *	status = FALSE	No command was processed.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. For each of the listed special commands, in table order,
 *	    1. If the string is found,
 *		a. Remove its first occurrence.
 *		b. Return TRUE.
 *	B. If none of the listed commands is found,
 *	    1. Return FALSE.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Process_Emphasis - Code Section
 */

int	idx_process_emphasis (string)

    struct dsc$descriptor   *string;
{
/*
 * Local Declarations
 */
    int			    i;		/* emphasis_array index		    */
    int			    start;	/* starting position		    */
    int			    length;	/* length of substring		    */
/*
 * Module Body
 */

for (i = 0; i < emphasis_array_count; i++)
    {
    start = str$position(string, &emphasis_array[i]);
    if (start != 0)
	{
	length = emphasis_array[i].dsc$w_length;
	idx_replace_string (string, start, length, 0);
	return (TRUE);
	}
    }
return (FALSE);
}
