#module    IdxBuild    "1-002"
/*
 ***********************************************************************
 *                                                                     *
 * The software was developed at the Monsanto Company and is provided  *
 * "as-is".  Monsanto Company and the auther disclaim all warranties   *
 * on the software, including without limitation, all implied warran-  *
 * ties or merchantabilitiy and fitness.                               *
 *                                                                     *
 * This software does not contain any technical data or information    *
 * that is proprietary in nature.  It may be copied, modified, and     *
 * distributed on a non-profit basis and with the inclusion of this    *
 * notice.                                                             *
 *                                                                     *
 ***********************************************************************
 */

/*+
 * Module Name:	IdxBuild
 *
 * Author:	R L Aurbach	CR&DS MIS Group    27-Apr-1986
 *
 * Function:
 *	Build the Index Tree data structure
 *
 * Modification History:
 *
 * Version     Initials	   Date		Description
 * ------------------------------------------------------------------------
 * 1-001	RLA	27-Apr-1986	Original Code
 * 1-002	RLA	03-May-1986	Add support for page-no highlighting
 *					  and move spell-string processing to a
 *					  separate module.
-*/

/*
 * Module IdxBuild - Module-Wide Data Description Section
 *
 * Include Files:
 */
#include	    descrip
#include	    stdio
#include	    ctype
#include	    "IdxDef.H"

/*
 * Module Definitions:
 */
#define		    TRUE	1
#define		    FALSE	0

/*
 * Global Declarations:
 */

/*
 * Static Declarations:
 */
    static struct dsc$descriptor   dyn_str  = 
					{ 0, DSC$K_DTYPE_T, DSC$K_CLASS_D, 0 };
    static struct dsc$descriptor   page_ref = 
					{ 0, DSC$K_DTYPE_T, DSC$K_CLASS_D, 0 };

/*
 * External References:
 */
    extern TREE_PTR	root;		/* root of the Index Tree	    */

/*
 * Functions Called:
 */

/*+
 * Function Idx_Build_Tree - Documentation Section
 *
 * Discussion:
 *	This routine uses the information parsed by Idx_Parse to build the Index
 *	Tree.
 *
 * Calling Synopsis:
 *	Call Idx_Build_Tree (token_1, token_2, token_3, page_no, token_ct)
 *
 * Inputs:
 *	token_1	    ->	is the top level item token.  ASCIZ string passed by
 *			reference.
 *
 *	token_2	    ->	is the second level item token.  ASCIZ string passed by
 *			reference.
 *
 *	token_3	    ->	is the third level item token.  ASCIZ string passed by
 *			reference.
 *
 *	page_no	    ->	is the page number token.  ASCIZ string passed by 
 *			reference.
 *
 *	token_ct    ->	is the number of tokens seen for this index entry.
 *			integer passed by value.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	The Index Tree is updated.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	Tree is updated.
 *
 * Error Conditions:
 *	The only expected error is a memory allocation failure.  In this case,
 *	a message is printed and the entry is ignored.

 *
 * Algorithm:
 *	A. Check for a page-no highlighting flag.
 *	B. Check for a valid set of input data.  Ignore if not.
 *	C. Build a Spell String for the top-level item.
 *	D. Locate the top-level item in the tree.
 *	    1. If it does not exist,
 *		A. Allocate a new node and link it in.
 *	E. If token_ct == 1,
 *	    1. Add page number to list.
 *	F. Else,
 *	    1. Build a Spell String for the second-level item.
 *	    2. Locate the second-level item in the tree.
 *		A. If it does not exist,
 *		    1. Allocate a new node and link it in.
 *	    3. If token_ct == 2,
 *		A. Add page number to list.
 *	    4. Else,
 *		A. Build a Spell String for the third-level item.
 *		B. Locate the third-level item in the tree.
 *		    1. If it does not exist,
 *			a. Allocate a new node and link it in.
 *		C. Add page number to list.
 *
 * Special Notes:
 *	The first character of token_1 may be a special character.  Special
 *	characters are used to indicate special highlighting to be performed
 *	on the page-no for this index entry.  The special characters are:
 *	    ^	use boldface
 *	    ~	use italic
 *	    _	use underlining
 *	Only one type of highlighting is supported for each page entry.
-*/

/*
 * Function Idx_Build_Tree - Code Section
 */

void	idx_build_tree (token_1, token_2, token_3, page_no, token_ct)

    char	    *token_1;
    char	    *token_2;
    char	    *token_3;
    char	    *page_no;
    char	    token_ct;
{
/*
 * Local Declarations
 */
    struct dsc$descriptor   spell_str = { 0, DSC$K_DTYPE_T, DSC$K_CLASS_D, 0 };
    TREE_PTR		    node;
    void		    idx_build_spell_string();
    TREE_PTR		    idx_locate_node();
    void		    idx_add_page_ref();
    int			    test_char;
    char		    *real_token_1;
    char		    highlight;
/*
 * Module Body
 */

/* Check for page-no highlighting flag					*/

if (token_ct == 0)				return;
if (token_1[0] == '\0')				return;
real_token_1 = &token_1[1];
test_char = token_1[0];
switch (test_char)
    {
    case '_' :	highlight = IDX_K_UNDERLINE;	break;

    case '~' :	highlight = IDX_K_ITALIC;	break;

    case '^' :	highlight = IDX_K_BOLD;		break;

    default  :	highlight = IDX_K_NONE;
		real_token_1 = &token_1[0];
		break;
    }

/* Check out the item for validity					*/

if (real_token_1[0] == '\0')			return;
if ((token_ct > 1) && (token_2[0] == '\0'))	return;
if ((token_ct > 2) && (token_3[0] == '\0'))	return;
if (token_ct > 3)				return;
if (page_no[0] == '\0')				return;

/* First level token processing						*/

idx_build_spell_string (real_token_1, &spell_str);
node = idx_locate_node (&root, real_token_1, &spell_str);
if (node == 0)
    {
    printf("Error processing index item %s\n", real_token_1);
    return;
    }
if (token_ct == 1)
    {
    idx_add_page_ref (&node->pghead, page_no, highlight);
    }

/* Second level token processing					*/

else
    {
    idx_build_spell_string (token_2, &spell_str);
    node = idx_locate_node (&node->subhead, token_2, &spell_str);
    if (node == 0)
	{
	printf("Error processing index subitem %s\n", token_2);
	return;
	}
    if (token_ct == 2)
	{
	idx_add_page_ref (&node->pghead, page_no, highlight);
	}

/* Third level token processing						*/

    else
	{
	idx_build_spell_string (token_3, &spell_str);
	node = idx_locate_node (&node->subhead, token_3, &spell_str);
	if (node == 0)
	    {
	    printf("Error processing index subsubitem %s\n", token_3);
	    return;
	    }
	idx_add_page_ref (&node->pghead, page_no, highlight);
	}
    }
}

/*+
 * Function Idx_Locate_Node - Documentation Section
 *
 * Discussion:
 *	This routine scans the list of nodes (starting from the listhead),
 *	looking for one which has a spell-string identical to the given
 *	spell string.  If it doesn't find one, it adds a node in the right place
 *	in the chain to maintain alphabetical order.
 *
 * Calling Synopsis:
 *	node = Idx_Locate_Node (head, token, desc)
 *
 * Inputs:
 *	head	    ->	is the address of the listhead of the list to be
 *			scanned.  As such, it is a TREE_PTR, passed by 
 *			reference.
 *
 *	token	    ->	is the token to be located or entered in the list.
 *			ASCIZ string passed by reference.
 *
 *	desc	    ->	is the spell string, passed by descriptor.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	node	    ->	is the address of the tree leaf for the current token.
 *			It is a TREE_PTR, passed by value.  If the leaf did not
 *			already exist and if the system was unable to allocate
 *			one, the node is returned as 0.
 *
 * Global Data:
 *	The list specified by the listhead will be modified if it is not 
 *	possible to add a new leaf when necessary.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	An empty listhead contains 0.
 *
 * Normal Exit State:
 *	The node is returned.
 *
 * Error Conditions:
 *	Allocation failure -- node = 0.

 *
 * Algorithm:
 *	A. If the list is empty,
 *	    1. Allocate a node and link it in.
 *	B. Else,
 *	    1. Beginning with the first node,
 *		a. If spell-string < node-spell-string,
 *		    1. Get next node.
 *		b. If spell-string = node-spell-string,
 *		    1. Return node pointer.
 *		c. If spell-string > node-spell-string,
 *		    1. Allocate a node and link it in.
 *
 * Special Notes:
 *	Although this algorithm isn't a very efficient way to alphatize things,
 *	it has the tremendous virtue of being easy to implement.  Since the
 *	program is not run often (typically a couple of times per document),
 *	the inefficiency is not important.
-*/

/*
 * Function Idx_Locate_Node - Code Section
 */

TREE_PTR	idx_locate_node (head, token, desc)

    TREE_PTR			*head;
    char			*token;
    struct dsc$descriptor	*desc;
{
/*
 * Local Declarations
 */
    TREE_PTR			node;
    TREE_PTR			old_node;
    TREE_PTR			new_node;
    int				status;
    TREE_PTR			idx_allocate_node();
/*
 * Module Body
 */

/* Chain through the list, looking for the right place			*/

old_node = 0;
new_node = *head;
while (new_node != 0)
    {
    status = str$compare(&new_node->spell, desc);
    if (status < 0)
	{
	old_node = new_node;
	new_node = new_node->link;
	continue;
	}
    if (status == 0)
	{
	return (new_node);
	}
    if (status > 0)
	{
	node = idx_allocate_node (token, desc);
	if (old_node == 0)
	    {
	    *head = node;
	    }
	else
	    {
	    old_node->link = node;
	    }
	node->link = new_node;
	return (node);
	}
    }

/* If the list is exhausted, allocate a node immediately		*/

node = idx_allocate_node (token, desc);
if (old_node == 0)
    {
    *head = node;
    }
else
    {
    old_node->link = node;
    }
return (node);
}

/*+
 * Function Idx_Allocate_Node - Documentation Section
 *
 * Discussion:
 *	Allocate and initialize a leaf of the Index Tree.
 *
 * Calling Synopsis:
 *	node = Idx_Allocate_Node (token, desc)
 *
 * Inputs:
 *	token	    ->	token string.  ASCIZ string passed by reference.
 *
 *	desc	    ->	spell string.  Character string passed by descriptor.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	node	    ->	Address of the allocated node.  TREE_PTR passed by 
 *			value.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	node is allocated.
 *
 * Error Conditions:
 *	allocation failure -- node is returned == 0.
 *
 * Algorithm:
 *	A. Allocate virtual memory for the node.
 *	B. Initialize the node structure.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Idx_Allocate_Node - Code Section
 */

TREE_PTR	idx_allocate_node (token, desc)

    char		    *token;
    struct dsc$descriptor   *desc;
{
/*
 * Local Declarations
 */
    int			    token_length;
    TREE_PTR		    node;
/*
 * Module Body
 */

node = (TREE_PTR) malloc (sizeof(TREE));
if (node != 0)
    {
    node->link    = 0;
    node->spell   = dyn_str;
    node->item    = dyn_str;
    node->subhead = 0;
    node->pghead  = 0;
    str$copy_dx(&node->spell, desc);
    token_length = strlen(token);
    str$copy_r(&node->item, &token_length, token);
    }

return (node);
}

/*+
 * Function Idx_Add_Page_Ref - Documentation Section
 *
 * Discussion:
 *	If the specified page reference is not already in the current list,
 *	add it to the end of the list.
 *
 * Calling Synopsis:
 *	Call Idx_Add_Page_Ref (pghead, token, highlight)
 *
 * Inputs:
 *	pghead	    ->	the address of the listhead for the page reference list.
 *			a PGNODE_PTR, passed by reference.
 *
 *	token	    ->	the page reference.  ASCIZ string passed by reference.
 *
 *	highlight   ->	a flag indicating what sort of highlighting to give
 *			this particular page reference.  char passed by value.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	none
 *
 * Global Data:
 *	If a new page reference needs to be added, the list is updated.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	If the page reference already exists, nothing happens.  If a new
 *	page reference needs to be added, it is allocated and appended to the
 *	end of the list.
 *
 * Error Conditions:
 *	If there is an allocation failure, the page reference is not added.

 *
 * Algorithm:
 *	A. Put the page_no token in a dynamic string.
 *	B. For all nodes in the list,
 *	    1. If the page-no in the list does not match the page-no,
 *		a. Get next node.
 *	    2. Else,
 *		a. Return.
 *	C. If the list is exhausted,
 *	    1. Allocate a new node.
 *	    2. Fill it in.
 *	    3. Chain it to the end of the list.
 *
 * Special Notes:
 *	This routine assumes that page-no references are always in numerical
 *	order.  This assumption is necessary, because the variety of page
 *	numbering (perhaps within the document) makes it impossible to check
 *	for order.
-*/

/*
 * Function Idx_Add_Page_Ref - Code Section
 */

void	idx_add_page_ref (pghead, token, highlight)

    PGNODE_PTR	    *pghead;
    char	    *token;
    char	    highlight;
{
/*
 * Local Declarations
 */
    int		    token_length;
    PGNODE_PTR	    node;
    PGNODE_PTR	    last_node;
/*
 * Module Body
 */

token_length = strlen(token);
str$copy_r(&page_ref, &token_length, token);

last_node = *pghead;

while (last_node != 0)
    {
    if (str$compare(&last_node->page_dsc, &page_ref) == 0)
	{
	if (highlight == IDX_K_NONE)	return;
	last_node->highlight = highlight;
	return;
	}
    if (last_node->link == 0)	    break;
    last_node = last_node->link;
    }

node = (PGNODE_PTR) malloc (sizeof(PGNODE));
if (node != 0)
    {
    node->link     = 0;
    node->page_dsc = dyn_str;
    str$copy_dx (&node->page_dsc, &page_ref);
    node->highlight = highlight;
    if (last_node == 0)
	{
	*pghead = node;
	}
    else
	{
	last_node->link = node;
	}
    }
}
