#module    GloScan    "1-002"
/*
 ***********************************************************************
 *                                                                     *
 * The software was developed at the Monsanto Company and is provided  *
 * "as-is".  Monsanto Company and the author disclaim all warranties   *
 * on the software, including without limitation, all implied warran-  *
 * ties of merchantability and fitness.                                *
 *                                                                     *
 * This software does not contain any technical data or information    *
 * that is proprietary in nature.  It may be copied, modified, and     *
 * distributed on a non-profit basis and with the inclusion of this    *
 * notice.                                                             *
 *                                                                     *
 ***********************************************************************
 */

/*+
 * Module Name:	GloScan
 *
 * Author:	R L Aurbach	CR&DS MIS Group    20-Aug-1986
 *
 * Function:
 *	Scan a file for glossary definitions and build the internal data
 *	structure for the data.
 *
 * Modification History:
 *
 * Version     Initials	   Date		Description
 * ------------------------------------------------------------------------
 * 1-001	RLA	20-Aug-1986	Original Code
 * 1-002	RLA	25-Aug-1986	Make Glo_Parse_Item more sophisticated.
-*/

/*
 * Module GloScan - Module-Wide Data Description Section
 *
 * Include Files:
 */
#include	    descrip
#include	    ctype
#include	    stdio
#include	    "GloDef.H"

/*
 * Module Definitions:
 */

/*
 * Global Declarations:
 */

/*
 * Static Declarations:
 */
    /*
     * Template descriptor, initialised from STRDYN (defined outside this
     * file).  It is copied by assignment into each new NODE's spell and item
     * descriptors and into each new STRING's desc before text is stored.
     */
    static struct dsc$descriptor    str_dyn = STRDYN;

/*
 * External References:
 */
    /* Head of the NODE list; glo_link_node keeps it in spell-string order.  */
    extern NODE_PTR		    root;
    /* Head of the label list; glo_match_label unchains and frees an entry   */
    /* when a definition with that label is found.			     */
    extern STRING_PTR		    labels;

/*
 * Functions Called:
 */

/*+
 * Function Glo_Scan_File - Documentation Section
 *
 * Discussion:
 *	Scan the specified glossary file for definitions which have labels which
 *	match the labels in the label list.  If a match is found, copy the
 *	information to a NODE in the data structure and delete the label from
 *	the label list.  Entries are placed in the data structure in alphabetic
 *	order.
 *
 * Calling Synopsis:
 *	status = Glo_Scan_File (file)
 *
 * Inputs:
 *	file	    ->	is a STRING_PTR, passed by value.  It contains the
 *			descriptor for the file specification string to be
 *			used to open the file.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status	    ->	is a boolean flag which indicates the result of the
 *			operation.  It will have a value of TRUE unless an
 *			error occurs which is fatal to the execution of the
 *			program.
 *
 * Global Data:
 *	root	    ->	additions may be made to the internal data structure.
 *
 *	labels	    ->	if a definition is found, its entry in the label list is
 *			removed.
 *
 * Files Used:
 *	The specified definition file is opened for read access.
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	The operation succeeded.  This means that no error which
 *			is fatal to the operation of the program occurred, not 
 *			that the program proceeded without error.
 *
 * Error Conditions:
 *	status == FALSE	A fatal error occurred.  The reason for the error should
 *			be described in a message to the user.

 *
 * Algorithm:
 *	A. If the labels list is empty, return immediately.
 *	B. Convert the file spec to an ASCIZ string and open the file.
 *	C. For each entry in the file,
 *	    1. Locate the label.
 *	    2. If the label list contains that label,
 *		a. Delete the label from the label list.
 *		b. Allocate a NODE.
 *		c. Fill in the NODE with the item and spell string.
 *		d. Fill in the NODE with definition lines.
 *		e. Chain the NODE into the data structure, using the spell 
 *		   string to position it correctly.
 *	D. Close the file.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Glo_Scan_File - Code Section
 */

/*
 * Glo_Scan_File
 *
 * Read the glossary definition file named by the descriptor in "file" and,
 * for every "@entry{" line whose label is still in the label list, build a
 * NODE, link it into the root list and attach the definition text lines
 * that follow it.
 *
 * Returns TRUE unless a memory allocation fails, in which case a message is
 * printed, the file is closed and FALSE is returned.  Failure to open the
 * file is reported but is not fatal (TRUE is returned).
 */
int	Glo_Scan_File (file)

    STRING_PTR	    file;
{
/*
 * Local Declarations
 */
    char	    dna[256];
    FILE	    *f;
    int		    counter = 0;
    char	    line[linesz];
    char	    token[linesz];
    char	    item[linesz];
    int		    ptr;
    int		    len;
    int		    spec_len;
    int		    status;
    NODE_PTR	    node = 0;
    int		    mode = FALSE;
/*
 * Module Body
 */

if (labels == 0)    return (TRUE);

/* Open the glossary definition file					    */

/*
 * The file spec is passed to fopen as a "dna" attribute string.  The
 * descriptor length is a 16-bit count, so bound it to what fits in dna after
 * the 6-character "dna = " prefix and the terminating NUL.
 */

spec_len = file->desc.dsc$w_length;
if (spec_len > (int) sizeof(dna) - 7)	spec_len = (int) sizeof(dna) - 7;
sprintf(dna, "dna = %.*s", spec_len, file->desc.dsc$a_pointer);
if ((f = fopen(".gdf", "r", dna)) == NULL)
    {
    printf ("Could not find the glossary definition file %.*s\n",
	file->desc.dsc$w_length, file->desc.dsc$a_pointer);
    return (TRUE);
    }

while (fgets(line, linesz, f) != 0)
    {

/*
 * If mode = FALSE, we are in discard mode.  In this mode, we throw away any
 * line which does not begin with "@entry".  If mode = TRUE, we are in copy
 * mode.  In this mode, we copy any text to the current node.  In either case,
 * if the line begins with "@entry", we must process it.
 */

    status = strncmp(line, "@entry{", 7);
    if (status != 0)
	{
	if (mode)
	    {
	    status = glo_copy_text(line, node);
	    if (!status)
		{
		printf("Could not allocate space for definition text\n");
		fclose(f);
		return(FALSE);
		}
	    }
	continue;
	}

/*
 * We've found a new entry in the file.  Begin processing it.
 * Search for the first line of a definition entry and parse out the label.
 */

    status = glo_parse_item(line, token, item, &ptr);
    if (!status)
	{
	mode = FALSE;
	continue;
	}

/* Search the label list for a label with the same name as the token.	    */

    status = glo_match_label (token);
    if (!status)
	{
	mode = FALSE;
	continue;
	}
    counter++;

/* Allocate and build the NODE structure				    */

    node = (NODE_PTR) malloc(sizeof(NODE));
    if (node == 0)
	{
	printf("Could not allocate space for a new definition\n");
	fclose(f);			/* do not leak the open stream	    */
	return(FALSE);
	}
    node->next = 0;
    node->spell = str_dyn;
    node->item = str_dyn;
    node->hdr = 0;
    idx_build_spell_string(item, &node->spell);
    len = strlen(item);
    str$copy_r(&node->item, &len, item);

/* Link the NODE into the data structure in alphabetical order		    */

    status = glo_link_node(node);
    if (!(status & TRUE))
	{
	/*
	 * NOTE(review): the rejected node and its two dynamic strings are
	 * not released here, and the entry has already been counted and its
	 * label removed from the label list.
	 */
	printf("Duplicate glossary entry \'%s\' ignored\n", item);
	mode = FALSE;
	continue;
	}

/* If there is text on the @entry line, use it.				    */

    if (line[ptr] != '\0')
	{
	status = glo_copy_text (&line[ptr], node);
	if (!status)
	    {
	    printf("Could not allocate space for definition text\n");
	    fclose(f);
	    return(FALSE);
	    }
	}
    mode = TRUE;
    }

/* End of file seen.							    */

fclose(f);

if (counter == 0)
    {
    printf("\nNo definitions found in file %.*s\n",
	file->desc.dsc$w_length, file->desc.dsc$a_pointer);
    }
else
    {
    printf("\n%d definitions extracted from file %.*s\n", counter,
	file->desc.dsc$w_length, file->desc.dsc$a_pointer);
    }
return (TRUE);
}

/*+
 * Function Glo_Link_Node - Documentation Section
 *
 * Discussion:
 *	Link a node into the node list in alphabetical order.
 *
 * Calling Synopsis:
 *	status = Glo_Link_Node (node)
 *
 * Inputs:
 *	node	    ->	is a NODE_PTR, passed by reference.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status	    ->	is a boolean integer.  TRUE means a successful link.
 *			FALSE means a duplicate entry was found.
 *
 * Global Data:
 *	root	    ->	the node is linked into the root list.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	node was linked in.
 *
 * Error Conditions:
 *	status == FALSE	a node with this spell string was already linked into
 *			the list.  This node is NOT linked in.
 *
 * Algorithm:
 *	A. If the list is empty, link the node in at the root.
 *	B. Else,
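 *	    1. Beginning at the first node in the list, compare each list
 *	       entry's spell string with the new node's spell string:
 *		a. If list-entry < new-node,
 *		    1. Get next list entry.
 *		b. If list-entry = new-node,
 *		    1. Return FALSE.
 *		c. If list-entry > new-node,
 *		    1. Link the new node in ahead of the list entry.
 *	    2. If the list is exhausted, link the new node in at the end.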
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Glo_Link_Node - Code Section
 */

/*
 * glo_link_node
 *
 * Splice "node" into the list headed by root, ahead of the first entry whose
 * spell string compares greater than the node's.  Returns FALSE without
 * linking if an entry with an equal spell string already exists; otherwise
 * returns TRUE.
 */
static int	glo_link_node (node)

    NODE_PTR		node;
{
/*
 * Local Declarations
 */
    NODE_PTR		prev;		/* entry before the insertion point  */
    NODE_PTR		cur;		/* entry being compared		     */
    int			order;
/*
 * Module Body
 */

prev = 0;
for (cur = root; cur != 0; prev = cur, cur = cur->next)
    {
    order = str$compare(&cur->spell, &node->spell);
    if (order == 0)	return (FALSE);
    if (order > 0)
	{

/* Insertion point found: the new node goes directly ahead of cur.	    */

	node->next = cur;
	break;
	}
    }

/*
 * Hook the node in after prev, or at the root when there is no predecessor.
 * When the list was exhausted, node->next is left exactly as the caller set
 * it.
 */

if (prev == 0)
    root = node;
else
    prev->next = node;
return (TRUE);
}

/*+
 * Function Glo_Match_Label - Documentation Section
 *
 * Discussion:
 *	See if a label in the label list matches the current file entry.
 *	If it does, then deallocate the label entry and return TRUE.
 *	Otherwise, return FALSE.
 *
 * Calling Synopsis:
 *	status = Glo_Match_Label (token)
 *
 * Inputs:
 *	token	    ->	is the file token string to be matched against the label
 *			list.  It is an ASCIZ string.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status	    ->	is a boolean integer.  TRUE indicates that this entry
 *			corresponds to a valid label in the label list.  The
 *			label list entry is removed.  FALSE indicates that
 *			this entry does not match a label in the label list.
 *
 * Global Data:
 *	labels	    ->	if the entry matches a label, that STRING in the label
 *			list is removed.
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	    We have a match.
 *	status == FALSE	    No match.
 *
 * Error Conditions:
 *	none
 *
 * Algorithm:
 *	A. For each entry in the label list,
 *	    1. Compare the label with the token.
 *	    2. If they match,
 *		a. Unchain and deallocate the STRING.
 *		b. Return TRUE.
 *	    3. Else,
 *		a. Get the next label.
 *	B. If no match,
 *	    1. Return FALSE.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Glo_Match_Label - Code Section
 */

/*
 * glo_match_label
 *
 * Look for an entry in the label list whose text has the same length and the
 * same characters as the ASCIZ string "token".  If one is found, unchain it,
 * release its string and the entry itself, and return TRUE.  Return FALSE
 * when no entry matches (including when the list is empty).
 */
static int	glo_match_label (token)

    char	    *token;
{
/*
 * Local Declarations
 */
    int		    token_len = strlen(token);
    STRING_PTR	    prev;
    STRING_PTR	    cur;
/*
 * Module Body
 */

for (prev = labels, cur = labels; cur != 0; prev = cur, cur = cur->next)
    {

/* Skip entries that differ in length or in content.			    */

    if (cur->desc.dsc$w_length != token_len)	continue;
    if (strncmp(token, cur->desc.dsc$a_pointer, token_len) != 0)	continue;

/* Match: remove the entry from the list, then release its storage.	    */

    if (cur == labels)
	labels = cur->next;
    else
	prev->next = cur->next;
    str$free1_dx(&cur->desc);
    free(cur);
    return (TRUE);
    }

return (FALSE);
}

/*+
 * Function Glo_Parse_Item - Documentation Section
 *
 * Discussion:
 *	Parse the label and item strings from a line with the format
 *	    @entry{label,item}[text]
 *	leaving the pointer pointing to the optional text string.
 *
 * Calling Synopsis:
 *	status = Glo_Parse_Item (line, label, item, ptr)
 *
 * Inputs:
 *	line	    ->	is the text line to parse.  It is an ASCIZ text string.
 *
 *
 * Outputs:
 *	label	    ->	is the label text string.  It is an ASCIZ text string.
 *
 *	item	    ->	is the item text string.  It is an ASCIZ text string.
 *
 *	ptr	    ->	is an integer index value, passed by reference.  It is
 *			the array index of the next character of the line
 *			string, after the closing "}".
 *
 * Return Value:
 *	status	    ->	is a result indicator.  If TRUE, the label and item
 *			elements were found.  If FALSE, there was a problem
 *		        with the parse operation.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	Operation succeeded.  All output variables were 
 *			returned.
 *
 * Error Conditions:
 *	status == FALSE	Operation failed due to a missing syntax element.
 *			None of the output variables is trustworthy.
 *
 * Algorithm:
 *	A. Parse out the label.  It begins with the first character after the
 *	   leading "{" and is terminated by a ",".
 *	B. Find the beginning of the item.  It starts at the first
 *	   non-whitespace character after the ",".
 *	C. Parse out the item.  Internally, braces are allowed as long as they
 *	   properly match.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Glo_Parse_Item - Code Section
 */

/*
 * glo_parse_item
 *
 * Split a line of the form "@entry{label,item}[text]" into its label and
 * item strings.
 *
 *	line	ASCIZ input line; scanning starts at index 7, just past the
 *		leading "@entry{".
 *	label	receives the characters up to the first "," or up to the
 *		closing "}".
 *	item	receives the characters after the "," (leading whitespace
 *		skipped) up to the closing "}".  Unescaped braces nest; a
 *		brace preceded by a backslash is copied but does not affect
 *		nesting.  If the item is empty, the label is copied to it.
 *	ptr	receives the index at which scanning stopped.
 *
 * Returns TRUE on success, FALSE if the line ends before the entry is
 * closed (the outputs are then not meaningful).
 */
static int	glo_parse_item (line, label, item, ptr)

    char	    *line;
    char	    *label;
    char	    *item;
    int		    *ptr;
{
/*
 * Local Declarations
 */
    int		    out;	/* next free index in the output string	    */
    int		    depth;	/* count of unmatched, unescaped "{"	    */
    int		    escaped;	/* current character follows a backslash    */
    char	    ch;
/*
 * Module Body
 */

/* Copy the label to the output string.					    */

out = 0;
depth = 1;			/* the "{" of "@entry{" is already open	    */
*ptr = 7;
for (;;)
    {
    ch = line[(*ptr)++];
    if (ch == '\0')	return (FALSE);
    escaped = (line[*ptr - 2] == '\\');
    if (ch == '{' && !escaped)	depth++;
    if (ch == '}')
	{
	if (!escaped)	depth--;
	if (depth <= 0)	break;
	}
    if (ch == ',')	break;
    label[out++] = ch;
    }
label[out] = '\0';

/*
 * Find the beginning of the item string.  The cast keeps 8-bit characters
 * from reaching isspace as negative values, which is undefined.
 */

while (isspace((unsigned char) line[*ptr]) != 0)	(*ptr)++;

/*
 * Copy the item to the output string.  The loop is skipped entirely when
 * the label scan already consumed the closing brace.
 */

out = 0;
while (depth > 0)
    {
    ch = line[(*ptr)++];
    if (ch == '\0')	return (FALSE);
    escaped = (line[*ptr - 2] == '\\');
    if (ch == '{' && !escaped)	depth++;
    if (ch == '}')
	{
	if (!escaped)	depth--;
	if (depth <= 0)	break;
	}
    item[out++] = ch;
    }
item[out] = '\0';

/* Check to see if the item is missing. If it is, default to the label.	    */

if (out == 0)	    strcpy(item, label);
return (TRUE);
}

/*+
 * Function Glo_Copy_Text - Documentation Section
 *
 * Discussion:
 *	Create a STRING containing the current line of text and chain it in
 *	to the current node's definition list.
 *
 * Calling Synopsis:
 *	status = Glo_Copy_Text (line, node)
 *
 * Inputs:
 *	line	    ->	is the current line of text.  It is an ASCIZ string.
 *
 *	node	    ->	is the NODE_PTR for the current glossary entry.
 *
 * Outputs:
 *	none
 *
 * Return Value:
 *	status 	    ->	is a boolean integer which reflects success or
 *			failure.  TRUE will be returned unless there is a
 *			failure to allocate the string.
 *
 * Global Data:
 *	none
 *
 * Files Used:
 *	none
 *
 * Assumed Entry State:
 *	none
 *
 * Normal Exit State:
 *	status == TRUE	    success.
 *
 * Error Conditions:
 *	status == FALSE	    virtual memory allocation failure.
 *
 * Algorithm:
 *	A. Allocate a STRING data structure.
 *	B. If the input string has a non-zero length,
 *	    1. Copy the string to the STRING data structure.
 *	C. Chain the structure in to the end of the hdr structure.
 *
 * Special Notes:
 *	none
-*/

/*
 * Function Glo_Copy_Text - Code Section
 */

/*
 * glo_copy_text
 *
 * Append one line of definition text to the end of node's hdr list.
 *
 *	line	ASCIZ text; a single trailing newline, if present, is not
 *		copied.
 *	node	glossary entry whose hdr list receives the new STRING.
 *
 * A STRING is always appended, even when the text is empty; in that case its
 * descriptor is left as the unfilled str_dyn template.  Returns TRUE on
 * success, FALSE if the STRING could not be allocated.
 */
static int	glo_copy_text (line, node)

    char	    *line;
    NODE_PTR	    node;
{
/*
 * Local Declarations
 */
    int		    length;
    STRING_PTR	    old;
    STRING_PTR	    text;
/*
 * Module Body
 */

/* Guard the newline test so an empty string never indexes line[-1].	    */

length = strlen(line);
if (length > 0 && line[length-1] == '\n')	length--;

text = (STRING_PTR) malloc(sizeof(STRING));
if (text == 0)	    return (FALSE);

text->next = 0;
text->desc = str_dyn;
if (length > 0)	    str$copy_r(&text->desc, &length, line);

/* Chain the new STRING onto the end of the node's text list.		    */

if (node->hdr == 0)
    {
    node->hdr = text;
    }
else
    {
    old = node->hdr;
    while (old->next != 0)	old = old->next;
    old->next = text;
    }

return (TRUE);
}