.TITLE	BUILD_DICTIONARY
;
; Builds the EDX dictionary database file EDX_DICTIONARY.DAT
;
; Logical WORDLIST must point to file containing alphabetised list of all words
; which makes up the dictionary lexical database.
;
; Logical COMMONWORDS must point to file containing list of commonly used words.
; This list is searched first before searching the main WORDLIST words.
;
; The output file is EDX_DICTIONARY.DAT
;
; See EDX_BLDDIC.COM for more information.
;------------------------------------------------------------------------------
; THE EDX DICTIONARY:
; It is possible to examine the EDX dictionary file EDX_DICTIONARY.DAT
; with the EDX editor.  Invoke the EDX editor and read in the file as
; you would any other file.  Each line of the file represents one 512 byte
; block of the file.  The line number corresponds to the Virtual Block
; Number (VBN) of the block.
;
; DICTIONARY FILE LAYOUT:
; To optimize performance we use direct disk block transfers which
; bypasses the concept of file records.  The disk block size is 512
; bytes.  Once the file is opened for block I/O (by specifying BIO in
; the FAC field of the FAB), the disk blocks can be randomly accessed by
; specifying the Virtual Block Number (VBN) of the block you wish to
; read or write.  Each section of the database begins on a block
; boundary.
;
;	DICTIONARY FILE LAYOUT:
;	--------------------
;	|   HEADER BLOCK   |	(VBN 1)
;	|                  |
;	--------------------
;	| LEXICAL DATABASE |	(VBN 2)
;	|                  |
;	|                  |
;	|                  |
;	|                  |
;	|                  |
;	--------------------
;	|      INDEX       |	(around VBN 2000)
;	|                  |
;	|                  |
;	--------------------
;	| COMMON WORD LIST |	(around VBN 2008)
;	|                  |
;	--------------------
;
; HEADER
; Virtual Block number 1 (VBN 1) contains information about the rest of
; the database, where it's located in the file and how long each section
; is.  The figure below describes the header block in detail.
;
;	FILE HEADER (VBN 1)
;	-----------------------------------------
;	|   'X'   |   'D'   |   'E'   |  VERSNO | 00	header
;	-----------------------------------------
;	|                LEXVBN                 | 04	Virtual Block Number of disk block where lexical database begins
;	-----------------------------------------
;	|                LEXBLN                 | 08	Length in blocks of lexical database section
;	-----------------------------------------
;	|                INDVBN                 | 0C	Virtual Block Number of disk block where index database begins
;	-----------------------------------------
;	|                INDLEN                 | 10	Length in bytes of index database
;	-----------------------------------------
;	|                INDSWD                 | 14	Size of each index word
;	-----------------------------------------
;	|                INDPLN                 | 18	Page length.  Number of bytes (of lexical database) between index words ( = n*BLOCK_SIZE )
;	-----------------------------------------
;	|                CWDVBN                 | 1C	Virtual Block Number of disk block where common word list begins
;	-----------------------------------------
;	|                CWDLEN                 | 20	Length in bytes of common word list 
;	-----------------------------------------
;
; LEXICAL DATABASE
; The Lexical Database portion contains the actual list of ~70,000
; words, hopefully some of which are correctly spelled.  The words
; must appear in alphabetical order, with a single space character separating
; each word, including a space preceding the very first word and a space
; following the very last word.  All alphabetic characters must be
; uppercase.  The continuous stream of characters looks like this:
;
;   " A AARDVARK AARDVARKS AARON AAVC ABACK ABACTERIAL ABACUS ...
;
; A section of this word list will later be searched for a match to an
; unknown word preceded and trailed by a single space character, such as:
;
;   search list for:  " AARON "
;
;
; INDEXING CONCEPTS
; Page Length:
; Conceptually, our lexical database section is broken into fixed length
; pages.  Each page is an integral number of 512 byte disk blocks.  The
; value INDPLN stored in the header block indicates the page length (in
; bytes) used in the dictionary.  (The value INDPLN must be an integral
; multiple of 512.)  If for example each page was 2 blocks long, then the
; value of INDPLN would be 1024 (512*2) and our lexical database looks like:
;
;	DICTIONARY FILE LAYOUT:
;	--------------------
;	|   HEADER BLOCK   | block 1	(virtual block number VBN 1)
;	|                  |
;	-------------------- LEXICAL DATABASE
;	|      page 1      | block 2
;	|                  | block 3
;	|      page 2      | block 4
;	|                  | block 5
;	|      page 3      | block 6
;	|        .         | .
;	|        .         | .
;	--------------------
;	|      INDEX       |	(around VBN 2000)
;	|                  |
;	|                  |
;	--------------------
;	| COMMON WORD LIST |	(around VBN 2008)
;	|                  |
;	--------------------
;
; Guide Words
; In a regular paper dictionary there is at the top of each page a guide
; word which indicates the first word for that page.  In our dictionary we
; use fixed length guide words, saving only the first N letters of the
; first full word of each lexical database page.  (Note that characters of
; a word may lie across a page boundary, we only accept the first full
; word after passing over those trailing characters of the previous word which
; started on the previous page.)  The value of N is the value of INDSWD stored
; in the header block.  If the guide word is less than N characters long it is
; blank padded to N characters.
; 
; INDEX
; The index section is a contiguous stream of the ordered fixed length
; guide words.  This is the information which helps us quickly zero in on
; where in the lexical database to search for a given word. 
;
; An example may explain best:
; Assuming an index word size (INDSWD) of 4, and a page length of 2 blocks
; (INDPLN = 1024 = 512*2), the first 4 pages of our dictionary would look
; like the following:
;
;  A                                                     (guide word for page 1 is "A   ")
;   A AARDVARK AARDVARKS AARON AAVC ABACK ABACTERIAL ... (page 1 consists of first 2 blocks (1024 bytes) of the lexical database.  Note leading space at beginning of database)
;  ABJE                                                  (guide word for page 2 is first 4 letters of word ABJECT)
;  OTICALLY ABJECT ABJECTION ABJECTLY ABJECTNESS ...     (page 2 consists of next 2 blocks (1024 bytes) of the lexical database)
;  ABRE                                                  (guide word for page 3 is first 4 letters of word ABREAST)
;  REACTION ABREAST ABRIDGE ABRIDGED ABRIDGEMENT ...     (page 3 consists of next 2 blocks (1024 bytes) of lexical database)
;  ABST                                                  (guide word for page 4 consists of first 4 letters of word ABSTRACTION)
;  STRACTING ABSTRACTION ABSTRACTIONAL ABSTRACTIONISM ...(page 4 consists of next 2 blocks (1024 bytes) of lexical database)
;
; The index contains the first 4 characters of the first full word of
; every page in the lexical database.  The 4 character index words are
; packed together into a continuous stream of characters.  For our above
; example, the index would look like:
;
;      guide word:  "A   ABJEABREABST..."
;                   |   |   |   |   |
;      page number:   1   2   3   4  ...
;
; The index of guide words may be randomly searched since we know each
; guide word is 4 characters long and we know where the index database
; starts and ends.
;
; Guide Word Index Searching algorithm
; The following is a brief description of the searching algorithm used by
; EDX in looking up a word in the dictionary.  We start by performing a
; binary search on the index of guide words for a match to the first N
; characters of the target word we ultimately wish to find.  There is however
; the possibility that 2 or more pages of our lexical database may have the
; same guide word since guide words are truncated to N characters.  We
; therefore abandon the binary search if we happen to come across an exact
; match, and switch to a linear search of the guide words up and down
; until we positively identify the smallest possible group of pages in
; which our target word must lie.
;   After identifying the target range of pages of the lexical database,
; we perform a linear search of those pages (using MATCHC) for our
; target word.
;
; COMMON WORD LIST:
; The common word list is formatted the same as the lexical database
; section.  It consists of a list of commonly used words with a space
; character between each word.  A space also precedes the first word
; and trails the last word in the list.  This is a short list (about
; 1 block in length) of very frequently used words, ordered by frequency
; of use, with the most frequently used word listed first.  This list
; is searched first before a search through the index and main
; lexical database is made.
;
;
; LOADING THE EDX DICTIONARY:
; The EDX dictionary is not "read" into memory.  Instead it is "mapped" into
; memory using the $CRMPSC service, which is considerably faster and doesn't
; use up any user page file quota (pgflquo).
;
; The dictionary could be loaded by first allocating about 2000 pages of
; memory, and then reading the entire file into the memory allocated.  This
; would be quite slow due to the large size of the database.  Also a user's
; pgflquo quota limits the total amount of memory a user may allocate, and
; the 2000 pages required for the database is a considerable amount of memory.
;
; $CRMPSC accomplishes the same result of allocating memory and then reading
; the file into memory, except it never allocates memory from the system, and
; it never reads in the file.  Instead, it expands the process region by 2000
; pages thereby instantly making new virtual memory available, and then
; it declares that the physical file EDX_DICTIONARY.DAT itself is the
; paging file for that section of memory.  The initialization is now complete,
; with hardly any work having been done.
;
; Now when the program attempts to read some of the dictionary that's in that
; memory range, a page fault will occur if that page is not already in memory
; and that page is automatically read into memory.  And since we're not using
; the system paging file for this, the user's pgflquo quota is not affected.
;
;
; PERFORMANCE OPTIMIZATION
; There are 2 variables described above which affect the performance of
; this dictionary.  You may play with these variables if you wish, and see
; what values work best.  The variables are:
;      DICINDSWD 	;INDSWD.  Size of guide word in index
;      DICINDPLN 	;INDPLN.  Page length in bytes (number of lexical database blocks between index words * block_size)
;
; INDSWD: (Index Size of Word)
; The smaller the guide word size, the smaller the index, and the less
; time it takes to search the index.  However, a smaller guide word means
; the range of pages where a target word must lie may not be as narrow as
; it would if the guide words were longer.  For example, if the guide word
; length were 2, and you were looking up the word 'ASSIMULATE', you would
; have to search all of the lexical database pages which had a guide word of
; 'AS'.
; 
; Conversely, with a larger guide word size, the range of pages wherein a
; target guide word must lie can be narrowed down more.  With a very large
; guide word size you could narrow down the search for a particular word
; to the exact page (actually 2 pages when you consider your target word
; might be at the end of the first page with trailing characters spilling
; onto the second page, right before the first full word of the second
; page from which the index guide word for that page is derived). 
; Narrowing this range down means less of the lexical database needs to be
; read and searched, but it also means the index gets larger.
;
; INDPLN:(Index Page Length)
; The smallest page size is 1 block.  With a small page size there are
; more pages total, which means more guide words, which means a larger
; index, but it also means it may be possible to further narrow the range
; of pages wherein a target word must lie.  This means less linear
; searching of pages for the target word.  However, determining the range
; of pages wherein a word must lie is also dependent upon the size of the
; guide word used (INDSWD).
; 
; Conversely, a large page size means less guide words, a smaller index
; size, and faster searching of the index.  Changing the page size from 1
; block to 2 will cut the size of the index in half.  However, when the
; range of pages is determined, those pages will have to be searched to
; find the target word.
;
;Recommendations:
; The index guide word size should be as large as possible and the page length
; should be as small as possible so that the position of most words
; in the lexical database can be narrowed down as much as possible.  The
; increase in size of the index is small and only affects the total size of
; the file EDX_DICTIONARY.DAT, of which the index accounts for only about 1%.
; It does not adversely affect the process since none of the EDX dictionary
; uses up virtual page file quota (pgflquo).  (It is mapped into memory as
; process private non-modifiable disk section using $CRMPSC).
;
; As such we have set the dictionary page size to = 1 block (512 bytes, or
; 1 page of memory), and the index guide word length to 6 which seems to
; be long enough so that there are few instances of two guide words being
; the same.
;
; 
; OPTIMIZING THE COMMON WORD LIST:
; The common word list is searched first for a match before searching
; the main lexical database.  Common words which occur often are thus
; handled quickly.  A longer common word list means a higher chance of
; matching whatever the target word is, thus skipping the longer process
; of searching The main lexical database.  However, a longer common word
; list means more time spent searching the common word list.
;
; Examining a reference book which lists words according to frequency of
; use is helpful in determining which words should be included in a
; common word list, and how long The list should be.  It turns out that
; The first few words at The top of The list are used quite frequently,
; with The number one word at The very top of The list being used nearly 3
; times as often as The second word on The list.  (And by now you may have
; guessed that The number one most frequently used word in The English
; language is The word 'THE').
;
; Probably a list containing the first 10 most commonly used words would
; be as effective in speeding up a spelling checker as a word list of the
; first 100 most commonly used words.
;
; Once again, all characters in the dictionary must be in uppercase.  
;------------------------------------------------------------------
;Register use:
;R7  = number of bytes written to section
;R9  = offset into WDBUF
;R10 = offset into REC_BUFF
;R11 = offset into DICNDX
;
;MACROS:
;	PUTCHAR char
;	Append one byte to the output record buffer REC_BUFF.  If the buffer
;	already holds REC_SIZE bytes it is first flushed with WRITE_RECBUF,
;	which also resets R10 to 0.
;	  char = byte source operand, e.g. #SPACE or WDBUF(R9)
;	  R10  = offset into REC_BUFF (advanced by one)
;	  R7   = number of bytes written to the current section (advanced by one)
	.MACRO	PUTCHAR char,?NOFLSH
	CMPW	R10, REC_SIZE			;Does the buffer already hold REC_SIZE bytes?
	BNEQ	NOFLSH				;No, the byte still fits
	JSB	WRITE_RECBUF			;Yes, write the buffer and start over at offset 0
NOFLSH:	MOVB	char,REC_BUFF(R10)		;Store the byte at the current offset
	INCL	R7				;Count it in the section total
	INCL	R10				;Advance offset into REC_BUFF
	.ENDM

;------------------------------------------------------------------------------

	.PSECT	DATA,WRT,NOEXE,LONG
	$SSDEF

;Constants
BLOCK_SIZE = 512			;Number of bytes in a disk block
SPACE = ^x20				;Ascii space character
TAB = ^x09				;Ascii tab character

;Optimization Parameters.  (Here are the values of INDSWD and INDPLN)
DICINDSWD = 6			;INDSWD.  Size of Guide Word used in index
DICINDPLN = 1*BLOCK_SIZE	;INDPLN.  Lexical Database Page Length in bytes (number of lexical database blocks between guide words * block_size)


;DEFINE OFFSETS INTO DICTIONARY HEADER BLOCK
;(byte offsets from the start of VBN 1; see the FILE HEADER figure above)
DIC_VERNO  = ^x00	;Dictionary version number (1 byte)
DIC_HID    = ^x01	;Dictionary header ID (3 bytes: 'E','D','X')
DIC_LEXVBN = ^x04	;Dictionary lexical database starting virtual block number
DIC_LEXBLN = ^x08	;Dictionary lexical database size in blocks
DIC_INDVBN = ^x0C	;Dictionary index starting virtual block number
DIC_INDLEN = ^x10	;Dictionary index length in bytes
DIC_INDSWD = ^x14	;Dictionary index size of word = INDSWD (constant)
DIC_INDPLN = ^x18	;Dictionary page length in bytes = INDPLN (constant)
DIC_CWDVBN = ^x1C	;Dictionary common word list starting virtual block number
DIC_CWDLEN = ^x20	;Dictionary common word list length in bytes
DICVERNO  = 2		;EDX Dictionary Version Number

REC_SIZE:	.WORD   0		;Out record buffer size in bytes (set by each write phase)
MAXREC_SIZE = DICINDPLN
REC_BUFF:	.BLKB	MAXREC_SIZE	;Out record buffer
WDBUF_SIZE = 80				;Inword buffer size
WDBUF:		.BLKB	WDBUF_SIZE	;Inword buffer (one input line)
WD_LEN:		.LONG	0		;Length of the input line held in WDBUF
BLOCK_NUMBER:	.LONG	0		;Virtual block number of next block to write
DICNDX:		.LONG	0		;Address of index memory
DICNDX_LEN:	.LONG	64		;Length in pages of index memory
HEADER:		.LONG	0		;Address of header memory
HEADER_LEN:	.LONG	1		;Length in pages of header memory
NEWBLOCK:	.LONG	1		;True if starting new disk block
INRAB:		.LONG	0		;Address of the RAB currently being read (LEXRAB or CWRAB)
ERROPWD1:	.ASCID	/Error opening WORDLIST: file./
ERROPWD2:	.ASCID	/Define logical WORDLIST to point to the file containing alphabetised/
ERROPWD3:	.ASCID	/list of words which make up the dictionary/
ERROPCW1:	.ASCID	/Error opening COMMONWORDS: file/
ERROPCW2:	.ASCID	/Define logical COMMONWORDS to point to the file containing list of/
ERROPCW3:	.ASCID	/commonly used words./
ERROPDIC:	.ASCID	/Error creating dictionary file EDX_DICTIONARY.DAT/
ERRCONWD:	.ASCID	/Error connecting to WORDLIST file/
ERRCONCW:	.ASCID	/Error connecting to COMMONWORDS file/
ERRCONDIC:	.ASCID	/Error connecting to dictionary file EDX_DICTIONARY.DAT/

;RMS file access blocks (FAB) and record access blocks (RAB).
;  LEXFAB/LEXRAB - input word list, file name WORDLIST:, read sequentially
;                  one record at a time into WDBUF
;  CWFAB/CWRAB   - input common word list, file name COMMONWORDS:, read
;                  sequentially into the same WDBUF
;  OUTFAB/OUTRAB - output file EDX_DICTIONARY.DAT, written with block I/O;
;                  the writers fill in RBF, RSZ and BKT before each $WRITE
.ALIGN LONG
LEXFAB:	$FAB	FNM = <WORDLIST:>	; Input file name

LEXRAB:	$RAB	FAB = LEXFAB,-
		RAC = SEQ,-
		UBF = WDBUF,-
		USZ = WDBUF_SIZE

CWFAB:	$FAB	FNM = <COMMONWORDS:>	; Input file name

CWRAB:	$RAB	FAB = CWFAB,-
		RAC = SEQ,-
		UBF = WDBUF,-
		USZ = WDBUF_SIZE

OUTFAB:	$FAB	FNM = <EDX_DICTIONARY.DAT>,-	; Output file name
		FOP = CBT,-			; contiguous best try
		MRS = BLOCK_SIZE,-		; record size = 1 BLOCK
		FAC = <BIO,PUT>,-		; Block I/O write operation
		ORG = SEQ,-			; File organization is to be sequential
		RFM = FIX			; Record format is fixed length

OUTRAB:	$RAB	FAB = OUTFAB,-			; Pointer to FAB
		RAC = SEQ,-			; Record access is to be sequential
		ROP = <BIO>			; Block I/O

;------------------------------------------------------------------------------

	.PSECT	CODE,NOWRT,EXE

	.ENTRY	BLDDIC,^M<>
;Program entry point.  Runs the build phases in order, closes the three
;files and exits.  Each phase aborts the image itself if it hits an error.
	BSBW	INITIALIZE			;Open files, get memory, reserve VBN 1
	BSBW	WRTDIC				;Lexical database (also builds the index in memory)
	BSBW	WRTDICNDX			;Index of guide words
	BSBW	WRTCOMWRDS			;Common word list
	BSBW	WRTHEADER			;Header goes last, once every field is known
	BSBW	CLOSE				;Close all files
	$EXIT_S					;Exit (no explicit status supplied)

;CLOSE
; Close the two input files and the output file.  Completion status of
; the $CLOSE calls is not examined.
CLOSE:
	$CLOSE	FAB=LEXFAB			;WORDLIST input
	$CLOSE	FAB=CWFAB			;COMMONWORDS input
	$CLOSE	FAB=OUTFAB			;Dictionary output
	RSB

;-----------------------------------------------------------------------------
;WRTDIC
; Write the lexical database section from the WORDLIST file, one
; dictionary page (DICINDPLN bytes) per write, and record its starting
; block (LEXVBN) and length in blocks (LEXBLN) in the header image.
; The index of guide words is accumulated in DICNDX by WRTWRDS as a side
; effect; on return R11 holds the number of index bytes produced.
WRTDIC:
	MOVL	HEADER,R8			;R8 = address of header block memory
	MOVL	BLOCK_NUMBER,DIC_LEXVBN(R8)	;LEXVBN = first block of the lexical database
	MOVAL	LEXRAB,INRAB			;Read words from the WORDLIST file
	MOVW	#DICINDPLN,REC_SIZE		;Buffer is flushed every DICINDPLN bytes
	MOVW	#DICINDPLN,OUTRAB+RAB$W_RSZ	;and that many bytes are written each time
	MOVAB	REC_BUFF,OUTRAB+RAB$L_RBF	;Address of buffer to write
	MOVL	#1,NEWBLOCK			;The very first word begins a page
	CLRL	R7				;R7 = bytes written to this section
	CLRL	R11				;R11 = offset into DICNDX
	JSB	WRTWRDS				;Copy the words
	MOVL	BLOCK_NUMBER,DIC_LEXBLN(R8)	;LEXBLN = next free block ...
	SUBL2	DIC_LEXVBN(R8),DIC_LEXBLN(R8)	;... minus the section's first block
	RSB

;-----------------------------------------------------------------------------
;WRTDICNDX
; Write the index of guide words (R11 bytes starting at the address in
; DICNDX) to the output file with a single $WRITE beginning at
; BLOCK_NUMBER, and fill in the INDVBN and INDLEN header fields.
; On return BLOCK_NUMBER is the first block after the index.
; Input:   R11 = length of index in bytes
; Uses:    R0, R8
; Exits the image with the RMS status if the write fails.
WRTDICNDX:
	MOVL	HEADER,R8			;Address of header block memory
	MOVL	BLOCK_NUMBER,DIC_INDVBN(R8)	;Fill INDVBN field.  Starting virtual block number of index
	MOVL	R11,DIC_INDLEN(R8)		;Fill INDLEN field.  Length in bytes of index
	MOVL	BLOCK_NUMBER,OUTRAB+RAB$L_BKT	;Block number to write
	MOVL	DICNDX,OUTRAB+RAB$L_RBF		;Address of dic-index buffer to write
	MOVW	#BLOCK_SIZE, REC_SIZE		;Leave REC_SIZE at one block, as before
	MOVW	R11,OUTRAB+RAB$W_RSZ		;Number of bytes in DICNDX (whole index in one write)
	$WRITE	RAB=OUTRAB			;Write DICNDX
	BLBC	R0,1$				;branch on error
	;Blocks occupied = index length rounded up to a whole block.  The
	;previous length/BLOCK_SIZE + 1 skipped an extra block whenever the
	;length was an exact multiple of BLOCK_SIZE.
	ADDL3	#<BLOCK_SIZE-1>,R11,R0
	DIVL2	#BLOCK_SIZE,R0			;R0 = number of blocks written
	ADDL2	R0,BLOCK_NUMBER			;BLOCK_NUMBER now points to
	RSB					;next unwritten block

1$:	$EXIT_S	R0				;abort

;-----------------------------------------------------------------------------
;WRTCOMWRDS
; Write the common word list section from the COMMONWORDS file, one
; block per write, and fill in the CWDVBN and CWDLEN header fields.
WRTCOMWRDS:
	MOVL	HEADER,R8			;R8 = address of header block memory
	MOVL	BLOCK_NUMBER,DIC_CWDVBN(R8)	;CWDVBN = first block of the common word list
	MOVAL	CWRAB,INRAB			;Read words from the COMMONWORDS file
	MOVW	#BLOCK_SIZE,REC_SIZE		;Buffer is flushed every block
	MOVW	#BLOCK_SIZE,OUTRAB+RAB$W_RSZ	;and one block is written each time
	MOVAB	REC_BUFF,OUTRAB+RAB$L_RBF	;Address of buffer to write
	CLRL	NEWBLOCK			;No guide word wanted for the first word
						;(WRITE_RECBUF sets the flag again after each write)
	CLRL	R7				;R7 = bytes written to this section
	JSB	WRTWRDS				;Copy the words
	MOVL	R7,DIC_CWDLEN(R8)		;CWDLEN = length of the list in bytes
	RSB

;==============================================================================

;WRTWRDS:
; Reads alphabetized words from infile and puts them in outfile

;WRITE_RECBUF
; Write the buffer described by OUTRAB (RBF/RSZ set up by the caller) at
; virtual block BLOCK_NUMBER, then prepare for the next buffer:
;   R10          = 0 (offset into REC_BUFF)
;   BLOCK_NUMBER = advanced by REC_SIZE/BLOCK_SIZE blocks
;   NEWBLOCK     = 1
; Uses R0.  Exits the image with the RMS status if the write fails.
WRITE_RECBUF:
	MOVL	BLOCK_NUMBER,OUTRAB+RAB$L_BKT	;Block number to write
	$WRITE	RAB=OUTRAB			;Write the block
	BLBC	R0,1$				;Branch on error
	CLRL	R10				;Reset offset into REC_BUFF
	;BLOCK_NUMBER is a longword, so advance it with longword arithmetic;
	;a word add would drop the carry out of the low 16 bits.
	MOVZWL	REC_SIZE,R0			;R0 = bytes per write
	DIVL2	#BLOCK_SIZE,R0			;R0 = number of blocks written
	ADDL2	R0,BLOCK_NUMBER			;Number of next block to fill
	MOVL	#1,NEWBLOCK			;Set NEWBLOCK flag
	RSB
1$:	$EXIT_S	R0				;abort

;DONE
; End of input reached.  Terminate the word list with a space, pad the
; remainder of the buffer with nulls and write it out.  Returns to the
; caller of WRTWRDS.
DONE:	PUTCHAR	#SPACE				;Trailing space after the last word
	MOVW	REC_SIZE,R0
	SUBW2	R10,R0				;R0 = bytes left unused in the buffer
	MOVC5	#0,(SP),#0,R0,REC_BUFF(R10)	;Null fill them (zero length source, fill byte 0)
	BRW	WRITE_RECBUF			;Write the last buffer; its RSB returns for us

;WRTWRDS
; Copy every word of the input file selected by INRAB to the output
; file, each preceded by a single space.  Control passes READ -> TRIM ->
; MWRD -> READ for each input line until end of file, then to DONE.
WRTWRDS:
	CLRL	R10				;Start at the beginning of REC_BUFF

READ:	$GET	RAB=@INRAB			;Read the next line into WDBUF
	BLBS	R0,TRIM				;Got one, go process it
	CMPL	R0,#RMS$_EOF			;Failure: was it just end-of-file?
	BEQL	2$				;Yes, finish the section
	$EXIT_S	R0				;No, abort with the RMS status
2$:	BRW	DONE

TRIM:	;Uppercase the line just read, then skip over any leading spaces
	;and tabs.  Branches to MWRD with R9 = offset in WDBUF of the first
	;character of the word, or back to READ if the line holds nothing
	;but spaces and tabs (or is empty).
	;R9 = offset into WDBUF
	MOVL	INRAB,R0
	MOVZWL	RAB$W_RSZ(R0),WD_LEN	;Save length of line

	PUSHAL	WDBUF
	PUSHL	WD_LEN
	CALLS	#2,UPCASE		;Uppercase the string

	CLRL	R9			;Reset offset to 0
1$:	CMPL	R9,WD_LEN		;Reached the end of the line?
	BNEQ	2$			;No, examine this character
	BRW	READ			;Yes, line was blank.  Get next line
2$:	CMPB	WDBUF(R9),#SPACE	;Compare with space
	BEQL	3$			;was a leading space
	CMPB	WDBUF(R9),#TAB		;Compare with tab
	BNEQ	MWRD			;not a tab or space.  Is beginning of word
3$:	INCL	R9			;Step past the blank.  (This used to increment
					;R2, so R9 never advanced and the loop never ended.)
	BRB	1$			;and look at the next character

;Extract first N (#DICINDSWD) characters of first word in block
;(If this is the WRTCOMWRDS pass we've already done the index and this won't matter)
;MWRD
; Entered from TRIM with R9 = offset in WDBUF of the first character of
; the word.  Emits a space, records a guide word in the index if this is
; the first word begun since the last buffer write, then copies the word
; to the output buffer up to the end of the line or the first space or
; tab, and goes back to READ for the next line.
MWRD:	PUTCHAR	#SPACE			;Space before each word (may flush the buffer, which sets NEWBLOCK)
	BLBC	NEWBLOCK,10$		;Now branch if not starting new block (after putting space there)
	SUBL3	R9,WD_LEN,R0		;R0 = number of bytes from start of word to end of line
	MOVL	DICNDX,R1		;R1 = base address of index memory
	MOVC5	R0,WDBUF(R9),#SPACE,#DICINDSWD,(R11)[R1]	;Copy up to DICINDSWD bytes to DICNDX+R11, space padded.  Alters R0-R5
	;NOTE(review): the source length runs to the end of the line, so for a
	;word shorter than DICINDSWD any trailing tab or other text on the line
	;would be copied into the guide word - confirm input never has any.
	;NOTE(review): R11 is not checked against the size of the index memory.
	ADDL2	#DICINDSWD,R11		;increment offset into index

;Move word to output
	;Move word from input to output
10$:	CLRL	NEWBLOCK		;Guide word (if any) is done for this block
1$:	PUTCHAR WDBUF(R9)		;Copy one character of the word
	INCL	R9				;Offset into WDBUF
	CMPL	R9,WD_LEN			;Check for end of word
	BNEQ	3$
	BRW	READ				;End of line, word is complete
3$:	CMPB	WDBUF(R9),#SPACE		;Check for trailing space
	BNEQ	4$
	BRW	READ				;Space ends the word, rest of line is ignored
4$:	CMPB	WDBUF(R9),#TAB			;Check for trailing tab
	BNEQ	1$				;not end of word yet.  Loop.
	BRW	READ				;Tab ends the word, rest of line is ignored

;==============================================================================

;OPEN_FILES
; Open and connect the two input files (WORDLIST, COMMONWORDS), then
; create and connect the output file.  Returns with RSB only if every
; step succeeds.
;
; Error reporting: the RMS STV and STS values are pushed first, then the
; addresses of the message descriptors in reverse order.  Each
; CALLS #1 to LIB$PUT_OUTPUT prints the descriptor on top of the stack and
; removes it on return, so the messages come out in order and the final
; CALLS #2 to LIB$STOP is left with (STS, STV) as its argument list.
OPEN_FILES:
	$OPEN	FAB=LEXFAB			; Open input file
	BLBS	R0,1$
	PUSHL	LEXFAB+FAB$L_STV
	PUSHL	LEXFAB+FAB$L_STS
	PUSHAL	ERROPWD3
	PUSHAL	ERROPWD2
	PUSHAL	ERROPWD1
	CALLS	#1, G^LIB$PUT_OUTPUT		; First line
	CALLS	#1, G^LIB$PUT_OUTPUT		; Second line
	CALLS	#1, G^LIB$PUT_OUTPUT		; Third line
	CALLS	#2, G^LIB$STOP			; Signal error and stop

1$:	$CONNECT RAB=LEXRAB			; Connect to input
	BLBS	R0,2$
	PUSHL	LEXRAB+RAB$L_STV
	PUSHL	LEXRAB+RAB$L_STS
	PUSHAL	ERRCONWD
	CALLS	#1, G^LIB$PUT_OUTPUT		; Error connecting to WORDLIST
	CALLS	#2, G^LIB$STOP			; Signal error and stop

2$:	$OPEN	FAB=CWFAB			; Open input file
	BLBS	R0,3$
	PUSHL	CWFAB+FAB$L_STV
	PUSHL	CWFAB+FAB$L_STS
	PUSHAL	ERROPCW3
	PUSHAL	ERROPCW2
	PUSHAL	ERROPCW1
	CALLS	#1,G^LIB$PUT_OUTPUT		; line 1
	CALLS	#1,G^LIB$PUT_OUTPUT		; line 2
	CALLS	#1,G^LIB$PUT_OUTPUT		; line 3
	CALLS	#2, G^LIB$STOP			; Signal error and stop

3$:	$CONNECT RAB=CWRAB			; Connect to input
	BLBS	R0,4$
	PUSHL	CWRAB+RAB$L_STV
	PUSHL	CWRAB+RAB$L_STS
	PUSHAL	ERRCONCW
	CALLS	#1, G^LIB$PUT_OUTPUT		; Error connecting to COMMONWORDS
	CALLS	#2, G^LIB$STOP			; Signal error and stop

4$:	$CREATE	FAB=OUTFAB			; Create output file
	BLBS	R0,5$
	PUSHL	OUTFAB+FAB$L_STV
	PUSHL	OUTFAB+FAB$L_STS
	PUSHAL	ERROPDIC
	CALLS	#1,G^LIB$PUT_OUTPUT		; Error creating dictionary file
	CALLS	#2, G^LIB$STOP			; Signal error and stop

5$:	$CONNECT RAB=OUTRAB			; Connect to output
	BLBC	R0,6$
	RSB					; All files are open and connected
6$:	PUSHL	OUTRAB+RAB$L_STV
	PUSHL	OUTRAB+RAB$L_STS
	PUSHAL	ERRCONDIC
	CALLS	#1, G^LIB$PUT_OUTPUT		; Error connecting to dictionary file
	CALLS	#2, G^LIB$STOP			; Signal error

;------------------------------------------------------------------------------

;GET_DICNDX
; Allocate DICNDX_LEN pages of memory for the in-memory index, leaving
; the address in DICNDX, and reset the index offset R11 to 0.
; Exits the image with the returned status if the allocation fails.
GET_DICNDX:
	PUSHAL	DICNDX				;2nd argument: longword that receives the base address
	PUSHAL	DICNDX_LEN			;1st argument: longword holding the page count
	CALLS	#2,G^LIB$GET_VM_PAGE		;Allocate memory
	BLBS	R0,1$				;Success
	$EXIT_S	R0				;Failure, abort
1$:	CLRL	R11				;Index is empty: offset into DICNDX = 0
	RSB

;------------------------------------------------------------------------------
;GET_HEADER
; Allocate HEADER_LEN pages (one page, i.e. one block) of memory for the
; header block image, leaving the address in HEADER, and clear it so the
; unused part of the header block is written as nulls.
; Uses R0-R5 (MOVC5).  Exits the image with the returned status if the
; allocation fails.
GET_HEADER:
	PUSHAL	HEADER				;Address to place return address of memory block allocated
	PUSHAL	HEADER_LEN			;Address containing length of memory to allocate
	CALLS	#2,G^LIB$GET_VM_PAGE		;Allocate memory
	BLBC	R0,1$
	;Clear the whole block.  The length must be given explicitly: REC_SIZE
	;is still 0 at this point (WRTBLK1 sets it later), so using it as the
	;fill length cleared nothing.
	MOVC5	#0,(SP),#0,#BLOCK_SIZE,@HEADER	; zero out the memory
	RSB
1$:	$EXIT_S	R0				;abort

;WRTBLK1
; Write a block of nulls as virtual block 1 to hold the place of the
; header, which WRTHEADER rewrites at the end.  Leaves BLOCK_NUMBER at 2
; (advanced by WRITE_RECBUF) and REC_SIZE at one block.
WRTBLK1:
	MOVW	#BLOCK_SIZE,REC_SIZE		;record size = one block
	MOVW	#BLOCK_SIZE,OUTRAB+RAB$W_RSZ	;and write that many bytes
	MOVAB	REC_BUFF,OUTRAB+RAB$L_RBF	;Address of buffer to write
	MOVC5	#0,(SP),#0,#BLOCK_SIZE,REC_BUFF	;Fill the buffer with nulls
	MOVL	#1,BLOCK_NUMBER			;Target is virtual block #1 (empty header block)
	BRW	WRITE_RECBUF			;Write it; WRITE_RECBUF returns to our caller
;------------------------------------------------------------------------------
;INITIALIZE
; One-time setup: open the files, allocate the index and header memory,
; and write the placeholder header block.
INITIALIZE:
	JSB	OPEN_FILES			;Open/connect inputs, create/connect output
	JSB	GET_DICNDX			;Memory for the index of guide words
	JSB	GET_HEADER			;Memory for the header block image
	BRW	WRTBLK1				;Reserve VBN 1; WRTBLK1 returns to our caller
;==============================================================================
;WRTHEADER
; Complete the header block image (version number, 'EDX' identifier,
; INDSWD and INDPLN; the other fields were filled in by the write phases)
; and write it to virtual block 1.
WRTHEADER:
	MOVL	HEADER,R8			;R8 = address of header block memory
	MOVL	R8,OUTRAB+RAB$L_RBF		;It is also the buffer to write
	MOVB	#DICVERNO,DIC_VERNO(R8)		;Version number
	MOVB	#^A"E",DIC_HID(R8)		;Identifier bytes 'E','D','X'
	MOVB	#^A"D",DIC_HID+1(R8)
	MOVB	#^A"X",DIC_HID+2(R8)
	MOVL	#DICINDPLN,DIC_INDPLN(R8)	;Page length in bytes
	MOVL	#DICINDSWD,DIC_INDSWD(R8)	;Guide word size
	MOVW	#BLOCK_SIZE,REC_SIZE		;record size = one block
	MOVW	#BLOCK_SIZE,OUTRAB+RAB$W_RSZ	;and write that many bytes
	MOVL	#1,BLOCK_NUMBER			;Header lives in virtual block 1
	JSB	WRITE_RECBUF			;Write the header block
	RSB

;------------------------------------------------------------------------------
	.SUBTITLE UPCASE
;++
;
; Functional Description:
;	Uppercase a string.
;
; Calling Sequence:
;	CALLS	#2,UPCASE
;
; Argument inputs:
;	4(AP) - Length of string (word)
;	8(AP) - Address of string
;
; Outline:
;	1.  Descriptor for string is built on stack
;	2.  STR$UPCASE is called to upcase string

	$DSCDEF				;Define DSC$ descriptor definitions
STRLEN=4				;Offset from AP of string length argument
STRADR=8				;Offset from AP of string address argument
	.ENTRY	UPCASE,^M<>
;Uppercase a string in place.
;  4(AP) = length of string (low word is used)
;  8(AP) = address of string
;A fixed-length string descriptor is built on the stack and passed to
;STR$UPCASE as both destination and source.  RET releases the stack space.
	SUBL2	#8,SP			;Reserve the full 8 bytes a descriptor needs.
					;(Reserving only 4 put the pointer longword on top
					;of the condition handler slot at (FP).)
	MOVW	STRLEN(AP),DSC$W_LENGTH(SP)	;length of string
	MOVB	#DSC$K_DTYPE_T,DSC$B_DTYPE(SP)	;Fill in Type
	MOVB	#DSC$K_CLASS_S,DSC$B_CLASS(SP)	;Fill in Class
	MOVL	STRADR(AP),DSC$A_POINTER(SP)	;Address of string
	MOVL	SP,R0			;Address of string descriptor
	PUSHL	R0			;src-str descriptor
	PUSHL	R0			;dst-str descriptor
	CALLS	#2,G^STR$UPCASE		;UPCASE STRING
	RET

	.END	BLDDIC