/* MSBMKB.C
 *
 * Translates a binary file into a file composed only of printable characters,
 * called a "BOO file", which can be decoded by the MSBPCT program.  The file
 * is encoded by taking three consecutive eight bit bytes and dividing them
 * into four six-bit bytes.  An ASCII zero is then added to the resulting four
 * bytes to make them printable ASCII characters in the range of the character
 * zero to the character underscore.  To reduce the size of the encoded file,
 * runs of up to 78 consecutive NULs (ASCII 0) are compressed into only two
 * characters, tilde (~) followed by the number of NULs + "0".  Because of the
 * four-for-three encoding, runs of NULs can only start with the first
 * character of a triplet.
 *
 * This will compile automatically on an MS-DOS system if the compiler
 * has the symbol MSDOS predefined (as does Microsoft C).  If the symbol
 * MSDOS is not defined, it will compile for OS-9 when the symbol OSK is
 * defined, and for UNIX otherwise.
 */	 

/* Update history:
 *
 * 1/25/92 - Christian Hemsing. OS-9 support. Added one or two ~0 at end
 * to decide whether one or two nulls should be removed from end. Old BOO
 * decoders should not be bothered.  (Idea from Charles Lasner.)
 *
 * 6/10/90 - Frank da Cruz.  Remove TOPS-20 code, make it work automatically
 * under UNIX.
 * 
 * 1/27/89 - Frank da Cruz.  Fix up #ifdef's for MSDOS.
 *
 * 1/01/88 - Frank da Cruz.  Add exit(0); to end of main() to give good
 * return code upon success.
 *
 * 3/11/86 - Howie Kaye, Columbia University:
 *  . Added UNIX and Microsoft C compatibility.
 *  . Changed I/O to be buffered.
 * Note: There is a bug built into the EOF handling which causes the 
 * output file to grow every time a file is packed/unpacked.  This is 
 * because 2 nulls and a space are added at the end of each run.  Since
 * the data is past the end of the file, it does not affect the program
 * produced.
 *
 * Original by Bill Catchings, Columbia University, July 1984.
 */

#include <stdio.h>		/* Standard C i/o definitions */

#ifdef MSDOS
#include <fcntl.h>
#else
#ifdef OSK
#include <modes.h>
#else
#include <sys/file.h>
#endif
#endif

/* Symbol Definitions */

#define MAXPACK 	80	/* Maximum record (line) size */

#define MYRPTQ		'~'     /* Repeat count prefix */

/*
 * Length of data buffer: MAXPACK less room for \r, \n and \0.  The
 * expansion is parenthesized so that it behaves as a single value inside
 * a larger expression (e.g. 2*DATALEN).
 */
#define DATALEN 	(MAXPACK-3)

#define TRUE		(-1)	/* Boolean constants */
#define FALSE		0

/* Macros */

/* Make a small non-negative value printable by adding ASCII '0' to it. */
#define tochar(ch)  ((ch) + '0')

/* Global Variables */

int	maxsize,			/* Record data limit: getbuf ends a record once size reaches it */
	fd,				/* File descriptor of input file (opened read-only in sfile) */
	ofd,				/* File descriptor of output file (created in sfile) */
	otot,				/* Position (1, 2 or 3) of the next character in the current triplet */
	rpt,				/* Count of pending NULs not yet written to the record */
	rptq,				/* Repeat prefix character (set to MYRPTQ in main) */
	rptflg,				/* Repeat processing flag (set to TRUE in main; not read in the code shown) */
	size,				/* Number of characters currently in packet */
	t,				/* Current input character as int; negative at end of input */
	eoflag;				/* Nonzero once the end of the input file has been reached */

long	nc,				/* Number of input characters read so far */
	oc;				/* Number of output characters in the records written so far */

char	one,				/* First character of triplet */
	two,				/* Second character of triplet */
	three,				/* Third character of triplet */
	*filnam,			/* Input file name (upper-cased in place by sfile) */
	*ofile,				/* Output file name */
	packet[MAXPACK];		/* Output record buffer */

/*
 * M A I N - Check the command line, encode the file, report the totals.
 *
 * Exactly two arguments are expected: the name of the input file and the
 * name of the output file to create.  With any other argument count the
 * usage message is printed and the program exits.  After encoding, the
 * input and output character counts and their ratio are printed and the
 * program exits with status 0.
 *
 * The return type and the helper routines are declared explicitly; implicit
 * int and implicit function declarations are no longer part of C.
 */
int
main(argc,argv) int argc; char **argv; {

    char sfile();			/* Encode-file routine */
    int usage();			/* Prints the usage message and exits */

    if (--argc != 2) usage();		/* Make sure there's a command line. */
    rptq = MYRPTQ;			/* Repeat Quote */
    rptflg = TRUE;			/* Repeat Count Processing Flag */

    filnam = *++argv;			/* Input file to encode */
    ofile = *++argv;			/* Output file to create */
    sfile();
    printf("      Done, in: %ld, out: %ld, efficiency: %.2f%%\n",
	   nc,oc,(100.0*nc)/oc);
    exit(0);
}

/*
   S F I L E - Encode a whole file
*/

char sfile() {
    char *i;

#ifdef MSDOS
#ifndef O_RAW
#define O_RAW O_BINARY
#endif /* O_RAW */
#else
#define O_RAW 0
#endif /* MSDOS */

#ifdef OSK
    fd = open(filnam,S_IREAD);
#else
    fd = open(filnam,O_RDONLY | O_RAW,0x1ff);
#endif
    if (fd < 0) {			/* Report any errors */
	printf("\n?Error opening file \"%s\"\n",filnam);
	exit(1);
    }
#ifdef MSDOS
    ofd = open(ofile,O_CREAT|O_WRONLY|O_TRUNC|O_RAW,0x1ff);
#else
#ifdef OSK
    ofd = creat(ofile,S_IWRITE);
#else
    ofd = open(ofile,O_CREAT|O_WRONLY|O_TRUNC|O_RAW,0664);
#endif /* OSK */
#endif /* MSDOS */
    if (ofd < 0) {
	printf("\n?error opening file \"%s\"\n",ofile);
	exit(1);
    }
    oc = strlen(filnam);		/* Filename string length. */
    for (i=filnam; *i != '\0'; i++)     /* Uppercase the file name. */
      if (*i >= 'a' && *i <= 'z') *i ^= 040;
    write(ofd,filnam,oc);		/* Write the file name in the file. */
#ifdef MSDOS
    write(ofd,"\r\n",2);
#else
    write(ofd,"\n",1);
#endif /* MSDOS */
    maxsize = DATALEN - 8;  /* 4 bytes (last triplet) + twice ~0 */
    rpt = 0;				/* Zero the repeat count. */
    oc = nc = 0;			/* Output & input character counts. */
    otot = 1;				/* Start with first char of triplet. */
    while (getbuf() > 0) {		/* While not EOF, get a packet. */
#ifdef MSDOS
	packet[size++] = '\r';          /* Explicit CRLF for DOS */
#endif /* MSDOS */
	packet[size++] = '\n';
	packet[size] = '\0';
	oc += size;			/* Count output record size. */
	write(ofd,packet,size); 	/* Write the record to the file. */
#ifdef DEBUG
        printf("%d: %s",size,packet);   /* Print on the screen for testing. */
#endif /* DEBUG */
    }
    close(fd);				/* Close the files neatly */
    close(ofd);
}

/*
   G E T B U F -- Do one record.
*/

getbuf() {				/* Fill one record buffer. */
    if (eoflag != 0) return(-1);	/* If at the end of file, stop. */
    size = 0;				/* Current position in record. */
    while((t = getch()) >= 0) {		/* t == -1 means EOF. */
	nc++;				/* Count the character. */
	process(t);			/* Process the character. */
	if (size >= maxsize) {		/* If the packet is full, */
	    return(size);
	}
    }
    eoflag = -1;			/* Say we hit the end of the file. */

    if (otot == 3) {			/* Only one left in triplet?  Add ~0 */
	process(0);			/* Pad the triplet with null */
	packet[size++] = rptq;		/* Put in null repeat char and */
	packet[size++] = tochar(0);	/* indicate one trailing null */
    } else {
	process(0); 			/* Clean out any remaining chars. */
	process(0);
	process(' ');
	packet[size++] = rptq;		/* Put in null repeat char and */
	packet[size++] = tochar(0);	/* indicate two trailing nulls */
	packet[size++] = rptq;
	packet[size++] = tochar(0);
    }
    return(size);
}

/*
   P R O C E S S -- Do one character.

   otot says which slot of the current triplet the character goes into.
   Slots two and three are simply stored; filling slot three packs the
   triplet and goes back to slot one.

   At slot one, NULs are not stored but counted in rpt.  The count is
   written as the repeat prefix plus tochar(count) when it reaches 78, or
   when a non-NUL character arrives and more than one NUL is pending.  A
   single pending NUL is not written as a run: it becomes the first
   character of a triplet whose second character is the new one.
*/

process(a) char a; {
    switch (otot) {
      case 1:				/* First of three: handled below. */
	break;
      case 2:
	two = a;			/* Second character of the triplet. */
	otot = 3;
	return;
      default:
	three = a;			/* Third character: triplet complete. */
	otot = 1;
	pack(one,two,three);
	return;
    }

    if (a == 0) {			/* A null at the start of a triplet. */
	rpt++;
	if (rpt == 78) {		/* Count is full, write the run out. */
	    packet[size++] = rptq;
	    packet[size++] = tochar(rpt);
	    packet[size] = '\0';
	    rpt = 0;
	}
	return;
    }

    if (rpt == 1) {			/* Exactly one null is pending: */
	one = 0;			/* it is the first character, */
	two = a;			/* this one is the second, */
	otot = 3;			/* and the third comes next. */
	rpt = 0;
	return;
    }

    if (rpt > 1) {			/* Several nulls pending: write run. */
	packet[size++] = rptq;
	packet[size++] = tochar(rpt);
	packet[size] = '\0';
	rpt = 0;
    }
    one = a;				/* Start a new triplet. */
    otot = 2;
}

/*
 * P A C K -- Encode three characters as four.
 *
 * The 24 bits of the three input characters are cut into four groups of
 * six bits, most significant bits first:
 *
 *   output 1:  the six high order bits of x
 *   output 2:  the two low order bits of x and the four high order bits of y
 *   output 3:  the four low order bits of y and the two high order bits of z
 *   output 4:  the six low order bits of z
 *
 * An ASCII zero is added to each group to make it printable.  The four
 * characters are appended to packet, a terminating NUL is stored after
 * them, and size is advanced by four.
 */
pack(x,y,z) char x,y,z; {
    int hi, mid, lo;			/* The three inputs as 8-bit values */
    char *out;				/* Where the output goes in packet */

    hi = x & 0377;
    mid = y & 0377;
    lo = z & 0377;

    out = &packet[size];
    out[0] = tochar(hi >> 2);
    out[1] = tochar(((hi & 003) << 4) | (mid >> 4));
    out[2] = tochar(((mid & 017) << 2) | (lo >> 6));
    out[3] = tochar(lo & 077);
    out[4] = '\0';
    size += 4;
}

/*
 * G E T C H -- Get the next character of the input file.
 *
 * Characters are handed out from an internal buffer that is refilled with
 * read() from fd, BSIZE bytes at a time, whenever it runs empty.
 *
 * Returns the character as a value in the range 0-255, or -1 when read()
 * returns no data (end of file) or fails.  In either case the buffer is
 * left marked empty, so a later call tries read() again rather than
 * handing out stale buffer contents.
 */
int
getch() { 				/* Get next (or pushed) char. */
#ifndef BSIZE
#define BSIZE 500
#endif
    static int next = 0, count = 0;	/* Read position, chars remaining */
    static char buf[BSIZE];

    if (count <= 0) {			/* Buffer empty, refill it. */
	count = read(fd,buf,BSIZE);
	next = 0;
	if (count <= 0) {		/* End of file or read error. */
	    count = 0;			/* Never leave a negative count. */
	    return(-1);
	}
    }
    count--;
    return(buf[next++]&0xff);
}

/* U S A G E -- Print the command syntax on stderr and exit with status 1. */

usage() {
    fputs("usage: msbmkb inputfile outputfile\n",stderr);
    exit(1);
}
