/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/* If the pattern is not a simple fixed pattern, then after preprocessing */
/* and generating the masks, the program comes here. Four cases:          */
/* 1. the pattern is a simple regular expression and no errors are        */
/*    allowed: the matching is done here.                                 */
/* 2. the pattern is a simple regular expression and unit cost errors     */
/*    are allowed: go to asearch().                                       */
/* 3. the pattern is a simple regular expression and the edit cost is     */
/*    not uniform: go to asearch1().                                      */
/* 4. the pattern is a full regular expression: go to re() if             */
/*    M <= SHORTREG (originally documented as M < 14), else go to re1().  */
/* Input parameters: old_D_pat: delimiter pattern; fd: input file         */
/* descriptor; M: size of pattern; D: number of errors.                   */

#include "agrep.h"
#include "memory.h"

/*
 * Globals defined elsewhere in agrep.  The notes below describe only how
 * the code visible in this file uses them.
 */

/* Advanced by one for every input byte consumed by the scan loop in bitap(). */
extern int CurrentByteOffset;
/* Bit-vector state and masks read by bitap(); Mask[] is indexed by the input
 * byte.  Init1 is overwritten in bitap() when I == 0.  Bit[] is not
 * referenced in the visible part of this file. */
extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
/* When > 0, bitap() stops once num_of_matched (resp. the per-file count
 * num_of_matched - prev_num_of_matched) reaches the limit. */
extern int LIMITOUTPUT, LIMITPERFILE;
/* Option flags consulted by bitap().  D_length is (re)computed there as
 * strlen(old_D_pat). */
extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE; 
/* Not referenced in the visible part of this file. */
extern char D_pattern[];
/* TRUNCATE is set/cleared by bitap(); DD and S are not referenced in the
 * visible part of this file. */
extern int TRUNCATE, DD, S;
/* Progname prefixes error messages; CurrentFileName is what gets reported
 * when FILENAMEONLY is in effect. */
extern char Progname[], CurrentFileName[];
extern int num_of_matched, prev_num_of_matched;
/* Not referenced in the visible part of this file. */
extern int agrep_initialfd;
/* Nonzero: fatal errors call exit(2); zero: set errno and return -1. */
extern int EXITONERROR;
/* In-memory input text: buffer, its length, and the read position used by
 * fill_buf() when it simulates a file. */
extern int agrep_inlen;
extern CHAR *agrep_inbuffer;
extern int agrep_inpointer;
/* In-memory output area, written by bitap() when agrep_finalfp is NULL. */
extern CHAR *agrep_outbuffer;
extern int agrep_outlen;
extern int agrep_outpointer;
extern FILE *agrep_finalfp;
/* NOTE(review): errno is declared directly instead of via <errno.h>;
 * confirm this is still valid on the target C library. */
extern int errno;

extern int NEW_FILE, POST_FILTER;

/* bitap dispatches job */

/*
 * bitap: entry point for non-fixed patterns once the masks have been built.
 *
 * Dispatch:
 *   REGEX set         -> re() when M <= SHORTREG, otherwise re1(); more
 *                        than 4 errors is rejected with an error message.
 *   D > 0 && JUMP==ON -> asearch1()
 *   D > 0             -> asearch()
 *   otherwise         -> the exact (0 error) search is performed here.
 *
 * Parameters:
 *   old_D_pat  delimiter pattern; every '^' and '$' in it is overwritten
 *              with '\n', i.e. the caller's array is modified.
 *   Pattern    not used by this function.
 *   fd         input file descriptor; when AGREP_POINTER is enabled, -1
 *              selects the in-memory text agrep_inbuffer/agrep_inlen.
 *   M          pattern size; only used to choose between re() and re1().
 *   D          number of errors allowed.
 *
 * Returns the callee's result when it dispatches.  For the exact search it
 * returns 0 when the input is exhausted, when an output limit is reached,
 * or right after a file name has been reported (FILENAMEONLY); it returns
 * -1 when output() fails, when the in-memory output buffer overflows, or
 * when the scan buffer cannot be allocated.  For D > 4 with REGEX, and for
 * allocation failure, it sets errno = AGREP_ERROR and returns -1 unless
 * EXITONERROR is set, in which case it calls exit(2).
 */
int
bitap(old_D_pat, Pattern, fd, M, D)
char old_D_pat[], *Pattern;  
int fd, M, D;  
{
	/* unsigned so that Mask[c] is never indexed with a negative value
	   for input bytes >= 0x80 on platforms where plain char is signed */
	unsigned char c;  
	register unsigned r1, r2, r3, CMask, i;
	register unsigned end, endpos, r_Init1;
	register unsigned D_Mask;
	int  ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
	int  k;
	CHAR *buffer;

	/* '^' and '$' in the delimiter both stand for a newline */
	D_length = strlen(old_D_pat);
	for(i=0; i<D_length; i++) if(old_D_pat[i] == '^' || old_D_pat[i] == '$')
		old_D_pat[i] = '\n';
	if (REGEX) { 
		if (D > 4) {
			fprintf(stderr, "%s: the maximum number of errors allowed for full regular expressions is 4\n", Progname);
			if (!EXITONERROR) {
				errno = AGREP_ERROR;
				return -1;
			}
			else exit(2);
		}
		if (M <= SHORTREG) { 
			return re(fd, M, D);   /* SUN: need to find a even point */
		}
		else { 
			return re1(fd, M, D);
		}
	}   
	if (D > 0 && JUMP == ON) 
	{ 
		return asearch1(old_D_pat, fd, D); 
	}
	if (D > 0) 
	{ 
		return asearch(old_D_pat, fd, D); 
	}
	if(I == 0) Init1 = (unsigned)037777777777;

	/* j counts delimiter hits; it is handed to output() as its last argument */
	j=0;

	r_Init1 = Init1;
	r1 = r2 = r3 = Init[0];
	endpos = D_endpos;

	/* D_Mask: D_endpos smeared over D_length bit positions, then inverted;
	   it is and-ed into the state right after a delimiter hit */
	D_Mask = D_endpos;
	for(i=1 ; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;
	D_Mask = ~D_Mask;
	FIRSTROUND = ON;

#if	AGREP_POINTER
	if (fd != -1) {
#endif	/*AGREP_POINTER*/
		/*
		 * Buffer layout: [0, Max_record) holds the tail of the previous
		 * block (the residue), [Max_record, Max_record+BlockSize) the
		 * block just read, plus one spare byte.
		 */
		alloc_buf(fd, &buffer, Max_record+BlockSize+1);
		if (buffer == NULL) {
			/* the buffer is written to immediately below */
			fprintf(stderr, "%s: malloc failure in bitap\n", Progname);
			if (!EXITONERROR) {
				errno = AGREP_ERROR;
				return -1;
			}
			else exit(2);
		}
		buffer[Max_record-1] = '\n';
		lasti = Max_record;
		while ((num_read = fill_buf(fd, buffer + Max_record, BlockSize)) > 0)
		{
			i=Max_record; 
			end = Max_record + num_read; 
			if(FIRSTROUND) {  
				/* start on the '\n' planted in front of the data */
				i = Max_record - 1 ;

				if(DELIMITER) {
					/* input that begins with the delimiter: compensate
					   the hit counter */
					for(k=0; k<D_length; k++) {
						if(old_D_pat[k] != buffer[Max_record+k]) break;
					}
					if(k>=D_length) j--;
				}

				FIRSTROUND = OFF;  
			}
			if(num_read < BlockSize) {
				/* short read: append a delimiter after the data so that the
				   last record is terminated */
				strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
				end = end + D_length;
				buffer[end] = '\0';
			}

			/* BITAP_PROCESS: the while-loop below */
			/*
			 * The loop is unrolled by two: the first step goes r3 -> r2,
			 * the second r2 -> r3.
			 * NOTE(review): i < end is not re-tested between the two
			 * steps, so the second step can read buffer[end].
			 */
			while (i < end)
			{
				c = buffer[i++];
				CurrentByteOffset ++;
				CMask = Mask[c];
				r1 = r_Init1 & r3;
				r2 = (( r3 >> 1 ) & CMask) | r1;
				if ( r2 & endpos ) {
					/* delimiter recognised */
					j++;
					/* step the offset back across the delimiter while
					   reporting; it is restored further down */
					if (DELIMITER) CurrentByteOffset -= D_length;
					else CurrentByteOffset -= 1;
					/* '^' binds tighter than '||': INVERSE is xor-ed with
					   the AND == 0 term only */
					if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
					{ 
						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
							num_of_matched++;

							if (agrep_finalfp != NULL) 
								fprintf(agrep_finalfp, "%s\n", CurrentFileName);
							else {
								/* copy the file name and a newline into the
								   in-memory output buffer */
								int outindex;
								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
										(CurrentFileName[outindex] != '\0'); outindex++) {
									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
								}
								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
									OUTPUT_OVERFLOW;
									free_buf(fd, buffer);
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
								agrep_outpointer += outindex;
							}

							/* one hit is enough when only the name is wanted */
							free_buf(fd, buffer);
							NEW_FILE = OFF;
							return 0; 
						}

						print_end = i - D_length - 1;
						if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
							if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} 
						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
							free_buf(fd, buffer);
							return 0;	/* done */
						}
					}
					/* the next record starts here; restart the automaton */
					lasti = i - D_length; 
					TRUNCATE = OFF;
					r2 = r3 = r1 = Init[0];
					r1 = r_Init1 & r3;
					r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
					if (DELIMITER) CurrentByteOffset += 1*D_length;
					else CurrentByteOffset += 1*1;
				}
				/* second half of the unrolled step (r2 -> r3) */
				c = buffer[i++];
				CurrentByteOffset ++;
				CMask = Mask[c];
				r1 = r_Init1 & r2;
				r3 = (( r2 >> 1 ) & CMask) | r1; 
				if ( r3 & endpos ) {
					j++;
					if (DELIMITER) CurrentByteOffset -= D_length;
					else CurrentByteOffset -= 1;
					if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
					{ 
						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
							num_of_matched++;

							if (agrep_finalfp != NULL) 
								fprintf(agrep_finalfp, "%s\n", CurrentFileName);
							else {
								int outindex;
								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
										(CurrentFileName[outindex] != '\0'); outindex++) {
									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
								}
								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
									OUTPUT_OVERFLOW;
									free_buf(fd, buffer);
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
								agrep_outpointer += outindex;
							}

							free_buf(fd, buffer);
							NEW_FILE = OFF;
							return 0; 
						}

						print_end = i - D_length - 1;
						if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
							if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
							free_buf(fd, buffer);
							return 0;	/* done */
						}
					}
					lasti = i - D_length ;
					TRUNCATE = OFF;
					r2 = r3 = r1 = Init[0]; 
					r1 = r_Init1 & r2;
					r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
					if (DELIMITER) CurrentByteOffset += 1*D_length;
					else CurrentByteOffset += 1*1;
				}   
			}

			/*
			 * Move the unterminated tail (from lasti to the end of the data)
			 * so that it ends at buffer+Max_record, right in front of the
			 * next block.  A tail longer than Max_record is cut and
			 * TRUNCATE is set.
			 * NOTE(review): strncpy stops copying at a NUL byte and the two
			 * regions can overlap when lasti < Max_record; memmove looks
			 * like the intended operation - confirm before changing.
			 */
			ResidueSize = num_read + Max_record - lasti;
			if(ResidueSize > Max_record) {
				ResidueSize = Max_record;
				TRUNCATE = ON;   
			}
			strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
			lasti = Max_record - ResidueSize;
			if(lasti < 0) {
				lasti = 1;
			} 
			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
				free_buf(fd, buffer);
				return 0;	/* done */
			}
		}
		free_buf(fd, buffer);
		return 0;
#if	AGREP_POINTER
	}
	else {
		/* in-memory search: scan the caller's text in place, in one pass;
		   free_buf() does nothing for fd == -1 in this configuration */
		buffer = agrep_inbuffer;
		num_read = agrep_inlen;
		end = num_read;
		/* buffer[end-1] = '\n';*/ /* at end of the text. */
		/* buffer[0] = '\n';*/  /* in front of the  text. */
		i = 0;
		lasti = 1;

		if(DELIMITER) {
			/* text that begins with the delimiter: compensate the hit
			   counter */
			for(k=0; k<D_length; k++) {
				if(old_D_pat[k] != buffer[k]) break;
			}
			if(k>=D_length) j--;
		}

			/* An exact copy of the above: BITAP_PROCESS: the while-loop below */
			while (i < end)
			{
				c = buffer[i++];
				CurrentByteOffset ++;
				CMask = Mask[c];
				r1 = r_Init1 & r3;
				r2 = (( r3 >> 1 ) & CMask) | r1;
				if ( r2 & endpos ) {
					j++;
					if (DELIMITER) CurrentByteOffset -= D_length;
					else CurrentByteOffset -= 1;
					if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
					{ 
						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
							num_of_matched++;

							if (agrep_finalfp != NULL) 
								fprintf(agrep_finalfp, "%s\n", CurrentFileName);
							else {
								int outindex;
								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
										(CurrentFileName[outindex] != '\0'); outindex++) {
									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
								}
								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
									OUTPUT_OVERFLOW;
									free_buf(fd, buffer);
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
								agrep_outpointer += outindex;
							}

							free_buf(fd, buffer);
							NEW_FILE = OFF;
							return 0; 
						}

						print_end = i - D_length - 1;
						if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
							if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} 
						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
							free_buf(fd, buffer);
							return 0;	/* done */
						}
					}
					lasti = i - D_length; 
					TRUNCATE = OFF;
					r2 = r3 = r1 = Init[0];
					r1 = r_Init1 & r3;
					r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
					if (DELIMITER) CurrentByteOffset += 1*D_length;
					else CurrentByteOffset += 1*1;
				}
				c = buffer[i++];
				CurrentByteOffset ++;
				CMask = Mask[c];
				r1 = r_Init1 & r2;
				r3 = (( r2 >> 1 ) & CMask) | r1; 
				if ( r3 & endpos ) {
					j++;
					if (DELIMITER) CurrentByteOffset -= D_length;
					else CurrentByteOffset -= 1;
					if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
					{ 
						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
							num_of_matched++;

							if (agrep_finalfp != NULL) 
								fprintf(agrep_finalfp, "%s\n", CurrentFileName);
							else {
								int outindex;
								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && 
										(CurrentFileName[outindex] != '\0'); outindex++) {
									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
								}
								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
									OUTPUT_OVERFLOW;
									free_buf(fd, buffer);
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
								agrep_outpointer += outindex;
							}

							free_buf(fd, buffer);
							NEW_FILE = OFF;
							return 0; 
						}

						print_end = i - D_length - 1;
						if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
							if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
							free_buf(fd, buffer);
							return 0;	/* done */
						}
					}
					lasti = i - D_length ;
					TRUNCATE = OFF;
					r2 = r3 = r1 = Init[0]; 
					r1 = r_Init1 & r2;
					r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
					if (DELIMITER) CurrentByteOffset += 1*D_length;
					else CurrentByteOffset += 1*1;
				}   
			}

		return 0;
	}
#endif	/*AGREP_POINTER*/
}

/*
 * fill_buf: store up to record_size bytes of input in buf.
 *
 * fd >= 0: read() is called repeatedly until record_size bytes have been
 *          stored, end of file is reached, or a read fails.
 * fd <  0: with AGREP_POINTER enabled nothing is copied and 0 is returned
 *          (the text is searched in place); otherwise the next bytes of
 *          agrep_inbuffer, starting at agrep_inpointer and limited by
 *          agrep_inlen, are copied and agrep_inpointer is advanced.
 *
 * Returns the number of bytes stored in buf; 0 at end of input or whenever
 * glimpse_clientdied is set; -1 when the very first read() fails.  Bytes
 * obtained before a later read() failure are still reported.
 */
int
fill_buf(fd, buf, record_size)
int fd, record_size; 
unsigned char *buf;
{
	int num_read=1;
	int total_read=0;
	extern int glimpse_clientdied;

	if (fd >= 0) {
		while(total_read < record_size && num_read > 0) {
			if (glimpse_clientdied) return 0;
			num_read = read(fd, buf+total_read, record_size - total_read);
			if (num_read > 0) total_read = total_read + num_read;
			/* a failure before anything was read is passed on as -1; a
			   failure after a partial read must not shrink the count */
			else if (num_read < 0 && total_read == 0) total_read = num_read;
		}
	}
#if	AGREP_POINTER
	else return 0;	/* should not call this function if buffer is a pointer to a user-specified region! */
#else	/*AGREP_POINTER*/
	else {	/* simulate a file */
		/* hand out at most what is left of the in-memory text */
		total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? (agrep_inlen - agrep_inpointer) : record_size;
		memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read);
		agrep_inpointer += total_read;
		/* printf("agrep_inpointer %d total_read %d\n", agrep_inpointer, total_read);*/
	}
#endif	/*AGREP_POINTER*/
	if (glimpse_clientdied) return 0;
	return(total_read);
}

/*
 * When AGREP_POINTER is enabled, no allocation/copying is done in these
 * functions when fd == -1, i.e., when agrep is called to search within
 * memory.  Without AGREP_POINTER they always call malloc()/free().
 */

/*
 * alloc_buf: obtain a scan buffer of `size` bytes and store its address
 * in *buf.  The result of malloc() is stored unchecked, so *buf may be
 * NULL.  With AGREP_POINTER enabled and fd == -1, *buf is left untouched.
 */
void
alloc_buf(fd, buf, size)
	int fd;
	char **buf;
	int size;
{
#if	AGREP_POINTER
	/* searching in memory: the caller already owns the text */
	if (fd == -1) return;
#endif	/*AGREP_POINTER*/
	*buf = (char *)malloc(size);
}

/*
 * free_buf: release a buffer obtained through alloc_buf().  With
 * AGREP_POINTER enabled and fd == -1 nothing is freed, matching the
 * case in which alloc_buf() allocates nothing.
 */
void
free_buf(fd, buf)
	int fd;
	char *buf;
{
#if	AGREP_POINTER
	/* searching in memory: the text belongs to the caller */
	if (fd == -1) return;
#endif	/*AGREP_POINTER*/
	free(buf);
}
