/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
#include <sys/types.h>
#include <sys/stat.h>
#include "glimpse.h"
#include <fcntl.h>
#define  CHAR unsigned char

/* ----------------------------------------------------------------------
get_filenames()
input: an index vector (the i-th entry is ON if the i-th partition is to be
searched), the partition table (named src_index_set[] in the original note;
the code below reads the partition boundaries from p_table[]), and the list
of all files in "NAME_LIST".
output: the list of filenames to be searched.
------------------------------------------------------------------------- */

/* Debug trace stream; only declared and used when BG_DEBUG is set. */
#if	BG_DEBUG
extern FILE *debug;
#endif	/*BG_DEBUG*/

/* get_filenames() treats partition i as files p_table[i] .. p_table[i+1]-1. */
extern int  p_table[MAX_PARTITION];
/* Names selected by get_filenames(): strdup()'d copies, GNumfiles of them. */
extern CHAR **GTextfiles;
/* read_filenames() points entry i at the start of the i-th name inside bigbuffer ('\n'-terminated there). */
extern CHAR **GTextfilenames;
/* File number recorded by get_filenames() for each GTextfiles[] entry. */
extern int *GFileIndex;
/* Number of GTextfiles[]/GFileIndex[] entries filled by the last get_filenames() call. */
extern int GNumfiles;
/* Program name, printed in the allocation-failure message. */
extern CHAR GProgname[];
/* Not referenced by the functions in this part of the file. */
extern CHAR FileNamePat[];
/* OFF: get_filenames() copies the selected names; otherwise it filters them through memagrep. */
extern int  MATCHFILE;
/* Bounds the scan of outputbuffer after memagrep_search() -- presumably the amount of output written; confirm in agrep. */
extern int  agrep_outpointer;

/* mask_int[j] is the mask used to test bit j of an index_vect word. */
extern int mask_int[32];
/* Non-zero: index_vect is a bit vector with one bit per file; zero: one int per partition. */
extern int OneFilePerBlock;
/* Index directory; used here only in error messages. */
extern char INDEX_DIR[MAX_LINE_LEN];
/* read_filenames() allocates and zeroes REAL_PARTITION ints for each of the MAXNUM_PAT entries. */
extern	unsigned int	*multi_dest_index_set[MAXNUM_PAT];
extern int file_num;	/* in index/io.c; read_filenames() sets it from the first line of NAME_LIST */
/* Size of NAME_LIST minus the length of its first line, i.e. the number of bytes held in bigbuffer. */
int bigbuffer_size;
char *bigbuffer = NULL;	/* constant buffer to read all filenames in NAME_LIST */
char *outputbuffer = NULL;	/* keeps changing: used for -F search via memagrep */
/* Only REAL_PARTITION is used in the visible code (sizing multi_dest_index_set[]). */
extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;

read_filenames()
{
	struct stat st;
	unsigned char buffer[MAX_NAME_SIZE];
	char *currptr;
	int i;

	/* one time processing: assumes during one run of glimpse, the index remains constant! */
	if (bigbuffer == NULL) {
		FILE *fp = fopen(NAME_LIST, "r");

		if (fp == NULL) {
			fprintf(stderr, "Can't open for reading: %s/%s\n", INDEX_DIR, NAME_LIST);
			exit(2);
		}
		if (-1 == stat(NAME_LIST, &st)) {
			fclose(fp);
			fprintf(stderr, "Can't stat: %s/%s\n", INDEX_DIR, NAME_LIST);
			exit(2);
		}
		fgets(buffer, MAX_NAME_SIZE, fp);
		bigbuffer_size = st.st_size - strlen(buffer);
		sscanf(buffer, "%d", &file_num);
		if ((file_num < 0) || (file_num > MaxNum24bPartition)) {
			fclose(fp);
			fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST);
			exit(2);
		}
		initialize_data_structures(file_num);
		for (i=0; i<MAXNUM_PAT; i++) {
			multi_dest_index_set[i] = (unsigned int *)my_malloc(sizeof(int)*REAL_PARTITION);
			memset(multi_dest_index_set[i], '\0', sizeof(int) * REAL_PARTITION);
		}
		bigbuffer = (char *)my_malloc(bigbuffer_size + MAX_PAT + 2);	/* The whole file + place to store -F's pattern */
		if (bigbuffer != NULL) outputbuffer = (char *)my_malloc(FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE);	/* Space for max# files per partition */
		if (outputbuffer != NULL) GTextfiles = (CHAR **) my_malloc(sizeof(CHAR *) * file_num);
		if (GTextfiles != NULL) GTextfilenames = (CHAR **) my_malloc(sizeof(CHAR *) * file_num);
		if (GTextfilenames != NULL) GFileIndex = (int *)my_malloc(sizeof(int) * file_num);
		if (bigbuffer == NULL || outputbuffer == NULL || GTextfiles == NULL || GTextfilenames == NULL || GFileIndex == NULL) {
			fclose(fp);
			fprintf(stderr, "%s: my_malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__);
			exit(2);	/* No point freeing memory */
		}
		if (bigbuffer_size != fread(bigbuffer, 1, bigbuffer_size, fp)) {/* read in whole file in CONTIGUOUS memory */
			fclose(fp);
			fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST);
			exit(2);	/* No point freeing memory */
		}
		memset(bigbuffer+bigbuffer_size, '\n', MAX_PAT + 2);
		for (i=0, currptr = bigbuffer; i<file_num && currptr < bigbuffer + bigbuffer_size; i++, currptr ++) {
			GTextfilenames[i] = (unsigned char *)currptr;
			while (*currptr != '\n') currptr ++;
		}
	}
	return 0;
}

/*
 * get_filenames()
 * Fills GTextfiles[] (strdup()'d names) and GFileIndex[] with the files
 * selected by index_vect and sets GNumfiles to the number stored.  bigbuffer
 * must already hold the '\n'-separated name list (read_filenames() loads it),
 * because it is walked here without a NULL check.
 *
 * index_vect: if OneFilePerBlock is set, a bit vector: word i, bit j (tested
 *             with mask_int[j]) selects file i*8*sizeof(int)+j.  Otherwise one
 *             int per partition: an entry > 0 selects files p_table[i] up to
 *             but not including p_table[i+1].  When MATCHFILE is not OFF,
 *             bits/entries for which memagrep_search() returns <= 0 are
 *             cleared, so the vector is modified in place.
 * argc, argv: handed to memagrep_init() when MATCHFILE is not OFF.
 * dummylen:   overwritten with the result of memagrep_init().
 * dummypat:   pattern buffer handed to memagrep_init()/memagrep_search().
 * file_num:   number of files; also the cap on how many names are stored.
 *             This parameter hides the file-level extern of the same name.
 *
 * MATCHFILE == OFF copies every selected name straight out of bigbuffer;
 * otherwise each selected region of bigbuffer is searched with memagrep and
 * only the names written to outputbuffer are copied.
 *
 * NOTE(review): the function has an implicit int return type but every exit
 * is a bare "return;" -- callers presumably ignore the result; confirm.
 * On strdup() failure it prints a message and returns with the names stored
 * so far.
 */
get_filenames(index_vect, argc, argv, dummylen, dummypat, file_num)
int  *index_vect;
int argc; /* the arguments to agrep for -F */
char *argv[];
int dummylen;
CHAR dummypat[];
int file_num;
{
	int  i=0,j, ret;
        int  start, end, k, prevk;
	/* number of names in bigbuffer that the scan has already moved past */
	int filesseen;
	/* beginptr/endptr delimit the name(s) currently being copied or searched inside bigbuffer */
	char *beginptr, *endptr;

#if	BG_DEBUG
	fprintf(debug, "get_filenames(): the following partitions are ON\n");
	for(i=0; i<((OneFilePerBlock > 0) ? round(file_num, 8*sizeof(int)) : MAX_PARTITION); i++)
		if(index_vect[i]) fprintf(debug, "i=%d,%x\n", i, index_vect[i]);
#endif	/*BG_DEBUG*/

	/* start with an empty result and the scan positioned on the first name */
	GNumfiles = 0;
	filesseen = 0;
	endptr = beginptr = bigbuffer;

	if(MATCHFILE == OFF) {	/* just copy the filenames */
	    if (OneFilePerBlock) {
		/* one bit per file: each set bit selects exactly one name */
		for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
		    if (index_vect[i] == 0) continue;
		    for (j=0; j<8*sizeof(int); j++) {
			if (!(index_vect[i] & mask_int[j])) continue;
			start = i*8*sizeof(int) + j;
			end = start + 1;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the filenames to copy.
			 * NOTE: successive "start"s ALWAYS increase.
			 */

			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}

			endptr = beginptr;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				/*
				 * NOTE(review): beginptr is on the first character of the name here, so
				 * this test is true for a one-character name rather than an empty one;
				 * the search branches make the same test with beginptr on the preceding
				 * '\n'.  Confirm how non-existent files are written in NAME_LIST.
				 */
				if (endptr == beginptr + 1) goto end_of_loop1;	/* null name of non-existent file */
				/* terminate the name in place so strdup() copies just this name; '\n' is put back afterwards */
				*endptr = '\0';
				/* return with all the names you COULD get */
				if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
					*endptr = '\n';
					fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
					return;
				}
				GFileIndex[GNumfiles] = i*8*sizeof(int) + j;
				*endptr = '\n';
				/* stop once file_num names have been stored */
				if (++GNumfiles >= file_num) goto end_files;
			end_of_loop1:
				beginptr = endptr = endptr + 1;	/* skip over '\n' */
				filesseen ++;
			}
		    }
		}
	    } /* one file per block */
	    else {
		/* Just the outer for-loop and initial begin/end values are different: rest is same */
		for (i=0; i<MAX_PARTITION; i++) {
		    if(index_vect[i] > 0) {
			/*
			 * partition i holds files start .. end-1
			 * NOTE(review): p_table is declared in this file with MAX_PARTITION entries,
			 * so p_table[i+1] reaches one past that for the last partition -- confirm
			 * the size used where p_table is defined.
			 */
			start = p_table[i];
			end = p_table[i+1];
			if (start >= end) continue;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the filenames to copy.
			 * NOTE: successive "start"s ALWAYS increase.
			 */

			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}

			endptr = beginptr;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				/* NOTE(review): as in the loop above, this is true for a one-character name -- confirm */
				if (endptr == beginptr + 1) goto end_of_loop2;	/* null name of non-existent file */
				/* terminate the name in place so strdup() copies just this name; '\n' is put back afterwards */
				*endptr = '\0';
				/* return with all the names you COULD get */
				if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
					*endptr = '\n';
					fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
					return;
				}
				/* filesseen is the position of this name in the list */
				GFileIndex[GNumfiles] = filesseen;
				*endptr = '\n';
				/* stop once file_num names have been stored */
				if (++GNumfiles >= file_num) goto end_files;
			end_of_loop2:
				beginptr = endptr = endptr + 1;	/* skip over '\n' */
				filesseen ++;
			}
		    }
		}
	    }
	}
	else {	/* search and copy matched filenames */
	    extern int REGEX, FASTREGEX;	/* agrep global which tells us whether the pattern is a regular expression or not */
	    int myREGEX, myFASTREGEX;
	    /* a result <= 0 from memagrep_init() means nothing is searched: leave with GNumfiles == 0 */
	    if ((dummylen = memagrep_init(argc, argv, MAX_PAT, dummypat)) <= 0) goto end_files;
	    /*
	     * First search over only dummylen*2 bytes of bigbuffer; its result is overwritten
	     * before being used.  Presumably it is run so that REGEX/FASTREGEX, saved on the
	     * next line, reflect this pattern -- confirm in memagrep.
	     */
	    ret = memagrep_search(dummylen, dummypat, dummylen*2, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
	    myREGEX = REGEX; myFASTREGEX = FASTREGEX;

	    if (OneFilePerBlock) {
		/* one bit per file: each set bit selects exactly one name to search */
		for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
		    if (index_vect[i] == 0) continue;
		    for (j=0; j<8*sizeof(int); j++) {
			if (!(index_vect[i] & mask_int[j])) continue;
			start = i*8*sizeof(int) + j;
			end = start + 1;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the region to search =
			 * beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
			 */

			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			/*
			 * NOTE(review): when the first selected file is file 0 nothing has been skipped,
			 * beginptr still equals bigbuffer and this steps to bigbuffer-1; the regex call
			 * below passes that pointer to memagrep_search().  Confirm this is intended.
			 */
			beginptr --;	/* I need '\n' for memory search */

			endptr = beginptr+1;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				endptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			endptr --;	/* I need '\n' for memory search */
			/* beginptr is on the '\n' before the name and endptr on the '\n' after it, so adjacent means empty */
			if (endptr == beginptr + 1) goto end_of_loop3;	/* null name of non-existent file */

#if	BG_DEBUG
			*endptr = '\0';
			fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
			*endptr = '\n';
#endif	/*BG_DEBUG*/

			/* if file in the partition matches then copy it */
			/* regex patterns get the region including the leading '\n'; other patterns start at the first character of the name */
			if (myREGEX || myFASTREGEX) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
			else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
			if (ret > 0) {
#if	BG_DEBUG
			    {
				char c = outputbuffer[agrep_outpointer + 1];
				outputbuffer[agrep_outpointer + 1] = '\0';
				fprintf(debug, "OUTPUTBUFFER=%s\n", outputbuffer);
				outputbuffer[agrep_outpointer + 1] = c;
			    }
#endif	/*BG_DEBUG*/
			    /* walk outputbuffer up to agrep_outpointer: each '\n' ends a name that begins at prevk */
			    k = prevk = 0;
			    while(k+1<agrep_outpointer) {	/* name of a file cannot have '\n' in it */
				k++;
				if (outputbuffer[k] == '\n') {
					/* terminate in place for strdup(), then restore the '\n' */
					outputbuffer[k] = '\0';
					/* return with all the names you COULD get */
					if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) {
						outputbuffer[k] = '\n';
						fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
						return;
					}
					outputbuffer[k] = '\n';
					GFileIndex[GNumfiles] = i*8*sizeof(int)+j;
					/* stop once file_num names have been stored */
					if (++GNumfiles >= file_num) goto end_files;
					k = prevk = k+1;
				}
			    }
			}
			else {
			    index_vect[i] &= ~mask_int[j];	/* remove it from the list: used if ByteLevelIndex */
			}

		    end_of_loop3:
			/* endptr is on the closing '\n'; continue from the first character of the next name */
			beginptr = endptr = endptr + 1;
		    }
		}
	    } /* one file per block */
	    else {
		/* Just the outer for-loop and initial begin/end values are different: rest is same */
		for (i=0; i<MAX_PARTITION; i++) {
		    if(index_vect[i] > 0) {
			/* partition i holds files start .. end-1 (NOTE(review): p_table[i+1] vs. the MAX_PARTITION-sized declaration -- confirm) */
			start = p_table[i];
			end = p_table[i+1];
			if (start >= end) continue;
#if	BG_DEBUG
			fprintf(debug, "start=%d, end=%d\n", start, end);
#endif	/*BG_DEBUG*/
			/*
			 * skip over so many filenames and get the region to search =
			 * beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
			 */

			while(filesseen < start) {
				while(*beginptr != '\n') beginptr ++;
				beginptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			/* NOTE(review): if start is 0 for the first searched partition this steps to bigbuffer-1 -- confirm */
			beginptr --;	/* I need '\n' for memory search */

			endptr = beginptr+1;
			while (filesseen < end) {
				while(*endptr != '\n') endptr ++;
				endptr ++;	/* skip over '\n' */
				filesseen ++;
			}
			endptr --;	/* I need '\n' for memory search */
			/* beginptr and endptr are both on '\n' characters here; adjacent means nothing lies between them */
			if (endptr == beginptr + 1) goto end_of_loop4;	/* null name of non-existent file */

#if	BG_DEBUG
			*endptr = '\0';
			fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
			*endptr = '\n';
#endif	/*BG_DEBUG*/

			/* if file in the partition matches then copy it */
			/* regex patterns get the region including the leading '\n'; other patterns start at the first character of the first name */
			if (myREGEX || myFASTREGEX) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
			else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
			if (ret > 0) {
			    /* walk outputbuffer up to agrep_outpointer: each '\n' ends a name that begins at prevk */
			    k = prevk = 0;
			    while(k+1<agrep_outpointer) {	/* name of a file cannot have '\n' in it */
				k++;
				if (outputbuffer[k] == '\n') {
					/* terminate in place for strdup(), then restore the '\n' */
					outputbuffer[k] = '\0';
					/* return with all the names you COULD get */
					if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) {
						outputbuffer[k] = '\n';
						fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
						return;
					}
					outputbuffer[k] = '\n';
					GFileIndex[GNumfiles] = filesseen - 1;	/* not sure here which one but this is never used so ok to fill junk */
					/* stop once file_num names have been stored */
					if (++GNumfiles >= file_num) goto end_files;
					k = prevk = k+1;
				}
			    }
			}
			else {
			    index_vect[i] = 0;	/* mask it off */
			}

		    end_of_loop4:
			/* endptr is on the closing '\n'; continue from the first character of the next name */
			beginptr = endptr = endptr + 1;
		    }
		}
	    }
	}

end_files:
#if	BG_DEBUG
	fprintf(debug, "The following %d filenames are ON\n", GNumfiles);
	for (i=0; i<GNumfiles; i++)
		fprintf(debug, "\t%s\n", GTextfiles[i]);
#endif	/*BG_DEBUG*/
	return;
}

