#!/bin/sh
# awk file for looking at statit output.
# Usage: avg file_name [-[ds]c command_field] [-[ds]i iterations] header line_num col_num [header line_num col_num]...
# options:
# -[ds]c Field_num: specify field number in command line to sum over.
#       avg will output the average over every output with the same
#	command line field value.
#	The first column in the output is the value of the field.
# -[ds]i iterations: specify number of statit outputs over which to average.
#
#       The optional d flag is a debugging aid which prints part of the
#       statit line from which the field was taken, and the cumulative sum.
#	The optional s flag computes the sample standard deviation of the
#	variables and prints them in a row under the mean.
#       Omitting both the -c and -i options will cause the average to be
#       calculated over every statit output in the specified file.
#	The first column in the output is the output row number.
#
# the variables to be averaged are specified by a header which will be
# printed above the average, the variable's line number (starting from 1)
# and the field number (starting with 1) in the statit output.
#
# Example Usage:
# Suppose you ran a benchmark with 5 different options and 3 iterations
# per option, and put all the statit output in one file.  avg can be used
# to find the average of the 3 iterations/option and output 1 line of
# stats for each option.
# In this case suppose we wanted to average just the elapsed time and the
# % idle time, the input to avg would be as given below, given that the statit
# output looks like this:
# % cat stat.out
# Hostid: 5100bbd2 Hostname: "firefly" Version: 2.05 Command:
#                       Elapsed Time Statistics
# 60701.84 time (seconds)	100.00 % Start time: Tue Apr 23 13:15:36 1991
#  8328.97 user time		 13.72 %
# 22742.40 nice time		 37.47 %
# 19775.88 system time		 32.58 %
#  9854.59 idle time		 16.23 %
#  .
#  .
#  % avg stat.out -i 3 ElpsTime 3   1   Idle% 7   4
#                  | |    ^     ^   ^    ^    ^   ^
#                   |     |     |   |    |    |   |
#              #iters  header  lin col  hdr  lin col

# Usage text, shared by the shell wrapper and the awk program.
usage='Usage: avg file_name [-[ds]c command_field] [-[ds]i iterations] header line_num col_num [header line_num col_num]...'

# main file_name [-[ds]c field | -[ds]i iterations] header line col ...
#
# Averages the selected fields of the statit outputs found in file_name
# and prints one row per group (see the header comment of this file).
# Diagnostics go to stderr.  Returns 0 on success and non-zero when the
# file is missing/empty, the arguments are malformed, or the file does
# not contain any statit output.
main() {
	if [ "$#" -lt 1 ]
	then
		echo "$usage" 1>&2
		return 1
	fi
	if [ ! -s "$1" ]
	then
		echo "Could not find file $1" 1>&2
		return 1
	fi
	statfile=$1
	shift
	# Send the command line inputs as the first record to the awk script.
	# The file name is replaced by the fixed word "avg" so that a name
	# containing blanks cannot shift the field positions awk relies on,
	# and "$*" is quoted so header arguments are never glob-expanded.
	{ printf 'avg %s\n' "$*"; cat -- "$statfile"; } | awk -v usage="$usage" '
# Print one output row: the label followed by the mean of every
# selected variable over the iter samples accumulated so far and, in
# sd mode, a second row holding the sample standard deviations.
# The accumulators are then cleared for the next group.
# A group with a single sample has no defined sample standard
# deviation; 0 is printed instead of dividing by zero.
function report(label,    i, mean, var, sd) {
	printf("%s", label);
	for(i = 0; i < numstat; i++) {
		printf("	%8.2f", data[i]/iter);
	}
	printf("\n");
	if(sflag) {
		printf("sd");
		for(i = 0; i < numstat; i++) {
			sd = 0;
			if(iter > 1) {
				mean = data[i]/iter;
				var = (1/(iter - 1)) * (datasq[i] -\
					iter * mean * mean);
				# rounding can push a zero variance below 0
				if(var > 0)
					sd = sqrt(var);
			}
			printf("	%8.2f", sd);
			datasq[i] = 0;
		}
		printf("\n");
	}
	for(i = 0; i < numstat; i++) {
		data[i] = 0;
	}
	iter = 0;
}
# Report a malformed command line on stderr and stop with status 1.
function badusage() {
	print usage | "cat 1>&2";
	errflag = 1;
	exit 1;
}
BEGIN {
	iter = 0;	# samples accumulated in the current group
	line = -100;	# line number within the current statit output
	l = 2;		# default command line fld to look for 1st header
	iterfield = -1;	# default - no iterations
	comfield = 0;	# default no command line variable
	dflag = 0;	# default debugging is off
	sflag = 0;	# default sd is off
}
# First record: the command line ("avg" followed by the arguments).
NR == 1 {
	if(NF == 1)
		badusage();
	numstat = NF - 1;
	# Only an argument starting with "-" is an option word; a header
	# such as ElpsTime or Idle% must not be scanned for flag letters.
	if($2 ~ /^-/) {
		if($2 ~ /d/) {			# debug mode
			dflag = 1;
		}
		if($2 ~ /s/) {			# sd mode
			sflag = 1;
		}
		if($2 ~ /c/) {			# watch for change in comfield
			comfield = $3;
			numstat -= 2;
			l = 4;
		}
		else if($2 ~ /i/) {		# sum over iterfield iterations
			iterfield = $3;
			numstat -= 2;
			l = 4;
			users = 1;
		}
	}

	# What is left must be one or more header/line/column triples.
	if(numstat < 3 || numstat%3 != 0)
		badusage();
	numstat = numstat/3;
	for(i = 0; i < numstat; i++) {
		m = 3 * i + l;
		j = m + 1;
		k = m + 2;
		linum[i] = $j;
		field[i] = $k;
		headr[i] = $m;
	}
	printf("Config");
	for(i = 0; i < numstat; i++) {
		printf("	%8.8s", headr[i]);
	}
	printf("\n");
	# The argument record is not statit data; keep it away from the
	# rules below.
	next;
}
# A line containing "Command:" starts a new statit output.
/Command:/ {
	if(comfield) {
		# close the group when the watched field changes value
		musers = $comfield;
		if(pusers == "") {
			pusers = musers;
		}
		if(musers != pusers) {
			report(sprintf("%-7.7s", pusers));
			pusers = musers;
		}
	}
	else if(iterfield > 0) {
		# close the group after iterfield outputs
		if(iterfield == iter) {
			report(sprintf("%d", users));
			users++;
		}
	}
	iter++;
	line = 1;
}

# Every line: add the requested fields of the requested line numbers.
{
	for(i = 0; i < numstat; i++) {
		if(line == linum[i]) {
			j = field[i];
			data[i] += $j;
			if(sflag)
				datasq[i] += $j * $j;
			if(dflag) {
				printf("Line %2d Sum %7.2f > %s\n", line,  data[i], $0);
			}
		}
	}
	line++;
}
END {
	if(errflag)
		exit 1;
	# Without any statit output there is nothing to average.
	if(iter == 0) {
		print "avg: no statit output (Command: line) found" | "cat 1>&2";
		exit 1;
	}
	# flush the last (or only) group
	if(comfield)
		report(sprintf("%-7.7s", pusers));
	else
		report(sprintf("%d", users));
}'
}

main "$@"



