#!/bin/sh
# cgroup_throttle_count.sh -- "how much time can be attributed to CPU throttling?"


# Start a new measurement interval: write 0 to the cpuacct.usage file of the
# fos, mgmnt and root cgroups (in that order), then store the current epoch
# time in /tmp/fos_cgroup_prev_time so that the next run can work out how
# long the interval was.
# Takes no arguments; the positional parameters used below are local to the
# function call.
cgroup_reset_stat() {
	set -- \
		/cgroups/cpuacct/fos/cpuacct.usage \
		/cgroups/cpuacct/mgmnt/cpuacct.usage \
		/cgroups/cpuacct/cpuacct.usage

	while [ "$#" -gt 0 ]; do
		printf '0\n' >"$1"
		shift
	done

	date +%s >/tmp/fos_cgroup_prev_time
}
# Append a snapshot of top's output to the cgroup log.
# Globals:
#   arch        (read)     machine name; "ppc" selects the plain "tail" form,
#                          anything else first cuts at the "load average" line
#   cgroup_log  (read)     file the snapshot is appended to
#   TERM, TERMINFO (written, exported) - presumably needed so that top can
#                          start without a controlling terminal; TODO confirm
# top runs in batch mode for two iterations and only the last 15 lines of the
# result are kept.
# Both stdout and stderr of the pipeline are appended to the log. The former
# ">> 2 >> file" spelling created a stray file named "2" in the current
# directory instead of redirecting file descriptor 2.
collect_top() {
	export TERM=vt100
	export TERMINFO=/usr/share/terminfo

	# "=" is the portable string comparison; "==" is rejected by some /bin/sh.
	if [ "$arch" = "ppc" ]; then
		# LINES=15 is placed in top's environment only.
		LINES=15 top -n 2 -b | tail -n 15
	else
		top -n 2 -b | grep "load average" -A 15 | tail -n 15
	fi >> "$cgroup_log" 2>&1
}
# Main flow (one pass per invocation):
#   1. derive the length of the interval since the previous run,
#   2. sample the cpuacct usage counters of the root, fos and mgmnt cgroups,
#   3. log the figures and call errlogtest for every group whose usage is
#      above its cpu.shares value (used here as a percentage of CPU time),
#   4. zero the counters and store the timestamp for the next run.
# Nothing is done when /cgroups does not exist.
if [ ! -d "/cgroups" ]; then
	exit
fi

cgroup_log="/var/log/fos_cgroup.log"
tted=0           # becomes 1 once a "may be getting throttled" entry was logged
psaux=0          # becomes 1 once collect_top has been run in this pass
previous_time=0
arch=$(uname -m) # read by collect_top

# Count only the per-CPU "processor" entries; an unanchored match would also
# count any other line that happens to contain the word (e.g. a model name).
num_cpus=$(grep -c '^processor' /proc/cpuinfo)

# The script is assumed to run about every 60 seconds; that value is used
# whenever no usable timestamp from a previous run is available.
# Plain command substitution is used because NAME=( ... ) array assignment
# is not available in a POSIX /bin/sh.
current_time=$(date "+%s")
freq=60
if [ -f /tmp/fos_cgroup_prev_time ]; then
	stored_time=$(cat /tmp/fos_cgroup_prev_time)
	case "$stored_time" in
	'' | *[!0-9]*)
		# Empty or non-numeric timestamp: handle it like a missing file.
		;;
	*)
		previous_time=$stored_time
		freq=$(( current_time - previous_time ))
		;;
	esac
fi

# CPU-seconds available to the whole machine during the interval.
max_root_usage=$(( num_cpus * freq ))
if [ "$max_root_usage" -eq 0 ]; then
	echo "zero max_root_usage exiting: $num_cpus $freq" >> "$cgroup_log"
	exit
fi

# Sample the accumulated usage counters (nanoseconds, per the variable names).
fos_usage_nsec=$(cat "/cgroups/cpuacct/fos/cpuacct.usage")
mgmnt_usage_nsec=$(cat "/cgroups/cpuacct/mgmnt/cpuacct.usage")
root_usage_nsec=$(cat "/cgroups/cpuacct/cpuacct.usage")

# expr is kept for the nanosecond values: they do not fit into 32 bits, and
# shell arithmetic is only guaranteed to be as wide as a C long.
fos_usage_sec=$(expr "$fos_usage_nsec" / 1000000000)
mgmnt_usage_sec=$(expr "$mgmnt_usage_nsec" / 1000000000)
root_usage_sec=$(expr "$root_usage_nsec" / 1000000000)

root_usage_percent=$(( root_usage_sec * 100 / max_root_usage ))

{
	echo " "
	date
	echo "current_time:$current_time previous_time:$previous_time freq:$freq"
	echo "*************************"
} >> "$cgroup_log"

# A clock that moved backwards, or an interval longer than 100 seconds, makes
# the figures meaningless: skip the report and start a fresh interval.
if [ "$previous_time" -gt "$current_time" ] || [ "$freq" -gt 100 ]; then
	echo "Time change detected skipping logging" >> "$cgroup_log"
	cgroup_reset_stat
	exit
fi
echo "Total cpu usage: $root_usage_percent" >> "$cgroup_log"

# cpu.shares of each group, read once and used below as the guaranteed
# percentage; the per-group usage is a percentage of max_root_usage.
max_fos_usage_percent=$(cat "/cgroups/cpu/fos/cpu.shares")
max_mgmnt_usage_percent=$(cat "/cgroups/cpu/mgmnt/cpu.shares")
fos_usage_percent=$(( fos_usage_sec * 100 / max_root_usage ))
mgmnt_usage_percent=$(( mgmnt_usage_sec * 100 / max_root_usage ))

if [ "$root_usage_sec" -ge "$max_root_usage" ]; then # cpu is hitting 100%
	# CPU-seconds each group may use in this interval according to its share.
	max_fos_usage_sec=$(( max_fos_usage_percent * max_root_usage / 100 ))
	max_mgmnt_usage_sec=$(( max_mgmnt_usage_percent * max_root_usage / 100 ))

	# If the usage is more than the maximum allowed usage, raise SRM-5007.
	if [ "$fos_usage_sec" -gt "$max_fos_usage_sec" ]; then
		group="fos-group"
		{
			echo "cgroup $group may be getting throttled. Guaranteed cpu usage $max_fos_usage_percent%  actual usage $fos_usage_percent%"
			echo "High CPU processes:"
		} >> "$cgroup_log"
		collect_top
		/fabos/cliexec/errlogtest -i SRM-5007 -r -a "$group" "$fos_usage_percent" "$max_fos_usage_percent"
		echo " " >> "$cgroup_log"
		tted=1
		psaux=1
	fi

	if [ "$mgmnt_usage_sec" -gt "$max_mgmnt_usage_sec" ]; then
		group="management-telemetry-group"
		echo "cgroup $group may be getting throttled. Guaranteed cpu usage $max_mgmnt_usage_percent% actual usage $mgmnt_usage_percent%" >> "$cgroup_log"
		# The top snapshot is taken only once, even when both groups are over.
		if [ "$tted" -eq 0 ]; then
			echo "High CPU processes:" >> "$cgroup_log"
			psaux=1
			collect_top
		fi
		/fabos/cliexec/errlogtest -i SRM-5007 -r -a "$group" "$mgmnt_usage_percent" "$max_mgmnt_usage_percent"
		echo " " >> "$cgroup_log"
		tted=1
	fi
fi

# After a throttling entry, also record the raw counters as they are now.
# The heading goes to the log together with the values (it used to be
# printed on stdout).
if [ "$tted" -eq 1 ]; then
	{
		echo "cgroup accounting data:"
		echo "/cgroups/cpuacct/cpuacct.usage $(cat "/cgroups/cpuacct/cpuacct.usage")"
		echo "/cgroups/cpuacct/fos/cpuacct.usage $(cat "/cgroups/cpuacct/fos/cpuacct.usage")"
		echo "/cgroups/cpuacct/mgmnt/cpuacct.usage $(cat "/cgroups/cpuacct/mgmnt/cpuacct.usage")"
		echo " "
	} >> "$cgroup_log"
fi

# Per-interval report; written on every pass. A group above its share raises
# SRM-5009, and a top snapshot is added unless one was already taken.
echo "cpu usage for the interval" >> "$cgroup_log"

group="fos-group"
echo "$group actual cpu usage $fos_usage_percent guaranteed usage $max_fos_usage_percent" >> "$cgroup_log"
if [ "$fos_usage_percent" -gt "$max_fos_usage_percent" ]; then
	if [ "$psaux" -eq 0 ]; then
		echo "High CPU processes: top -b  -n 2 |tail -n 15" >> "$cgroup_log"
		collect_top
		psaux=1
	fi
	/fabos/cliexec/errlogtest -i SRM-5009 -r -a "$group" "$fos_usage_percent" "$max_fos_usage_percent"
fi

group="management-telemetry-group"
echo "$group actual cpu usage $mgmnt_usage_percent guaranteed usage $max_mgmnt_usage_percent" >> "$cgroup_log"
if [ "$mgmnt_usage_percent" -gt "$max_mgmnt_usage_percent" ]; then
	if [ "$psaux" -eq 0 ]; then
		echo "High CPU processes: top -b  -n 2  |tail -n 15" >> "$cgroup_log"
		collect_top
		psaux=1
	fi
	/fabos/cliexec/errlogtest -i SRM-5009 -r -a "$group" "$mgmnt_usage_percent" "$max_mgmnt_usage_percent"
fi

# reset all the usages
cgroup_reset_stat



