#!/bin/ksh
#
# License:	Please refer to the license file (license.txt) for license and support information
#
# Purpose:	Dump process information from both kernel(kdb) and user space (proctools/dbx).
# Usage:    	See print_usage().
# Last update:  See PDUMPVERSION
# Contact:	Tao Chen (chenttao@cn.ibm.com)

# Script version strings (current and previous release).
PDUMPVERSION="Version Jan.18.2015"
LASTPDUMPVERSION="Version Jan.08.2013"

# Force the C locale so that the output of the tools parsed below does not
# depend on the caller's locale, and use a fixed PATH of system directories.
LANG=C
LC_ALL=C
PATH=/sbin:/usr/sbin:/usr/bin

export PDUMPVERSION LASTPDUMPVERSION LANG LC_ALL PATH

print_usage()
{
	# Print the command-line synopsis on stdout.
	#
	# ${0##*/} removes everything up to the last '/', so the script name is
	# shown without its directory for relative and absolute invocations alike.
	# The synopsis lists every option accepted by the getopts string "kdplh".
	print "\nUsage: ${0##*/} [ -k ] [ -p | -d ] [ -l ] <PID>"
	print "       -k: skip kdb"
	print "       -p: call proctools"
	print "       -d: call dbx (implies -p)"
	print "       -l: long mode with more output"
	print "       -h: print this usage\n"
}

clean_up()
{
	# Signal handler (installed with 'trap' for TERM and INT): tell the user
	# where the partial output is and stop the script.
	#
	# Exits with a non-zero status so that callers can tell an interrupted
	# run from a completed one; a bare 'exit' here would report the status of
	# the preceding print, i.e. success.
	print -u2 "\nScript is stopped. Partial output is saved in $OFILE.\n"
	exit 1
}

#################################################################################################### general
#
# This section should only use commands that are least likely to hang.
#

get_general_config()
{
    # Append a general snapshot of the machine and of process $PID to $OFILE:
    # date, host, script version, user, kernel bitness (root only), selected
    # fileset levels, interim fixes, ps/svmon/proctree output.
    #
    # Globals read:    PID, OFILE, PDUMPVERSION
    # Globals written: BIT (kernel bitness, set only when running as root;
    #                  used later to derive the kdb process slot)
    typeset me kbits

    print "\nGetting general environment data ..."

    me=$(whoami)

    {
	print "Date:     $(date +%d%h%Y-%H.%M.%S)"
	print "Machine:  $(hostname) - $(uname -M)"
	print "pdump.sh: $PDUMPVERSION"
	print "User:     $me"

	## BIT is only needed for kdb, and kdb is only run by root
	if [[ $me = "root" ]]; then
		kbits=$(bootinfo -K)
		BIT=$(( kbits ))
		print "Kernel:   $BIT-bit"
	fi

	## one "level - fileset" line per fileset reported by lslpp
	print ""
	lslpp -Lc bos.up bos.mp bos.mp64 \
		bos.rte.libc bos.rte.libpthreads \
		bos.rte.filesystem bos.rte.lvm \
		bos.adt.debug bos.sysmgt.serv_aid 2>&1 |
	awk '/^bos/ { gsub(/:/, " "); printf("%-8s - %-s\n", $3, $2) }'

	print "\nemgr -l:"
	emgr -l 2>&1 | grep -p ID

	## 'C' and 'TIME' give hint on CPU usage (looping or hanging)
	print "\n# ps -fp $PID"
	ps -fp $PID

	## svmon is only run for root; separator lines ("--") are dropped
	if [[ $me = "root" ]]; then
		print "\n# svmon -P $PID"
		svmon -P $PID 2>/dev/null | egrep -v "\-\-"
		print "\n# svmon -G"
		svmon -G 2>/dev/null | egrep -v "\-\-"
	fi

	## proctree is optional: only used when installed
	if [[ -f /usr/bin/proctree ]]; then
		print "\n# proctree $PID"
		proctree $PID 2>&1
	fi

    } >> "$OFILE"

}
## END of get_general_config()

#################################################################################################### kdb

run_kdb()
{
	# Dump kernel-side state of the target process with kdb and append the
	# output to $OFILE.
	#
	# Globals read:  PSLT (process slot), OFILE (output file), LONGMODE
	# Scratch files: pdump.tslot and pdump.lock in the current directory
	#                (both removed before returning)
	#
	# kdb is run several times: short probe runs collect thread slots, file
	# pointers and lock addresses; these are used to assemble one command
	# list (COMMS) that is fed to a final kdb run.
	#
	# NOTE: several "cmd | while read ..." loops below append to COMMS.  That
	# relies on ksh running the last stage of a pipeline in the current
	# shell; in a shell that forks the last stage the additions would be lost.

	print "Dumping process information from kdb ...\n"

	print "\n# kdb \n" >> "$OFILE"
	print "\tdumping process slot $PSLT ..."

	# Separator put between kdb subcommands: five newlines once expanded by
	# print/printf.  Presumably the empty lines answer kdb's paging prompts
	# so that long output cannot stall the run -- TODO confirm.
	BK="\n\n\n\n\n"

	## 08/08/2006: tpid reports wrong list of threads, use th -p pslot instead
	## (sed keeps everything from the first kdb prompt line onwards)
	printf " th -p %d $BK proc %d $BK lle -v -p %d\n" $PSLT $PSLT $PSLT \
	| kdb | sed -n -e '/^(.*)>/,$p' >> "$OFILE"

	# Extract the slot number (2nd field) of every non-zombie thread from the
	# thread listing saved above; '!' and '>' markers glued to the slot number
	# are blanked first so that the fields split cleanly.
	awk '
	/^pvthread|^thread/ {
		sub(/!/, " ")
		sub(/>/, " ")
		if ($4 != "ZOMB")
			print $2
	}' < "$OFILE" > pdump.tslot

	if [[ ! -s pdump.tslot ]]; then
		print -u2 "Error getting thread list. Skip other kdb commands."
		# do not leave the empty scratch file behind
		rm -f pdump.tslot
		return
	fi

	# USER is a proc structure shared by all threads,
	# in kdb though, 'user' command actually prints u-block of each threads.
	# ( USER = u-block - uthread )
	# so only the first thread's full user output is saved here.

	# build a command list
	print "\tbuilding kdb commands ..."
	COMMS=""

	FIRSTT="true"
	for TSLT in $(cat pdump.tslot)
	do
		print "\tthread slot $TSLT ..."
		if [[ $FIRSTT = "true" ]]; then
			COMMS="$COMMS$BK th $TSLT $BK user $TSLT $BK f $TSLT $BK f -v $TSLT $BK sw $TSLT $BK mst $BK dr iar $BK sr64 $BK segst64 $BK u -ad"
			########## 2008/08/21: add file output ##########
			# one 'file' subcommand per distinct file pointer, capped at 20
			filecnt=0
			print "u -f $TSLT" | kdb | awk '/fd .* fp/ {gsub(/fp\.\./, "", $3); print $3}' | sort -u | while read fp
			do
				filecnt=$((filecnt + 1))
				if [[ $filecnt -gt 20 ]]; then
					print "\n... warning: open file count greater than 20, skip ...\n" >> "$OFILE"
					break
				fi
				COMMS="$COMMS$BK file $fp"
			done

			FIRSTT="false"
		else
			COMMS="$COMMS$BK th $TSLT $BK user -ut $TSLT $BK f $TSLT $BK f -v $TSLT $BK sw $TSLT $BK mst"
		fi
	done
	rm pdump.tslot >/dev/null 2>&1

	############################################## lock in kdb ##############################################
	#
	#
	# for each thread in this section, f and f -v output are collected.
	# f is for easy copy/paste into pmr/email
	# f -v is more comprehensive than "set 10; set 18".

	# Get lock info if any thread is waiting on lock
	# Sample:
	# pvthread+01F300  499 harmad   SLEEP 1F3001 03C   9  0 F10000E33E1D8000 slist_table+000800
	# pvthread+020900  521 lspv     SLEEP 209061 03C   2  0 082522C0         slist_table+000E20

	# select threads based on unique name and lock address,
	# because if two threads have the same name and lock address,
	# chances are they are in the same stack.
	# "uniq -f1" means ignore one field (tslot) when determining uniqueness.

	COMMS="$COMMS$BK lq"
	COMMS="$COMMS$BK th -w WLOCK"
	COMMS="$COMMS$BK th -w WSLOCK"

	print "th -w WLOCK \n th -w WSLOCK" | kdb | grep ^pvthread | sort -k9 -k3 > pdump.lock
	awk '{ print $2, $3, $9 }' < pdump.lock | uniq -f1 | while read tslot tname lkaddr
	do
		## clk command covers slk just fine.
		COMMS="$COMMS$BK clk $lkaddr $BK f $tslot $BK f -v $tslot"
	done
	rm pdump.lock >/dev/null 2>&1

	# Get stack for those holding a lock (in earlier version we only check SLEEP, but now for all but "kdb_64").
	# Sample:
	# pvthread+018500  389 scopeux  SLEEP 185015 03C   8 0 F10000E33E1D9538
	# pvthread+01F300  499 harmad   SLEEP 1F3001 03C   9 0 F10000E33E1D8000 slist_table+000800
	# pvthread+02EB00  747 sas      SLEEP 2EB04F 064  13 0 vmmswpft+77793160
	# pvthread+026D00  621!kdb_64   RUN   26D0067 061   24         0

	COMMS="$COMMS$BK th -lk"
	print "th -lk" | kdb | awk '/^pvthread|^thread/ { sub(/!/, " "); sub(/>/, " "); if ($3 != "kdb_64") print $2}' | while read tslot
	do
		COMMS="$COMMS$BK f $tslot $BK f -v $tslot"
	done

	## 01/25/2006: dla may generate endless "Out of lock descriptors" error
	# COMMS=$COMMS"$BK dla"

	#
	#
	############################################## end of lock ##############################################

	# I/O
	# mounted filesystem
	COMMS="$COMMS$BK vfs"
	COMMS="$COMMS$BK pdt *"
	COMMS="$COMMS$BK th -w WPGIN"

	# runqueue
	COMMS="$COMMS$BK rq"
	COMMS="$COMMS$BK rqi"
	COMMS="$COMMS$BK th -r"

	# kernel extension:
	COMMS="$COMMS$BK lke"

	if [[ -n $LONGMODE ]]
	then

		# library
		COMMS="$COMMS$BK lle -l32"
		COMMS="$COMMS$BK lle -l64"

		# IPC :
		COMMS="$COMMS$BK ipc 1 1"
		COMMS="$COMMS$BK ipc 2 1"
		COMMS="$COMMS$BK ipc 3 1"
	fi

	# send commands to kdb
	# COMMS must be quoted: unquoted, the '*' of "pdt *" would be expanded by
	# the shell to the file names in the current directory.  print still
	# turns the "\n" sequences of $BK into real newlines.
	print "\texecuting kdb commands ..."
	print "$COMMS" | kdb | sed -n -e '/^(.*)>/,$p' >> "$OFILE"

}
## END of run_kdb()

#################################################################################################### proc

run_proctools()
{
	# Run each /proc-based tool against $PID and append a "# <tool> <pid>"
	# header followed by the tool's stdout and stderr to $OFILE.
	#
	# Globals read: PID, OFILE
	typeset tool

	print "\nDumping process information with proc tools ...\n"

	# proctree is not in this list: it is collected by get_general_config.
	for tool in proccred procfiles procflags procldd \
		    procmap procsig procstack procwdx
	do
		print "\n# $tool $PID" >> "$OFILE"
		$tool $PID >> "$OFILE" 2>&1
	done
}

#################################################################################################### dbx

run_dbx()
{
	# Attach dbx to process $PID and append user-space state to $OFILE:
	#   1. a first dbx run dumps the current thread, maps, locks and the
	#      thread list;
	#   2. if that thread list shows kernel thread ids ("k-tid"), a second
	#      run dumps registers, stack trace and stack memory of every thread;
	#   3. finally every "Object name:" found in $OFILE is listed with ls -l,
	#      following symbolic links.
	#
	# Globals read:    PID, OFILE
	# Globals written: DBXCMD, COMMS, BRK, SECT, TPT, PT (scratch)
	#
	# NOTE: the "| while read TPT" loop appends to COMMS and so relies on ksh
	# running the last stage of a pipeline in the current shell.
	typeset obj realobj target hops

	print "\nDumping process information from dbx ...\n"

	DBXCMD="/usr/bin/dbx -a $PID"

	print "\n# dbx -a $PID \n" >> "$OFILE"

	## Get the current thread's output first.
	## Each subcommand is preceded by a dbx print ("p '...'") of its own name
	## so that the sections can be told apart in the output.
	print "\n p '(dbx) where' \n where \
	      \n p '(dbx) x' \n x     \
	      \n p '(dbx) (\$stkp)/200' \n (\$stkp)/200 \
	      \n p '(dbx) map' \n map   \
	      \n p '(dbx) p __n_pthreads' \n p __n_pthreads \
	      \n p '(dbx) p __multi_threaded' \n p __multi_threaded \
	      \n p '(dbx) mutex' \n mutex \
	      \n p '(dbx) condition' \n condition \
	      \n p '(dbx) rwlock' \n rwlock \
	      \n p '(dbx) dump .' \n dump . \
	      \n p '(dbx) th' \n th \
	      \n p '(dbx) detach' \n detach" | $DBXCMD >> "$OFILE" 2>&1

	if [[ $? -ne 0 ]]; then
		## diagnostics go to stderr (fd 2) like everywhere else in the script
		print -u2 "Can not dbx attach to the process. Skip dbx."
		return
	fi

	## multi-threaded?
	if sed -n -e '/(dbx) th/,$p' "$OFILE" | grep "k-tid" >/dev/null 2>&1
	then
		## build a command list
		BRK='p "."'
		SECT=">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
		COMMS="$BRK\n"

		COMMS="${COMMS}p '$SECT thread info'\n$BRK\n thread info\n$BRK\n"

		## Since thread number is not necessarily continuous,
		## we have to get the exact thread number.

		sed -n -e '/(dbx) th/,$p' "$OFILE" | awk '/^.\$t[0-9]/ {print $1}' | while read TPT
		do
			## keep what follows the first 't' of the "$t<N>" token
			PT=${TPT#*t}

			print "\tdumping tid $PT ..."
			COMMS="${COMMS}p '$SECT thread current $PT' \n$BRK\n thread current $PT\n"
			COMMS="${COMMS}p '$SECT x $PT' \n$BRK\n x \n$BRK\n"
			COMMS="${COMMS}p '$SECT where $PT' \n$BRK\n where \n$BRK\n"
			COMMS="${COMMS}p '$SECT (\$stkp)/100 $PT' \n$BRK\n (\$stkp)/100 \n$BRK\n"
		done
		COMMS="${COMMS}p '(dbx) detach' \n detach"

		print "\n# dbx -a $PID \n" >> "$OFILE"
		print "$COMMS" | $DBXCMD 2>/dev/null >> "$OFILE"
	fi

	print "\tlisting object files ..."
	print "\n + List of object files: \n" >> "$OFILE"
	awk '/Object name: / { print $NF }' "$OFILE" | /usr/bin/sort | /usr/bin/uniq | while read obj
	do
		## the main program may not locate in the current directory
		## so ls -l may return error > /dev/null
		realobj=$obj
		/bin/ls -l "$realobj" >> "$OFILE" 2>/dev/null

		## Follow the symbolic link chain, listing every hop.  The hop
		## limit keeps a link cycle from looping forever.
		hops=0
		while [[ -L $realobj && $hops -lt 32 ]]
		do
			target=$(/bin/ls -l "$realobj" 2>/dev/null | awk '{print $NF}')
			[[ -z $target ]] && break

			## a relative link target is relative to the link's directory
			case $target in
			/*)	realobj=$target ;;
			*)	case $realobj in
				*/*)	realobj=${realobj%/*}/$target ;;
				*)	realobj=$target ;;
				esac ;;
			esac

			/bin/ls -l "$realobj" >> "$OFILE" 2>/dev/null
			hops=$((hops + 1))
		done
	done
}
## END of run_dbx

#################################################################################################### main

## Parse command-line options; each accepted option just sets a flag variable.
## (In ksh a top-level 'return' ends the script, like 'exit'.)
while getopts :kdplh opt
do
	case $opt in
	k)	NOKDB=1 ;;
	d)	USEDBX=1 ;;
	p)	PROCTOOL=1 ;;
	l)	LONGMODE=1 ;;
	h)	print_usage
		return 0 ;;
	\?)	print -u2 "\nInvalid parameter"
		print_usage
		return 1 ;;
	esac
done

## drop the parsed options so that $1 is the first operand (the PID)
shift $((OPTIND - 1))

## check tools
## (In ksh a top-level 'return' ends the script, like 'exit'.)
{
	## check permission to run kdb: unless -k was given, root is required
	if [[ -z $NOKDB && $(whoami) != root ]]; then
		print -u2 "\n'root' authority is required for kdb (use '-k' to skip kdb).\n" 
		return 1
	fi

	## check dbx tool: -d was given but the dbx binary is missing.
	## dbx is only run when -d is given, so leaving -d out is what skips it.
	if [[ -n $USEDBX && (! -f /usr/bin/dbx) ]]; then
		print -u2 "/usr/bin/dbx doesn't exist. Install bos.adt.debug or omit '-d' to skip dbx."
		return 1
	fi
}

## validate PID
## Sets PID from the single remaining operand, or prints a diagnostic and ends
## the script (in ksh a top-level 'return' behaves like 'exit').
{
	## need exactly one parameter
	if [[ $# -ne 1 ]]; then
		print_usage
		return 1
	fi

	## numeric?  Removing the longest leading run of digits must leave nothing.
	if [[ ${1##+([0-9])} != "" ]] ; then
		print -u2 "\n$1 is not a PID"
		print_usage
		return 1
	fi

	## existing PID?  Any non-zero status from ps means the process could
	## not be confirmed, not only status 1.
	/bin/ps -p "$1" > /dev/null 2>&1
	if [[ $? -ne 0 ]]; then
		print -u2 "\nPID $1 doesn't exist.\n"
		return 1
	fi

	PID=$1
}


## create output file
## Name: pdump.<command name>.<PID>.<timestamp>.out in the current directory.
{
	cmdname=$(/bin/ps -p $PID -ocomm=)
	stamp=$(date +%d%h%Y-%H.%M.%S)
	OFILE="pdump.$cmdname.$PID.$stamp.out"

	## a bare redirection creates (or truncates) the file; its status tells
	## whether the current directory is writable
	> "$OFILE"
	if [[ $? -ne 0 ]]; then
		print -u2 "\nCannot create output file in the current directory. Please check permission.\n"
		return 1
	fi
}

#######################################
# pid_to_pslot PID KBITS OSLEVEL TL
# Print the kdb process slot number for a process id.
# Arguments:
#   $1 - process id
#   $2 - kernel bitness (32 or 64)
#   $3 - first three characters of the oslevel output, e.g. "6.1"
#   $4 - third component of the bos.mp64 fileset level (only consulted
#        when $3 is "6.1")
# Outputs: the slot number on stdout
#######################################
pid_to_pslot()
{
	typeset pid=$1 kbits=$2 rel=$3 tl=$4 slot

	# init is an exception: every formula below yields 0 for PID 1
	if [[ $pid -eq 1 ]]; then
		echo 1
		return
	fi

	if [[ $kbits -eq 32 ]]; then
		slot=$(( pid / 256 ))
	elif [[ $rel = "5.1" || $rel = "4.3" ]]; then
		slot=$(( pid / 8192 ))
	elif [[ $rel = "5.3" || ( $rel = "6.1" && $tl -lt 5 ) ]]; then
		slot=$(( pid / 256 % 16 * 16384 + pid / 4096 ))
	else
		## PSLT calculation changed from 6.1.5.0 due to feature 716192
		slot=$(( pid / 256 % 256 * 1024 + pid / 65536 ))
	fi
	echo $slot
}

## from here on, TERM/INT report the partial output file before stopping
trap clean_up TERM INT

## collect output
{
	get_general_config

	if [[ -z $NOKDB ]]; then

		## from PID to PSLT
		osl=$(oslevel | cut -c1-3)
		TL=0
		if [[ $osl = "6.1" ]]; then
			## only 6.1 needs the bos.mp64 level to pick the formula
			TL=$(lslpp -l bos.mp64 | awk '/mp64/ {print $2; exit}' | cut -d'.' -f3)
		fi
		PSLT=$(pid_to_pslot "$PID" "$BIT" "$osl" "$TL")

		run_kdb
	fi

	## proc tools are run for -p and also for -d, when they are installed
	if [[ -f /usr/bin/procstack ]]; then
		if [[ -n $PROCTOOL || -n $USEDBX ]]; then
			run_proctools ## new in 5.2+
		fi
	fi

	if [[ -n $USEDBX ]]; then
		run_dbx
	fi

	# extra data
	{
		print "\n# ps -mp $PID -o THREAD\n"
		ps -mp $PID -o THREAD
		print "\n# ps auxeww $PID\n"
		ps auxeww $PID
		print "\n# ps -efk \n" 
		ps -efk 

		if [[ -n $LONGMODE ]]; then
			print "\n# ipcs -a \n"
			ipcs -a

			## The inner quotes of the header lines are escaped so that
			## the header shows the command exactly as it is run.
			if [[ $(oslevel) = "5.3.0.0" ]]; then
				print "\n# genld -ld | egrep -p \"^Proc_pid:[[:blank:]]+$PID[[:blank:]]\"\n"
				genld -ld | egrep -p "^Proc_pid:[[:blank:]]+$PID[[:blank:]]"
				print "\n# genkld -d \n"
				genkld -d
			else
				print "\n# genld -l | egrep -p \"^Proc_pid:[[:blank:]]+$PID[[:blank:]]\"\n"
				genld -l | egrep -p "^Proc_pid:[[:blank:]]+$PID[[:blank:]]"
			fi
			
			## network stuff
			print "\n# netstat -Aan\n"
			netstat -Aan
		fi
	} >> "$OFILE"
}

## Report the result.  $OFILE is quoted because it is passed as a command
## argument here and contains the target's command name.
print "\nDone.\nOutput file is $OFILE\n"
/bin/ls -l "$OFILE"
print

## The End
## Change Log:
## 07.31.2007 - move some long output to $LONGMODE only, such as ipc, sys loader, netstat, etc., to reduce default output file size
##            - add dr iar & segment register commands
## 08.21.2008 - add file output for each ufd entry
## 02.02.2009 - add svmon -G output
## 02.25.2011 - Srinivasa Rao: Effective from 61TL05 the calculations for PROCSLOT has changed due to enhanced affinity feature 716192.
## 03.02.2011 - Jun Kuwahara: $klvl >= "6.1.5.0" does not work, -ge does, 
##            - change code to be more strict: '=' for oslevel string, '-lt' for TL arithmetic.
## 03.29.2011 - a mistake in PID -> pslot was caught by Shang Li
## 01.08.2013 - Yuta Chiba: kdb doesn't always run on cpu 0, change prompt match from ^(0) to ^(.*)>
## 01.18.2015 - add 'WSLOCK' since WLOCK no longer covers WSLOCK; add stacks for non-SLEEP lock owners
