#!/usr/bin/perl
#
# Copyright 2002 VMware, Inc.  All rights reserved.
#
# Summary log analyzer

use strict;

use POSIX qw(strftime);

#
# Constants
#

# Location of the summary logs.  The live log is $logRoot . $logFileName;
# rotated logs live in the $logFileDir directory below $logRoot and carry a
# numeric suffix between 1 and $maxRotatedLogs.
my $logFileName    = "vmksummary";
my $logFileDir     = "vmksummary.d";
my $logRoot        = "/var/log/";
my $maxRotatedLogs = 36;

# Maps internal cause names to the text shown in the report.
my %prettyNames = (
   'unload'            => "VMkernel unloaded",
   'start'             => 'server booting',
   'hb:vmk not loaded' => 'VMkernel not loaded',
   'hb:running linux'  => 'server booted into linux',
   'reboot'            => 'server rebooting',
   'halt'              => 'server halted',
);

# Expected distance between two heartbeats, in seconds.
my $hbInterval = 3600;

# Fraction of a duration by which heartbeat uptimes and timestamps may
# disagree before the timestamps are distrusted.
my $hbErrorMargin = 0.05;

# Causes longer than this many characters are truncated in the report.
my $maxCauseLength = 80;

#
# Globals
#
my @logData;                 # parsed log entries (hash refs), oldest first
my $malformedLines = 0;      # number of lines that could not be parsed
my @states;                  # state records created by addState()
my $causeNeeded;             # state whose cause may still be filled in later
my %stats;                   # everything that ends up in the report
my %muiOperationRequested;   # cached operation requested through the MUI
my %warnings;                # warning text => number of occurrences

# AddWarning
#
# Writes "Warning: <text>" to stderr and counts, per distinct text, how often
# it was reported.  The counts are kept in %warnings so that they can be
# appended to the html report later.
#
# Parameters:
#   $str - the warning text
sub AddWarning($)
{
   my ($message) = @_;

   print STDERR "Warning: $message\n";

   # first occurrence starts the tally at one
   $warnings{$message} = 0 if (!defined $warnings{$message});
   $warnings{$message} += 1;
}

#
# processLine()
#
# Parses a single line of the summary log and, when it is recognised, appends
# a hash ref describing it to the @logData array.
#
# Parameters:
#   $line - raw text of the log line (one trailing newline is removed)
#   $file - name of the log the line was read from; only used in warnings
#
# Every stored entry carries the keys hostname, prog, time and type.
# Depending on the type it additionally carries:
#   hb        vmkernel, vmnix, uptime and, when the VMkernel is loaded,
#             vmk_uptime, num_vms, vmk_build, vmx_build, swap_used and
#             memHog_1 .. memHog_3 (undef when the line omits them)
#   mui_op    mui_text
#   vmkerror  psod_text
#   unknown   line (the unparsed message); a warning is recorded as well
#
# Old style lines (no "(seconds)" field after the program name) are skipped
# silently: there is no way to determine their year.  Lines without the
# expected timestamp / host prefix are counted in $malformedLines and reported
# through AddWarning().
#
sub processLine($$)
{
   my ($line, $file) = @_;

   chomp($line);

   #read time stamp
   if ($line =~ m/^(\w*\s+\d+\s+\d+:\d+:\d+)\s+(.*)$/) {
      my ($date, $rest) = ($1, $2);

      # new style: "<host> <prog>: (<seconds>) <message>"
      if ($rest =~ /^([^ ]+)\s*(\w*):\s+\((\d+)\)\s+(.*)$/) {
         # copy the captures right away; the matches below overwrite them
         my %entry = (hostname => $1, prog => $2, time => $3);
         my $msg = $4;

         if ($msg =~ m/vmk loaded,\s+(\d*\.?\d*),\s+(\d*\.?\d*)(.*)\s*$/) {
            my ($uptime, $vmkUptime, $extra) = ($1, $2, $3);
            # Optional comma separated stats follow the two uptimes.  Field 0
            # of the split is skipped, exactly as before; missing fields
            # simply stay undef.
            my @optStats = split(/,/, $extra);
            $entry{vmkernel} = 1;
            $entry{vmnix} = 1;
            $entry{uptime} = $uptime;
            $entry{vmk_uptime} = $vmkUptime;
            $entry{num_vms} = $optStats[1];
            $entry{vmk_build} = $optStats[2];
            $entry{vmx_build} = $optStats[3];
            $entry{swap_used} = $optStats[4];
            $entry{memHog_1} = $optStats[5];
            $entry{memHog_2} = $optStats[6];
            $entry{memHog_3} = $optStats[7];
            $entry{type} = "hb";
         } elsif ($msg =~ m/vmk not loaded,\s+(\d*\.?\d*),\s+(.*)\s*$/) {
            my ($uptime, $kernel) = ($1, $2);
            $entry{vmkernel} = 0;
            $entry{uptime} = $uptime;
            $entry{vmnix} = ($kernel =~ m/vmnix/);
            $entry{type} = "hb";
         } elsif ($msg =~ m/Rebooting system/) {
            $entry{type} = "reboot";
         } elsif ($msg =~ m/Starting system/) {
            $entry{type} = "start";
         } elsif ($msg =~ m/Halting system/) {
            $entry{type} = "halt";
         } elsif ($msg =~ m/unloaded VMkernel/i) {
            # must be tested before "loaded VMkernel", which it contains
            $entry{type} = "unload";
         } elsif ($msg =~ m/loaded VMkernel/i) {
            $entry{type} = "load";
         } elsif ($msg =~ m/UserAction\s+(.*)/) {
            $entry{mui_text} = $1;
            $entry{type} = "mui_op";
         } elsif ($msg =~ m/VMkernel error:/i) {
            if ($msg =~ m/\@BlueScreen:\s*(.*)$/) {
               $entry{psod_text} = "VMkernel error: $1";
            } else {
               $entry{psod_text} = "VMkernel error: Unknown (no core file)";
            }
            $entry{type} = "vmkerror";
         } elsif ($msg =~ m/VMkernel error/i) {
            $entry{psod_text} = "VMkernel error";
            $entry{type} = "vmkerror";
         } else {
            $entry{type} = "unknown";
            $entry{line} = $msg;
            AddWarning("Couldn't parse entry: $date:[$msg] in $file");
         }
         push(@logData, \%entry);
         return;
      }

      # old style -- no longer supported (no way to determine year).
      # Return instead of using "next": that only worked because the caller
      # happened to invoke us from inside a loop.
      return if ($rest =~ m/^([^ ]+)\s*(\w*):\s+(.*)$/);
   }

   # neither format matched
   $malformedLines++;
   AddWarning("Malformed line in $file ignored.");
}

#
# readOneLog
#
# Adds the contents of the specified log to the @logData array by handing
# every line to processLine().
#
# Log files are ordered oldest to newest entry.  This ordering is
# preserved when adding elements to @logData.
#
# Parameters:
#   $file - path of the log file to read
#
# Returns 1 when the file could be opened, 0 otherwise.
#
sub readOneLog($)
{
   my ($file) = @_;

   print STDERR "Reading log $file\n";

   # Three argument open with a lexical handle: the file name can never be
   # taken for an open mode, and no package-wide glob is involved.
   open(my $fh, '<', $file) or return 0;

   while (my $line = <$fh>) {
      processLine($line, $file);
   }
   close($fh);
   return 1;
}

#
# readAllLogs()
#
# Reads in all of the summary logs.  It is assumed that
# the current log is in $logRoot . $logFileName, and that
# the older logs are in $logRoot . $logFileDir/$logFileName.X
# (where X = [1..$maxRotatedLogs]).
#
# Returns 1 when at least one log could be read, 0 otherwise.
#
sub readAllLogs()
{
   my $found = 0;
   my $success = 0;

   #the log with the highest suffix has the oldest
   #data -- By reading logs in highest to lowest order
   #@logData will hold the lines in the same order
   #they were  logged.
   for (my $i = $maxRotatedLogs; $i > 0; $i--) {
      # $logRoot already ends in a slash, so none is added here (this is
      # how the path of the current log is built below as well)
      my $fname = $logRoot . "$logFileDir/$logFileName.$i";
      if (-e $fname) {
         $found = 1;
         $success = 1 if (readOneLog($fname));
      } elsif ($found) {
         # an older log exists, so this one is a gap in the sequence.
         # No newline here: AddWarning() appends its own, and the text
         # doubles as a hash key that ends up in the report.
         AddWarning("Ignoring missing log file: $fname");
      }
   }

   # the current log holds the newest data and is read last
   $success = 1 if (readOneLog($logRoot . $logFileName));

   return $success;
}

#
# dumpRawLog()
#
# Debugging aid for verifying that the log files were read correctly:
# prints the time and type of every entry in @logData to stdout, followed by
# "VMkernel loaded" when the entry's vmkernel flag is set.
#
sub dumpRawLog()
{
   for my $entry (@logData) {
      my $suffix = $entry->{vmkernel} ? "VMkernel loaded" : "";
      print "$entry->{time} $entry->{type} " . $suffix . "\n";
   }
}

#
# printDownTimeHtml()
#
# Builds the html list of downtime causes recorded in $stats{$type}{cause},
# sorted by descending percentage.  Causes with a duration of zero are left
# out; cause names are replaced by their %prettyNames text when one exists and
# cut to $maxCauseLength characters (plus "...") when longer.
#
# Parameters:
#   $type - key into %stats; the callers pass 'scheduled' or 'unscheduled'
#
# Returns the html fragment as a string.
#
sub printDownTimeHtml($) 
{
   my ($type) = @_;
   my $causes = \%{$stats{$type}{cause}};
   my @ordered = sort { $causes->{$b}{percent} <=> $causes->{$a}{percent} }
                      keys %$causes;
   my $report = "<ul>\n";

   for my $event (@ordered) {
      my $info = $causes->{$event};
      next if ($info->{duration} == 0);

      my $label = $prettyNames{$event} || $event;
      $label = substr($label, 0, $maxCauseLength) . "..."
         if (length($label) > $maxCauseLength);

      my $instances = ($info->{count} == 1)
                    ? "(1 instance)\n"
                    : "($info->{count} instances)\n";
      $report .= "<li> $info->{percent}% $label " . $instances;
   }

   return $report . "</ul>\n";
}

# 
# generateReport()
#
# Assembles the availability report from %stats, %warnings and the first and
# last entries of @logData, and prints it to stdout.  The host name and the
# generation date are taken from the `hostname` and `date` commands.
#
# Parameters:
#   $text - when true, everything that looks like an html tag is removed
#           from the report before it is printed
#
sub generateReport($)
{
   my ($text) = @_;
   my @out;

   my $host = `hostname`;
   chomp($host);

   # the report covers the span between the oldest and the newest entry
   my $dateFormat = "%b %e, %Y";
   my $startDate = strftime($dateFormat, localtime($logData[0]->{time}));
   my $endDate = strftime($dateFormat, localtime($logData[$#logData]->{time}));

   # title and overall numbers
   push(@out,
      "<h1 align=\"center\">Availability Report for $host</h1>\n",
      "<h2 align=\"center\">$startDate - $endDate</h2>\n\n",
      "<p><b>Availability:</b> $stats{Availability}\n",
      "<p><b>Total time:</b> " . printTime($stats{TotalTime}) . "\n",
      "<ul>\n<li><b>Uptime:</b> " . printTime($stats{UpTime}) . "\n",
      "<li><b>Downtime:</b> " . printTime($stats{DownTime}) ."\n</ul>\n",
      "<p>Note: Downtime is any time the system isn't capable of running \n" .
      "Virtual Machines.  This includes reboots, crashes, configuration and running linux\n\n");

   # downtime break down, only when there was any downtime
   if ($stats{DownTime} > 0) {
      push(@out,
         "<hr><h3>Downtime Analysis:</h3>\n",
         "<h4> $stats{DownPercent} (" . printTime($stats{DownTime}) . ") downtime caused by:</h4>\n",
         "<ul><li>     $stats{scheduled}{percent}% (" . printTime($stats{scheduled}{totalDuration}) .
         ") scheduled downtime\n",
         "<li>     $stats{unscheduled}{percent}% (" . printTime($stats{unscheduled}{totalDuration}) .
         ") unscheduled downtime\n</ul>\n\n");

      foreach my $kind ('scheduled', 'unscheduled') {
         next unless ($stats{$kind}{totalDuration} > 0);
         push(@out, "<h4>Reasons for $kind downtime:</h4>\n");
         push(@out, printDownTimeHtml($kind));
      }
   }

   push(@out, "\n");

   push(@out,
      "<hr><h3>Stats:</h3>\n",
      "   <p><b>Current uptime:</b> $stats{curUptime}\n",
      "   <br><b>Longest uptime:</b> $stats{longestUptime}\n",
      "   <br><b>Shortest uptime:</b> $stats{shortestUptime}\n",
      "   <br><b>Average uptime:</b> $stats{avgUptime}\n\n",
      "   <p><b>Longest downtime:</b> $stats{longestDowntime}\n",
      "   <br><b>Shortest downtime:</b> $stats{shortestDowntime}\n",
      "   <br><b>Average downtime:</b> $stats{avgDowntime}\n\n",
      "   <p><b>Maximum VMs Sampled:</b> $stats{max_vms}\n",
      "   <br><b>Average VMs Sampled:</b> $stats{avg_vms}\n\n");

   #Can only find out server info if the vmkernel is loaded.
   if (vmkernelLoaded()) {
      push(@out,
         "<hr><h3>Server Information:</h3>",
         "   <p><b>Number of CPUs:</b> $stats{cpus}, $stats{cputype}\n",
         "   <p><b>Installed Memory:</b> $stats{memory}\n");
      push(@out, "   <p><b>Current Build:</b> $stats{build}\n")
         if (defined $stats{build});
   }

   # every distinct warning together with its number of occurrences
   if (keys(%warnings) > 0) {
      push(@out, "<hr><h3>Warnings</h3>\n", "<ul>\n");
      push(@out, map { "<li> $_ ($warnings{$_})\n" } keys %warnings);
      push(@out, "\n</ul>\n");
   }

   push(@out, "<br><br><p align=\"right\">Report generated " . `date`);

   my $report = join('', @out);
   if ($text) {
      # plain text requested: drop the markup
      $report =~ s/(<[^>]*>)//g;
   }
   print $report;
}

#
# vmkernelLoaded
#
# Returns true if the vmkernel is loaded, which is decided by testing
# whether /proc/vmware/vm exists.
#
sub vmkernelLoaded()
{
  my $vmkProcEntry = '/proc/vmware/vm';
  return -e $vmkProcEntry;
}

#
# addState
#
#  Appends a new state record to the @states array.
#
#  Parameters (in order): index into @logData where the state starts, index
#  where it ends, name of the state, cause for entering it.  They are stored
#  under the keys startIdx, endIdx, name and cause.
sub addState($$$$)
{
   my %state;
   @state{qw(startIdx endIdx name cause)} = @_;
   push(@states, \%state);
}

#
# State transition functions.
#
# Dispatch table from log entry type to the function handling that type.
# An entry type without a function here cannot be processed.
#
# These functions take the current state and the index of the entry in
# @logData as parameters, and return the new state and, optionally, the
# reason for moving to this new state.
#
my %actions = (
   'hb'       => \&processHeartBeat,
   'load'     => \&processVMKLoad,
   'unload'   => \&processVMKUnload,
   'vmkerror' => \&processVMKError,
   'reboot'   => \&processReboot,
   'halt'     => \&processHalt,
   'start'    => \&processStart,
   'mui_op'   => \&processMUI_Op,
);

# Heartbeat: the new state follows from the flags stored with the entry.
# While the VMkernel is loaded the VM sample statistics and the build in
# %stats are updated too.  $line is the index of the entry in @logData.
sub processHeartBeat($$) {
  my ($curState, $line) = @_;
  my $hb = $logData[$line];

  if (!$hb->{vmkernel}) {
     return $hb->{vmnix} ? ('vmnix', 'hb:vmk not loaded')
                         : ('linux', 'hb:running linux');
  }

  my $numVms = $hb->{num_vms};
  $stats{max_vms} = $numVms if ($numVms > $stats{max_vms});
  $stats{total_vm_samples} += $numVms;
  $stats{vm_sample_count}++;
  $stats{build} = $hb->{vmk_build};   # the most recent heartbeat wins
  return ('vmkernel', 'hb:vmk loaded');
}

# VMkernel load: the new state is always 'vmkernel', whatever the current
# state or the log entry; no cause is returned.
sub processVMKLoad($$) {
  my $newState = 'vmkernel';
  return $newState;
}

# VMkernel unload: the new state is always 'vmnix', whatever the current
# state or the log entry; no cause is returned.
sub processVMKUnload($$) {
  my $newState = 'vmnix';
  return $newState;
}

#
# processVMKError
#
# On startup, we check for core dump files.  If a new one exists,
# then a message gets placed in the summary log.  When $causeNeeded points
# at a state record, the cause of that record is replaced by the error text
# of this entry and $causeNeeded is cleared.
#
# The state itself never changes: the current state is returned.
#
sub processVMKError($$) {
  my ($curState, $line) = @_;

  return $curState unless (defined($causeNeeded));

  $causeNeeded->{cause} = $logData[$line]->{psod_text};
  $causeNeeded = undef;
  return $curState;
}

# Reboot: the new state is 'off'.  When an operation requested through the
# mui is cached and its name occurs in the type of this entry, the cached
# text becomes the cause and the cache is cleared; otherwise no cause is
# returned.  $line is the index of the entry in @logData.
sub processReboot($$) {
  my ($curState, $line) = @_;
  my $cause = undef;
  my $type = $logData[$line]{type};
  my $op = $muiOperationRequested{op};

  # check to see if this op was generated through the mui.
  # The cached name is compared as plain text: interpolated into a pattern
  # an undefined or empty name yields the empty pattern (which does not
  # mean "no match"), and regex metacharacters would be interpreted.
  if (defined($op) && length($op) && index($type, $op) >= 0) {
     $cause = $muiOperationRequested{text};
     undef(%muiOperationRequested);
  }
  return ('off', $cause);
}

# Halt: the new state is 'off'.  When an operation requested through the
# mui is cached and its name occurs in the type of this entry, the cached
# text becomes the cause and the cache is cleared; otherwise no cause is
# returned.  $line is the index of the entry in @logData.
sub processHalt($$) {
  my ($curState, $line) = @_;
  my $cause = undef;
  my $type = $logData[$line]{type};
  my $op = $muiOperationRequested{op};

  # check to see if this op was generated through the mui.
  # The cached name is compared as plain text: interpolated into a pattern
  # an undefined or empty name yields the empty pattern (which does not
  # mean "no match"), and regex metacharacters would be interpreted.
  if (defined($op) && length($op) && index($type, $op) >= 0) {
     $cause = $muiOperationRequested{text};
     undef(%muiOperationRequested);
  }
  return ('off', $cause);
}

#
# processMUI_Op()
#
# If a reset / halt is done through the mui it is logged.
# Cache the operation and the reason in %muiOperationRequested, so that when
# we later see the actual reboot / halt we know what caused it.
#
# Text of the form "<op>:<reason>" is split at its last colon; the cached
# text is then "<op> for <reason>".  Text without a colon is taken as the
# operation name as a whole.  The cached operation name is lower-cased.
#
# The state itself never changes: the current state is returned.
#
sub processMUI_Op($$) {
  my ($curState, $line) = @_;
  my $request = $logData[$line]->{mui_text};

  if ($request =~ m/(.*):(.*)$/) {
     my ($op, $reason) = ($1, $2);
     @muiOperationRequested{qw(op text)} = (lc($op), "$op for $reason");
     return $curState;
  }

  my $opName = lc($request);
  @muiOperationRequested{qw(op text)} =
     ($opName, "$opName via VMware Management Interface");
  return $curState;
}

#
# processStart
#
# The start message gets logged when the server boots.  If we see
# this message while the current state is 'vmkernel', then the
# system must have crashed, been reset, or powered off.
# In that case a 'down' state is added here and $causeNeeded is pointed at
# it, so that a later VMkernel error entry can replace its cause
# "unknown (powerfail / reset?)".
#
# Always returns the new state 'on'.
#
sub processStart($$) {
  my ($curState, $line) = @_;

  # clear out any cached mui op (should only matter if the server
  # died between the message from the mui being logged and the
  # actual reboot / halt).
  undef(%muiOperationRequested);

  $causeNeeded = undef;

  if ($curState eq 'vmkernel') {
     # we just crashed / reset / power offed
     #must assume crash started immediately after last timestamp in log.
     my $lastSeen = $states[-1]{endIdx};
     addState($lastSeen, $lastSeen, 'down', 'unknown (powerfail / reset?)');
     $causeNeeded = $states[-1];
  }
  return 'on';
}


#
# analyzeLog()
#
# Steps through the log entry by entry and determines what
# state the server is in.  The start and end indexes of each
# state, as well as what caused the state change, are stored
# in the @states array.  Entries whose type has no handler in %actions
# are reported on stderr and skipped.
#
# Possible states are:
#   unknown:    initial state, before anything has been seen
#   vmkernel:   vmkernel loaded
#   vmnix :     running vmnix kernel (vmkernel not loaded)
#   linux:      booted into linux
#   off:        server is off (result of shutdown / reboot)
#   on:         server is on (either vmnix or linux)
#   down:       server has crashed / powerfailure, hard reset.
# 
sub analyzeLog()
{
   my $curState = "unknown";

   addState(0, 0, 'unknown', 'unknown');

   print STDERR "Analyzing data ...\n";
   foreach my $i (0 .. $#logData) {
      my $type = $logData[$i]{type};
      my $handler = $actions{$type};
      unless (defined($handler)) {
         print STDERR "Warning unknown type $type: '$logData[$i]{line}'\n";
         next;
      }

      my ($newState, $cause) = $handler->($curState, $i, $type);

      # a handler that names no cause gets the entry type as cause
      $cause ||= $type;

      # the most recent state (possibly one the handler just added)
      # reaches up to this entry
      $states[-1]{endIdx} = $i;

      next if ($newState eq $curState);
      addState($i, $i, $newState, $cause);
      $curState = $newState;
   }
}

# helper fn for printTime
#
# Returns "$value $singular" when $value is 1, "$value $plural" for any other
# non-zero value, and $null when $value is 0.
sub plural($$$$) {
   my ($value, $singular, $plural, $null) = @_;
   return $null if ($value == 0);
   return "$value $singular" if ($value == 1);
   return "$value $plural";
}

#
# printTime()
#
# Takes a parameter in seconds, and returns a string representing
# that amount of time in days, hours, minutes, seconds.  It tries to be
# clever in how it does this.  (ex: if the time in question is several
# days then we probably don't care about the seconds field).
#
#   below one minute:  "N second(s)", or "0"
#   below one hour:    "N minute(s)"
#   below one day:     hours with one decimal, e.g. "1.5 hours"
#   otherwise:         "N day(s)", followed by ", N hour(s)" unless the
#                      hour count is zero
#
# Negative values are rendered like their absolute value, prefixed with "-".
#
sub printTime($)
{
   my $r = shift;
   my $neg = "";

   if ($r < 0) {
      $neg = "-";
      $r = -$r;
   }

   my $days = int($r / (60 * 60 * 24));
   $r %=  (60 * 60 * 24);
   my $hours = int ($r / (60 * 60));
   $r %= (60 * 60);
   my $minutes = int($r / (60));

   if ($days == 0 && $hours == 0 && $minutes == 0) {
     return $neg . plural(int($r), "second", "seconds", "0");
   }

   if ($days == 0 && $hours == 0) {
      return $neg . plural($minutes, "minute", "minutes", "0");
   }

   if ($days == 0) {
     return $neg . sprintf("%.1f hours", $hours + $minutes / 60.0);
   }

   # Join only the parts that are present, so that a whole number of days
   # does not end in a dangling ", ".
   my @parts = grep { $_ ne "" } (plural($days, "day", "days", ""),
                                  plural($hours, "hour", "hours", ""));
   return $neg . join(", ", @parts);
}

#
# dumpStates()
#
# Debugging aid: dumps the raw states, their duration (difference between
# the timestamps of their last and first log entry) and the reason for
# entering the state.  The title line goes to stdout, the table to stderr.
#
sub dumpStates()
{
   print "Dumping States:\n";

   printf(STDERR "<br>%-10s %10s %10s %15s    cause\n","State","Start Idx", "End Idx", "Duration(hh:mm)");
   foreach my $state (@states) {
      my $duration = ($logData[$state->{endIdx}]->{time} - $logData[$state->{startIdx}]->{time});
      # The cause is passed as an argument rather than interpolated into the
      # format: it is text taken from the log, and a '%' in it would be
      # read as a conversion.
      printf(STDERR "%-10s %10d %10d %15s    %s\n",
         $state->{name}, $state->{startIdx}, $state->{endIdx}, printTime($duration),
         $state->{cause});
   }
}


#
# sum()
#
# Sums up the duration of all states in @states whose name matches the
# regular expression given in $match, and returns that total.
sub sum($)
{
   my ($match) = @_;
   my $total = 0;

   $total += $_->{duration} for grep { $_->{name} =~ m/$match/ } @states;
   return $total;
}


#
# verifyDuration
#
# Cross-checks the duration of a state, as computed from the timestamps,
# against the uptime numbers recorded in the heartbeats of that state.
# Trusts the vmkernel uptime the most, followed by the linux uptimes.  If the
# uptime from the heartbeats roughly matches the timestamp duration, the
# timestamp value is used -- it is potentially more up to date  (heartbeats
# only come every $hbInterval).
#
# 2 reasons not to always use the uptime as reported by the heart beats:
#   1) inaccurate for small amounts of time.  (heartbeats only come every hour)
#   2) inaccurate system clock, and user has configured ntp or something.
#
# Parameters:
#   $state    - state record (startIdx, endIdx and name are read)
#   $duration - duration according to the timestamps, in seconds
#
# Returns $duration, or the heartbeat based duration when the two differ by
# more than $hbErrorMargin * $duration plus one or two heartbeat intervals.
sub verifyDuration($$)
{
   my ($state, $duration) = @_;

   # indexes of all heartbeats logged while the state lasted
   my @hbIdx = grep { $logData[$_]{type} eq "hb" }
                    ($state->{startIdx} .. $state->{endIdx});
   return $duration unless (@hbIdx);

   my ($firstHb, $lastHb) = @hbIdx[0, -1];
   my $numHbs = scalar(@hbIdx);
   my $name = $state->{name};

   # linux uptime elapsed between the first and the last heartbeat
   my $uptimeDelta = $logData[$lastHb]{uptime} - $logData[$firstHb]{uptime};

   my ($durFromHb, $errorFactor);
   if ($name eq "vmkernel") {
      ($durFromHb, $errorFactor) = ($logData[$lastHb]{vmk_uptime}, 1);

      # vmkernel counter wraps every 49 days (2^32 jiffies).
      # don't trust it if we are even close to the limit, use the
      # uptime as reported by linux instead.
      my $linuxUptime = ($numHbs > 1) ? $uptimeDelta : 0;
      if ($linuxUptime / (60 * 60 * 24) >= 48) {
         ($durFromHb, $errorFactor) = ($linuxUptime, 2);
      }
   } elsif ($name eq "linux") {
      ($durFromHb, $errorFactor) = ($logData[$lastHb]{uptime}, 1);
   } elsif ($numHbs > 1) {
      # must subtract the differences in the uptime between the first
      # and last heartbeats logged to find duration.  Thus we could have
      # $hbInterval error at both the start and end of the range
      ($durFromHb, $errorFactor) = ($uptimeDelta, 2);
   } else {
      # not enough heartbeats to tell anything.
      return $duration;
   }

   my $tolerance = ($hbErrorMargin * $duration) + ($errorFactor * $hbInterval);
   if (abs(($durFromHb - $duration)) > $tolerance) {
      # this will happen everytime the user changes the servers date / time by a lot --
      # so trust the uptime instead of the date.
      print STDERR "Heartbeats report " . printTime($durFromHb) .
         " but timestamps report " . printTime($duration) . " ($firstHb - $lastHb)\n";
      return $durFromHb;
   }

   # everything seems fine, return duration as reported by time stamps
   return $duration;
}

#
# calculateDuration
#
# Works out how long a state lasted: the difference between the timestamps
# of its last and first log entry, cross-checked by verifyDuration().  A
# negative result (time moving backwards) is reported as a warning and
# replaced by 0.
#
# Parameters:
#   $state - state record (startIdx and endIdx are read)
#
# Returns the duration in seconds, never negative.
#
sub calculateDuration($) 
{
   my ($state) = @_;
   my $fromStamps = $logData[$state->{endIdx}]->{time}
                  - $logData[$state->{startIdx}]->{time};
   my $duration = verifyDuration($state, $fromStamps);

   return $duration unless ($duration < 0);

   # time must have gone backwards.  This should only
   # happen after a reboot / crash or if there are too few
   # heartbeats logged.
   AddWarning("Time went backwards.  Reported downtime durations may be inaccurate.");
   return 0;
}

#
# calcStats()
#
# Calculates all the stats that eventually get printed in the report, and
# stores them in the global %stats.  The duration of every state is stored
# in the state record (key duration) on the way.
#
# Returns 0 when the total time or the uptime is zero (not enough data for a
# report), 1 otherwise.
#
sub calcStats()
{
   my $numUptimes = 0;
   my $numDowntimes = 0;

   # find the longest / shortest up and down times.
   foreach my $state (@states) {
      my $duration = calculateDuration($state);
      $state->{duration} = $duration;
      if($state->{name} eq 'vmkernel') {
         if (!defined $stats{longestUptime}) {
            $stats{longestUptime} = $stats{shortestUptime} = $duration;
         }
         $stats{longestUptime} = $duration if ($duration > $stats{longestUptime});
         $stats{shortestUptime} = $duration if ($duration < $stats{shortestUptime});
         $numUptimes++;
      } else {
         #filter out lame 0 duration states so they don't screw up the average
         next if ($duration == 0);
         if (!defined $stats{longestDowntime}) {
            $stats{longestDowntime} = $stats{shortestDowntime} = $duration;
         }
         $stats{longestDowntime} = $duration if ($duration > $stats{longestDowntime});
         $stats{shortestDowntime} = $duration if ($duration < $stats{shortestDowntime});
         $numDowntimes++;
      }
   }

   $stats{TotalTime} = sum('.*');
   $stats{DownTime} = sum('down|vmnix|linux|off|on');
   $stats{UpTime} = $stats{TotalTime} - $stats{DownTime};

   #make sure we don't divide by zero
   if (!$stats{TotalTime} || !$stats{UpTime}) {
      return 0;
   }

   $stats{Availability} = sprintf("%2.3f%%", 100.0 * ($stats{UpTime} / $stats{TotalTime}));
   $stats{DownPercent} = sprintf("%4.1f%%", 100.0 * ($stats{DownTime} / $stats{TotalTime}));

   # VM samples are only taken from heartbeats logged while the vmkernel is
   # loaded; there may be none at all even though there is uptime.
   $stats{avg_vms} = sprintf("%2.2f",
      $stats{vm_sample_count} ? $stats{total_vm_samples} / $stats{vm_sample_count} : 0);

   breakoutDowntime();

   if (!vmkernelLoaded()) {
     $stats{curUptime} = "0 (VMkernel not loaded)";
   }else {
      my $curSeconds = `cat /proc/vmware/uptime | awk '{print \$1}'`;
      chomp($curSeconds);
      $stats{curUptime} = printTime($curSeconds);

      #current uptime is more recent than the stats used to collect longest uptime.
      #Compare the raw seconds: longestUptime is not converted to text
      #until further below.
      if (!defined($stats{longestUptime}) || $curSeconds > $stats{longestUptime}) {
         $stats{longestUptime} = $curSeconds;
      }

      #grab some simple system info.
      $stats{cpus} = `cat /proc/vmware/sched/ncpus`;
      chomp($stats{cpus});
      $stats{cputype} = `cat /proc/cpuinfo | grep "model name" | awk -F: '{print \$2}'`;
      chomp($stats{cputype});
      $stats{memory} = `cat /proc/meminfo | grep MachineMem | awk -F: '{print \$2}'`;
      chomp($stats{memory});
   }

   #convert uptimes to text values
   # UpTime is everything that is not downtime, so it can be non-zero
   # without a single 'vmkernel' state having been counted.
   if ($numUptimes > 0) {
      $stats{avgUptime} = printTime($stats{UpTime} / $numUptimes);
   } else {
      $stats{avgUptime} = 0;
   }
   if ($numDowntimes > 0 ) {
      $stats{avgDowntime} = printTime($stats{DownTime} / $numDowntimes);
   } else {
      $stats{avgDowntime} = 0;
   }
   $stats{longestUptime} = printTime($stats{longestUptime});
   $stats{shortestUptime} = printTime($stats{shortestUptime});
   $stats{longestDowntime} = printTime($stats{longestDowntime});
   $stats{shortestDowntime} = printTime($stats{shortestDowntime});

   return 1;
}

# breakoutDowntime()
#
# Parse the @states array to split the downtime into unscheduled downtime
# ('down' states) and scheduled downtime (all other states that count as
# downtime), each with a break down by cause.  The results are stored in
# $stats{scheduled} and $stats{unscheduled}:
#   totalDuration   seconds spent in states of that kind
#   percent         share of $stats{DownTime}
#   cause           per cause: count, duration and percent of totalDuration
#
# Does nothing when $stats{DownTime} is not positive.
#
# XXX could be smarter here: The sequence unload, reboot, booting, load is all
# really just one downtime sequence whose cause was probably reboot (assuming that
# the unload happened close to the reboot).  Should report it as just one downtime
# with one cause. 
sub breakoutDowntime() {
   my %scheduled = (totalDuration => 0);
   my %unscheduled = (totalDuration => 0);

   if ($stats{DownTime} <= 0) {
      return;
   }

   foreach my $state (@states) {
      my $name = $state->{name};

      # 'vmkernel' is uptime.  'unknown' is not part of $stats{DownTime}
      # either, so counting it here would make the percentages below add
      # up to more than the downtime they are a share of.
      next if ($name eq 'vmkernel' || $name eq 'unknown');

      my $bucket = ($name eq 'down') ? \%unscheduled : \%scheduled;
      #XXX need to preprocess cause field
      $bucket->{totalDuration} += $state->{duration};
      $bucket->{cause}{$state->{cause}}{count} += 1;
      $bucket->{cause}{$state->{cause}}{duration} += $state->{duration};
   }

   foreach my $bucket (\%scheduled, \%unscheduled) {
      $bucket->{percent} = sprintf("%4.1f", 100 * ($bucket->{totalDuration} / $stats{DownTime}));

      # nothing to break down (and nothing to divide by)
      next if ($bucket->{totalDuration} == 0);

      foreach my $cause (keys %{$bucket->{cause}}) {
         $bucket->{cause}{$cause}{percent} = sprintf("%4.1f", 100 *
               ($bucket->{cause}{$cause}{duration} / $bucket->{totalDuration}));
      }
   }

   $stats{scheduled} = \%scheduled;
   $stats{unscheduled} = \%unscheduled;
}

# main()
#
# Reads the logs, analyzes them and prints the report.  When no log could be
# read, or the stats could not be calculated, a short notice is printed
# instead.  The report is rendered as plain text when the first command line
# argument contains "text".
sub main()
{
   my $notEnoughData = "<p>Not enough data collected to generate an availability report";

   if (!readAllLogs()) {
      print $notEnoughData;
      return;
   }

   #dumpRawLog();
   analyzeLog();
   #dumpStates();

   if (!calcStats()) {
      print $notEnoughData;
      return;
   }

   my $wantText = ($ARGV[0] =~ m/text/);
   generateReport($wantText);
   print STDERR "Done!\n";
}

main();
