#!/usr/bin/perl #display helpfile if ($ARGV[0] eq "-help") { print "\nThis script calculates the usage statistics for linux cluster from the Rtop\n"; print "summary files. The results are summarized in HTML format. The main page is\n"; print "data.html\n\n"; print "The script must be called from the directory containing the Rtop files to\n"; print "be summarized\n\n"; print "usage is vnAccount [-p ] [-r ] [-help]\n\n"; print "-p: sets the maxmimum value of the horizontal scale for the bar graphs.\n"; print "-p must be followed by a number between 0 and 100. If the -p switch is not\n"; print "used, the default maximum width of the bar graph is 15%.\n\n"; print "-r: stores the resulting html and temporary files in a directory. If a\n"; print "directory is specified then the script creates the directory \"vnAccount\" in\n"; print "the specified directory. All the specified directories must already exist.\n"; print "If a directory is not specified then the script attempts to create the directory\n"; print "\"vnAccount\" in the current directory. If that is not possible then the script\n"; print "attempts to create the directory \"/tmp/logname/vnAccount\" where \"logname\" is\n"; print "the user name of the current user. If that fails, the script exits. If the\n"; print "switch is not used the files are created in the current directory.\n\n"; print "-help: displays this text.\n\n"; print "Note: this script makes use of a GNU extension. It will run properly only on a\n"; print "Linux machine, not an SGI.\n\n"; } #check for valid switches elsif ($ARGV[0] && $ARGV[0] ne "-r" && $ARGV[0] ne "-p") { print "Incorrect switch. 
Type 'vnAccount -help' to see the switch options\n"; } else { #set defaults $normalize = 15; $webdirect = "."; while (@ARGV) { $switch = shift @ARGV; next unless ($switch =~ /^-/); #check for the directory switch if ($switch eq "-r") { #if a directory is given, use that one if ($ARGV[0] && $ARGV[0] =~ /^[^-]/) { mkdir("$ARGV[0]/vnAccount", 0777); $webdirect = "$ARGV[0]/vnAccount"; } else { #try to create the directory where vnAccount is called $webdirect = "vnAccount"; mkdir(vnAccount, 0777); unless (open(TMEP, "+>vnAccount/tmep")) { #if that fails read the user name of the current user $logname = $ENV{LOGNAME}; $webdirect = "/tmp/$logname/vnAccount"; #try to create the directories /tmp/$logname and /tmp/$logname/vnAccount mkdir ("/tmp/$logname", 0777); mkdir("/tmp/$logname/vnAccount", 0777); } #try to write to the created directory. If that fails, die. open (TMEP, "+>$webdirect/tmep") || die "Can't create directory $webdirect : $!\n"; } } #set maximum value for horizontal scale on bar graph elsif ($switch eq "-p") { #check to make sure "-p" is followed by a number between 0 and 100 if ($ARGV[0] =~ /\d+/ && $ARGV[0] >= 0 && $ARGV[0] <= 100) { $normalize = $ARGV[0]; } else { die "The switch -p must be followed by a number from 0-100\n"; } } } open (TMEP, "+>$webdirect/tmep") || die "Can't create files in directory $webdirect : $!\n"; #list the directory one name per line, sorted by name (NOTE(review): presumably the Rtop names are timestamps so this is oldest first - confirm) open(DIRECT, "ls -1 | sort |"); while() { chop; #make sure to pick up only the Rtop files (their names start with four digits) if ($_ =~ /^[0-9]{4,4}/) { @lines[$i] = $_; $i = $i + 1; } } $k = 0; #time cluster has been running $cluster_time = 0; #go through all the Rtop files, oldest first open (FIRST, $lines[0]); while() { #go through all the process lines if ($_ =~ /^\d+/) { ($initial, $node, $pid, $user, $pri, $ni, $size, $rss, $share, $stat, $lib, $cpu, $mem, $time, $process) = split(/\s+/, $_); #ignore all "idle" or "root" processes unless ($user eq "idle" || $user eq "root" || 
$node =~ /^[^v]/ || $process eq "top") { #distinguish between minutes and minutes:seconds form and turn both into total minutes if ($time =~ /m/) { ($minutes) = split(/m/, $time); $minutes = 2 * $minutes; #double time since top assumes both processors are working } else { ($minutes, $seconds) = split(/:/, $time); $minutes = 2*($minutes + ($seconds / 60)); } #save the run-time of the first processes so that it can be subtracted off the total later $initial_process{$pid} = $minutes . ":" . $user . ":" . $node; $initial_time{$user} = ($initial_time{$user} + $minutes); #create an array which holds the total time used by each user ($total_time) #create an array which holds all the individual process data per user if ($total_time{$user}) { #update the arrays ($usage, $numprocess) = split(/:/, $total_time{$user}); $$user{$pid} = $node . "+" . $process . "+" . $minutes . "+" . $date_time . "+" . $date_time; $usage = $usage + $minutes; $numprocess++; #count the number of processes each user has started $total_time{$user} = $usage . ":" . $numprocess; } else { #create the relevant entries for each array if they don't already exist $$user{$pid} = $node . "+" . $process . "+" . $minutes . "+" . $date_time . "+" . $date_time; $total_time{$user} = $minutes . ":" . 
1; } } } #get date stamp on file elsif ($_ =~ /^[a-z]/) { chop; ($address, $usage, $date_time) = split(/\s+/, $_, 3); $first_date = $date_time; } } print $k, "\n"; #now process the rest of the files foreach $j (1..$#lines) { $now = $lines[$j]; #open the current file open(NOW, $now) || die "Can't open $now: $!\n"; while() { #split into pieces #get all process info if ($_ =~ /^\d+/) { ($initial, $node, $pid, $user, $pri, $ni, $size, $rss, $share, $stat, $lib, $cpu, $mem, $time, $process) = split(/\s+/, $_); #ignore all "idle" or "root" processes unless ($user eq "idle" || $user eq "root" || $node =~ /^[^v]/ || $process eq "top") { #distinguish between minutes and minutes:seconds form and turn both into total minutes if ($time =~ /m/) { ($minutes) = split(/m/, $time); $minutes = 2 * $minutes; #double time since top assumes both processors are working } else { ($minutes, $seconds) = split(/:/, $time); $minutes = 2*($minutes + ($seconds / 60)); } #update the total_time and user arrays if ($total_time{$user}) { ($usage, $numprocess) = split(/:/, $total_time{$user}); if ($$user{$pid}) { ($master_node, $master_process, $master_time, $master_date, $stop_date) = split(/\+/, $$user{$pid}, 5); $$user{$pid} = $master_node. "+" . $master_process . "+" . $minutes . "+" . $master_date . "+" . $date_time; $usage = $usage + ($minutes - $master_time); $total_time{$user} = $usage . ":" . $numprocess; } else { $$user{$pid} = $node . "+" . $process . "+" . $minutes . "+" . $date_time . "+" . $date_time; $usage = $usage + $minutes; $numprocess++; $total_time{$user} = $usage . ":" . $numprocess; } } else { $$user{$pid} = $node . "+" . $process . "+" . $minutes . "+" . $date_time . "+" . $date_time; $total_time{$user} = $minutes . ":" . 
1; } } } #get date stamp on file elsif ($_ =~ /^[a-z]/) { chop; ($address, $usage, $date_time) = split(/\s+/, $_, 3); } } #print the number of the file that was just read print $j, " "; } print "\n"; #save the date stamp read from the last file processed $last_date = $date_time; #print the results to a file open(TEMP, "+>$webdirect/temp"); open(TMEP, "+>$webdirect/tmep"); $all_user = 0; #subtract the initial run time of processes in the first Rtop files foreach $user (sort keys(%total_time)) { ($usage, $numprocess) = split(/:/, $total_time{$user}); if ($initial_time{$user}) { $usage = ($usage - $initial_time{$user}); foreach $pid (sort keys(%$user)) { next unless ($initial_process{$pid}); ($initial_minutes, $initial_user, $initial_node) = split(/:/, $initial_process{$pid}); ($node, $process, $minutes, $date, $stop_date) = split(/\+/, $$user{$pid}, 5); next unless ($user eq $initial_user && $node eq $initial_node); $minutes = ($minutes - $initial_process{$pid}); $$user{$pid} = $node . "+" . $process . "+" . $minutes . "+" . $date . "+" . 
$stop_date; } } $all_user = ($all_user + $usage); printf TEMP "%-7f %-8s %-5s\n", $usage, $numprocess, $user; } close(TEMP); #sort by time, longest running at the top `sort -n -r -o$webdirect/temp $webdirect/temp`; open(TEMP, "$webdirect/temp"); open(WEB, "+>$webdirect/data.html"); #take the first_date and last_date and calculate the number of minutes between them #use the date system call to convert each date to the number of seconds since 00:00:00 Jan 1, 1970 #note: this is a GNU extension to date, it won't work on an SGI $firstdate = `date -d "$first_date" +%s`; $lastdate = `date -d "$last_date" +%s`; $cluster_time = (($lastdate-$firstdate)/60); #calculate the total cluster time: the time the cluster has been running multiplied by the number of processors $cluster_time = 128*$cluster_time; #hourmin converts from a time format of minutes to hrs:min format #the first argument is time in minutes &hourmin(*cluster_time, *cluster_hours, *cluster_minutes); &hourmin(*all_user, *all_hours, *all_minutes); $total_perc = 100*($all_user/$cluster_time); #start writing HTML code printf WEB "\n\nTotal time used for cluster processes\n\n\n\n"; printf WEB "

Summary of Cluster Usage

\n"; printf WEB "Note: The timings and statistics rooted in this document are currently for informational purposes only, and are not guaranteed to be completely correct. They do provide, however, a representative view of overall cluster usage.

\n"; printf WEB "Percentage of total cluster time used = %1.1f\%
\nTotal cluster time available (hrs : min): %1s:%02d
\nTotal time used for running processes (hrs : min) = %1s:%02d
\n
\n",$total_perc, $cluster_hours, $cluster_minutes, $all_hours, $all_minutes; printf WEB "The time used by each user is displayed as a percentage of the total cluster time available. The width of the bar corresponds to the percentage of time used.\n"; printf WEB "The height of the bar corresponds to the number of processes run by that user.\n"; printf WEB "Click on the user name on the right to see each user's process information.
\n
\n"; printf WEB "Processes counted between %1s and %1s\n", $first_date, $last_date; #create the main table in the main page printf WEB "\n\t\n\t\n\t\n\t\n", $normalize; #TEMP is the file that contains all the total_time data sorted by each user's usage time while() { chop; ($user_time,$numprocess,$user) = split(/\s+/, $_); $report_percent = 100*($user_time/($cluster_time)); #do the calculations for the lengths of the bar graphs $display_percent = 100*($report_percent/$normalize); #convert time back to hours:minutes format &hourmin(*user_time, *user_hours, *user_minutes); #do the calculations for the height of the bar graphs. Height is 30 times log10 of the number of processes, except that it is 1 when there is at most one process if ($numprocess <= 1) { $height = 1; } else { $height = 30*(log($numprocess)/2.302585093); } #create a table in each row of the main table. The length of the inner table is set by percent: this creates the bar graph appearance #both the heights of the main table row and the inner table have to be set the same to get the height difference printf WEB "\t\n\t\n\t\n\t\n", $height, $height, $display_percent, $user, $report_percent, $user, $numprocess; open (TEMP1, "+>$webdirect/temp1"); #print to TEMP1 for sorting by process for each user foreach $pid (sort keys(%$user)) { ($master_node, $master_process, $master_time, $master_date, $stop_date) = split(/\+/, $$user{$pid}, 5); printf TEMP1 "%-12s %-7f %-5s %-4s %-28s+%28s\n", $master_process, $master_time, $pid, $master_node, $master_date, $stop_date; } close (TEMP1); `sort -o$webdirect/temp1 $webdirect/temp1`; open (TEMP1, "$webdirect/temp1"); open (TEMP2, "+>$webdirect/temp2"); open (PERSON, "+>$webdirect/$user.html"); printf PERSON "\n\n%ls's process information\n\n\n\n", $user; printf PERSON "Click on the process to see the usage details for that process\n
\n%1s used %1s:%02d (hrs : min) of processing time\n

\n", $user, $user_hours, $user_minutes; printf PERSON "
\n\t\t\n\t\t\n\t
0%1s\% 
Percentage of time used / User / No. processes
\n\t\t\n\t
 
%1.1f\% / %1s / %1s
\n\t\n\t\n\t \n\t\n\t\n\t\n"; #initialize lastprocess to something unlikely to be an actual process name #numproc counts the number of different processes run by each user; used to create a unique filename for PROCESS $lastprocess = 1; $numproc = 0; #TEMP1 is the file that contains all the process data for one user sorted by process name open (FINAL, "+>$webdirect/final"); while () { chop; ($temp_process, $temp_time, $temp_pid, $temp_node, $temp_date) = split(/\s+/, $_, 5); $nextprocess = $temp_process; #if this is the first line in TEMP1 (no prior process) or if the process hasn't changed then print the info to TEMP2 and reassign the lastprocess if ($nextprocess eq $lastprocess || $lastprocess eq 1) { printf TEMP2 "%-7f %5s %-12s %-4s %-56s\n", $temp_time, $temp_pid, $temp_process, $temp_node, $temp_date; $lastprocess = $nextprocess; } #when the process changes: close TEMP2; sort it by time, longest running process first else { #procnum counts the number of times each process was run; printed in PERSON $procnum = 0; $process_time = 0; $numproc++; close (TEMP2); `sort -n -r -o$webdirect/temp2 $webdirect/temp2`; open (TEMP2, "$webdirect/temp2"); open (PROCESS, "+>$webdirect/$user$numproc"); printf PROCESS "USER: %-8s\n\n", $user; printf PROCESS " PROCESS TIME (hrs:min) PID NODE DATE STARTED DATE STOPPED\n"; #TEMP2 is the file that contains the process data for one process and one user sorted by time #open the text file for that process; read the lines from TEMP2 and print to PROCESS while () { chop; ($master_time, $master_pid, $master_process, $master_node, $master_date) = split(/\s+/, $_, 5); ($start_date, $stop_date) = split(/\+/, $master_date, 2); $process_time = $process_time + $master_time; &hourmin(*master_time, *master_hours, *master_minutes); $procnum++; if ($stop_date ne $last_date) { printf PROCESS "%4s: %-12s %11s:%02d %-5s %-4s %-28s %-28s\n", $procnum, $master_process, $master_hours, $master_minutes, $master_pid, $master_node, 
$start_date, $stop_date; } else { printf PROCESS "%4s: %-12s %11s:%02d %-5s %-4s %-28s -\n", $procnum, $master_process, $master_hours, $master_minutes, $master_pid, $master_node, $start_date; } } #update the HTML file for the user which lists the processes printf FINAL "%7f %-8s %-12s %2s %2s\n", $process_time, $user, $master_process, $procnum, $numproc; close (TEMP2); close (PROCESS); #open a new TEMP2 and print the new information; reassign lastprocess open (TEMP2, "+>$webdirect/temp2"); printf TEMP2 "%-7f %5s %-12s %-4s %-28s\n", $temp_time, $temp_pid, $temp_process, $temp_node, $temp_date; $lastprocess = $nextprocess; } } #the last process in TEMP1 won't go through the else section above so now do: sort TEMP2 by time, print PROCESS and update PERSON $procnum = 0; $process_time = 0; $numproc++; close (TEMP2); `sort -n -r -o$webdirect/temp2 $webdirect/temp2`; open (TEMP2, "$webdirect/temp2"); open (PROCESS, "+>$webdirect/$user$numproc"); printf PROCESS "USER: %-8s\n\n", $user; printf PROCESS " PROCESS TIME (hrs:min) PID NODE DATE STARTED DATE STOPPED \n"; while () { chop; ($master_time, $master_pid, $master_process, $master_node, $master_date) = split(/\s+/, $_, 5); ($start_date, $stop_date) = split(/\+/, $master_date, 2); $process_time = $process_time + $master_time; &hourmin(*master_time,*master_hours,*master_minutes); $procnum++; if ($stop_date ne $last_date) { printf PROCESS "%4s: %-12s %11s:%02d %-5s %-4s %-28s %-28s\n", $procnum, $master_process, $master_hours, $master_minutes, $master_pid, $master_node, $start_date, $stop_date; } else { printf PROCESS "%4s: %-12s %11s:%02d %-5s %-4s %-28s -\n", $procnum, $master_process, $master_hours, $master_minutes, $master_pid, $master_node, $start_date; } } printf FINAL "%7f %-8s %-12s %2s %2s\n", $process_time, $user, $master_process, $procnum, $numproc; close (FINAL); `sort -n -r -o$webdirect/final $webdirect/final`; close (TEMP2); close (PROCESS); open (FINAL, "$webdirect/final"); while () { chop; ($process_time, 
$user, $master_process, $procnum, $numproc) = split (/\s+/, $_); &hourmin(*process_time, *process_hours, *process_minutes); printf PERSON "\t\n\t\n\t\n\t\n\t\n\t\n",$user,$user,$numproc, $master_process, $procnum, $process_hours, $process_minutes; } #finish off the HTML for PERSON printf PERSON "
USER  PROCESS  NUMBER OF PROCESSES  TIME USED FOR PROCESS  
%1s  %1s   %1s  %1s:%02d  
\n
\n"; printf PERSON "Back to total time graph\n\n\n"; close (PERSON); } #finish off the HTML for WEB require "ctime.pl"; $today = &ctime(time); printf WEB "
\n
\n
Generated on %1s by vnAccount.
\n", $today; printf WEB "Copyright 2000 by Hanna Ruotsalainen.
\n"; printf WEB "Supported by CIAR and NSERC
\n"; printf WEB"\n"; print "The files were put in directory ", $webdirect, "\n"; print "The main page is called data.html\n"; }

#subroutine which changes from minutes format to hrs:min format
#
#The three arguments are typeglobs, as in &hourmin(*time, *hrs, *min):
#  *minute_time - in:  the scalar holds an elapsed time in (possibly fractional) minutes
#  *hours       - out: the scalar is set to the whole number of hours
#  *minutes     - out: the scalar is set to the remaining minutes (0-59)
#
#The time is rounded to the nearest whole minute first and only then split
#into hours and minutes, so a value such as 59.6 becomes 1:00 rather than 0:60.
#The caller's time scalar is not modified.
sub hourmin {
    #alias the caller's scalars through the typeglobs that were passed in
    local(*minute_time, *hours, *minutes) = @_;

    #treat an undefined or empty time as zero
    my $total = $minute_time || 0;

    #work on the magnitude so that int() and % behave the same for a negative
    #time (which can arise once the initial run times have been subtracted)
    my $negative = ($total < 0);
    $total = -$total if $negative;

    #round arithmetically; the digits after the decimal point must not be
    #compared as an integer because "5" (.5) and "123456" (.123456) would then
    #be rounded the wrong way
    my $whole = int($total + 0.5);

    $hours   = int($whole / 60);
    $minutes = $whole % 60;

    #carry the sign on the hours field, which the callers print with %1s
    $hours = "-" . $hours if ($negative && $whole > 0);

    return;
}