Graphing with R
Revision as of 17:17, 17 September 2013 by PeterHarding (talk | contribs) (→Generating the CSV Data)
Processing Data on Windows 7
I use the following BAT file to batch up R in a folder containing multiple data sets.
@echo off
rem ---------------------------------------------------------------------
rem Run plot.r once for every CSV data set in the current folder.
rem
rem The CSV file name is passed to R after --args; plot.r reads it with
rem commandArgs(trailingOnly = TRUE).  The R console output of every run
rem is collected in plot.out.
rem ---------------------------------------------------------------------

set R_TERM="C:\Apps\R\R-3.0.1\bin\i386\Rterm.exe"
set FILES=*.csv
set LOG=plot.out

rem Start each batch with a fresh log, then append per data set so the
rem output of earlier files is not overwritten by later ones.
if exist "%LOG%" del "%LOG%"

for %%F in (%FILES%) do (
    echo Processing %%F
    rem Quote the file name so names containing spaces stay one argument.
    %R_TERM% --no-restore --no-save --args "%%F" < plot.r >> "%LOG%" 2>&1
)
R Script File
# ---------------------------------------------------------------------------
# plot.r - plot %CPU utilisation (User + System) for one CSV data set.
#
# Invoked as:  Rterm --no-restore --no-save --args <host>_<YYYYMMDD>.csv < plot.r
#
# Input : <host>_<YYYYMMDD>.csv with at least the columns DateTime, User and
#         System (DateTime formatted as "%Y-%m-%d %H:%M:%S").
# Output: <host>_<YYYYMMDD>.png in the current directory.
# ---------------------------------------------------------------------------

args <- commandArgs(trailingOnly = TRUE)
print(args)

name <- args[1]

# File names are expected to look like <host>_<YYYYMMDD>...; the host and the
# date parts are recovered from the name with back-references.
regexp <- "([^_]*)_([[:digit:]]{4})([[:digit:]]{2})([[:digit:]]{2}).*"

# Fail early with a clear message: when the pattern does not match, sub()
# returns its input unchanged and the derived file names would be nonsense.
if (is.na(name) || !grepl(regexp, name)) {
    stop("Expected a file name of the form <host>_<YYYYMMDD>.csv as the first argument")
}

host  <- sub(pattern=regexp, replacement="\\1", x=name)
year  <- sub(pattern=regexp, replacement="\\2", x=name)
month <- sub(pattern=regexp, replacement="\\3", x=name)
day   <- sub(pattern=regexp, replacement="\\4", x=name)

date <- sprintf("%s-%s-%s", year, month, day)

rm(args)

data_file <- sprintf("%s_%s%s%s.csv", host, year, month, day)
print(data_file)

# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
data <- read.csv(data_file, header=TRUE)

# Alternative for input with separate Date and Time columns:
# data$Timestamp <- as.POSIXct(strptime(paste(data$Date, data$Time), "%Y-%m-%d %H:%M:%S"))

# strptime() returns POSIXlt (a list of date-time components); convert to
# POSIXct, which is the class intended for use as a data frame column.
data$Timestamp <- as.POSIXct(strptime(as.character(data$DateTime), "%Y-%m-%d %H:%M:%S"))

title <- sprintf("%s - %s - %% CPU Utilization", date, host)

data$CPU <- data$User + data$System

print(title)

png(sprintf("%s_%s%s%s.png", host, year, month, day))

# Vertical bars per sample, overlaid with the sample points.
plot(data$Timestamp, data$CPU, main=title, type="h", col="light blue", xlab="Time", ylab="Ucpu%", lwd=1)
points(data$Timestamp, data$CPU, col="blue")

# Dashed red line at the mean CPU utilisation.
abline(h=mean(data$CPU), lty=2, col="red")

#s <- spline(data$CPU)

# Dotted green smoothing spline through the samples.
lines(smooth.spline(data$Timestamp, data$CPU, df = 10), lty = 3, col = "Dark Green")

#
#lines(s$x, s$y, type="b", pch=22, col="blue", lty=2)

dev.off()
Generating the CSV Data
The following script (CpuCsv.py) selects a subset of the vmstat output (the interrupt, context-switch and CPU columns for the afternoon hours) and writes it out as CSV. File:CpuCsv.py
#!/usr/bin/env python
#
#  Author:  Peter Harding  <plh@performiq.com.au>
#
#           PerformIQ Pty. Ltd.
#           Suite 230,
#           Level 2,
#           1 Queens Road,
#           MELBOURNE, VIC, 3004
#
#           Mobile:  0418 375 085
#
#
#  NAME
#    cpucsv.py - Skeleton python script
#
#  SYNOPSIS
#    cpucsv.py [-dqv]
#
#  PARAMETERS
#    See __doc__ below
#
#  DESCRIPTION
#    ...
#
#  RETURNS
#    0 for successful completion, 1 for any error
#
#  FILES
#    ...
#
#--------------------------------------------------------------------------
"""
Usage:

   $ cpucsv.py [-dv] -p *.vmstat

Parameters:

   -p              Parse
   -d              Increment Debug level
   -v              Set Verbose

"""
#--------------------------------------------------------------------------

import os
import re
import sys
import time
import getopt
import random
import pickle
import pprint
import logging
import urllib

from datetime import datetime

from performiq import Enum, Logger

#--------------------------------------------------------------------------

__at_id__     = "@(#)  cpucsv.py  [2.3.01]  2013-02-20"
__version__   = re.sub(r'.*\[([0-9.]*)\].*', r'\1', __at_id__)

quiet_flg     = False
verbose_flg   = False

debug_level   = 0

table_name    = "people"

DSERVER_PORT  = 9570
MAX_REQUESTS  = 200

lf            = None
log           = None

LOG_DIR       = "/tmp"

home_dir      = None

p_crlf        = re.compile(r'[\r\n]*')
p_hour        = re.compile(r'([0-9]*):')

pp            = pprint.PrettyPrinter(indent=3)

#==========================================================================

class Vmstats:
    """One data row of timestamped vmstat output.

    The row is split on whitespace.  Column 0 is kept as the date, column 1
    as the time, and columns 12-18 as Interrupts, ContextSwitches, User,
    System, Idle, WaitIO and st.  All values are kept as strings except
    ``Hour``, the integer in front of the first ':' of the time column.

    NOTE(review): the column positions presumably match `vmstat` output that
    has been prefixed with a date and a time -- confirm against the tool
    that produces the .vmstat files.
    """

    TotalCount = 0        # Number of rows handed to the constructor
    MinColumns = 19       # Highest column index used is 18

    #--------------------------------------------------------------------

    @classmethod
    def count_row(cls):
        """Increment the class-wide row counter."""
        cls.TotalCount += 1

    #--------------------------------------------------------------------

    def __init__(self, row):
        """Parse `row`.

        Raises ValueError when the row has too few columns or its time
        column does not start with an hour, so callers can skip bad rows
        instead of failing later on a missing attribute.
        """
        Vmstats.count_row()

        cols = row.split()

        if len(cols) < Vmstats.MinColumns:
            raise ValueError("expected at least %d columns, found %d"
                             % (Vmstats.MinColumns, len(cols)))

        m = p_hour.search(cols[1])

        # p_hour accepts zero digits, so also reject an empty match.
        if not m or not m.group(1):
            raise ValueError("no hour in time column %r" % (cols[1],))

        self.Hour            = int(m.group(1))
        self.Date            = cols[0]
        self.Time            = cols[1]
        self.Interrupts      = cols[12]
        self.ContextSwitches = cols[13]
        self.User            = cols[14]
        self.System          = cols[15]
        self.Idle            = cols[16]
        self.WaitIO          = cols[17]
        self.st              = cols[18]

    #--------------------------------------------------------------------

    def __str__(self):
        """Return the row as one CSV record (matches the header in do_work)."""
        return "%s %s,%s,%s,%s,%s,%s,%s" % (
                                 self.Date,
                                 self.Time,
                                 self.Interrupts,
                                 self.ContextSwitches,
                                 self.User,
                                 self.System,
                                 self.Idle,
                                 self.WaitIO,
                              )

    #--------------------------------------------------------------------

#==========================================================================
# And here is the real work...

def _copy_rows(f_in, f_out, start_hour, end_hour):
    """Write the rows of `f_in` whose hour is >= `start_hour` to `f_out`.

    Reading stops after the first row whose hour is greater than `end_hour`.
    """
    for raw in f_in:
        # Truncate EoL markers from end of line
        line = raw.rstrip('\r\n')

        if not line.strip():
            continue

        # Repeated vmstat header lines inside the data
        if 'procs' in line or 'free' in line:
            continue

        try:
            stats = Vmstats(line)
        except ValueError as err:
            sys.stderr.write("[cpucsv] Skipping line (%s): %s\n" % (err, line))
            continue

        print(stats)

        if stats.Hour >= start_hour:
            f_out.write("%s\n" % (stats, ))

        # NOTE(review): the first row past end_hour is written before the
        # loop stops (original behaviour, kept as is) -- confirm intended.
        if stats.Hour > end_hour:
            break

#--------------------------------------------------------------------------

def do_work(fname, start_hour=13, end_hour=17):
    """Convert `<fname>.vmstat` into `<host>_<date>.csv`.

    fname       Input file name without the `.vmstat` extension; it must
                look like `<host>_<digits>_<digits>`.  The first group of
                digits is used as the run date in the output file name.
    start_hour  First hour of the day to include (default 13).
    end_hour    Last hour of the day to include (default 17).

    A file name that does not match is reported and skipped.  If either
    file cannot be opened a message is written to stderr and the process
    exits with status 1.
    """
    Logger.Info("[do_work]")

    p_name = re.compile(r'([^_]*)_([0-9]*)_([0-9]*)')

    m = p_name.search(fname)

    if not m:
        print("Bad file name - %s" % fname)
        # Without a match there is no host/date to build the output name.
        return

    hostname = m.group(1)
    run_date = m.group(2)
    print("%s %s" % (hostname, run_date))

    fname_in  = "%s.vmstat" % fname
    fname_out = "%s_%s.csv" % (hostname, run_date)

    try:
        f_in = open(fname_in, 'r')
    except IOError as msg:
        sys.stderr.write(fname_in + ': cannot open: ' + repr(msg) + '\n')
        sys.exit(1)

    try:
        f_out = open(fname_out, 'w')
    except IOError as msg:
        f_in.close()
        sys.stderr.write(fname_out + ': cannot open: ' + repr(msg) + '\n')
        sys.exit(1)

    try:
        # Skip header lines
        f_in.readline()
        f_in.readline()
        f_in.readline()

        f_out.write("DateTime,Interrupts,ContextSwitches,User,System,Idle,WaitIO\n")

        _copy_rows(f_in, f_out, start_hour, end_hour)
    finally:
        # Close both files even if a row cannot be processed.
        f_in.close()
        f_out.close()

#=========================================================================

def usage():
    """Print the module usage text."""
    print(__doc__)

#-------------------------------------------------------------------------

def main(argv):
    """Parse the command line in `argv` and run the selected mode.

    Returns the process exit status: 0 on success, 1 on error.
    """
    global debug_level
    global quiet_flg
    global verbose_flg
    global home_dir

    try:
        home_dir = os.environ['HOME']
    except KeyError:
        print("Set HOME environment variable and re-run")
        return 1

    Modes    = Enum(["Info", "Parse", ])
    mode     = Modes.Info
    filenames = []

    try:
        opts, args = getopt.getopt(argv, "dD:f:hpqvV?",
                ("debug", "debug-level=", "file=", "help", "quiet", "verbose", "version"))
    except getopt.error as msg:
        sys.stderr.write("[cpucsv] %s\n" % msg)
        usage()
        return 1

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            return 0
        elif opt in ('-d', '--debug'):
            debug_level += 1
        elif opt in ('-D', '--debug-level'):
            debug_level = int(arg)
        elif opt in ('-f', '--file'):
            mode = Modes.Parse
            filenames.append(arg)
        elif opt in ('-p',):
            mode = Modes.Parse
        elif opt in ('-q', '--quiet'):
            quiet_flg = True
        elif opt in ('-v', '--verbose'):
            verbose_flg = True
        elif opt in ('-V', '--version'):
            if quiet_flg:
                print(__version__)
            else:
                print("[cpucsv]  Version: %s" % __version__)
            # Reporting the version is a successful run.
            return 0
        else:
            usage()
            return 1

    # Positional arguments must end in ".vmstat"; do_work() is given the
    # name without that extension.
    p_vmstat = re.compile(r'(.*)\.vmstat$')

    for arg in args:
        m = p_vmstat.search(arg)
        if m:
            filenames.append(m.group(1))
        else:
            print("Bad filename [%s]" % arg)

    sys.stderr.write("[cpucsv]  Working directory is %s\n" % os.getcwd())

    if debug_level > 0:
        sys.stderr.write("[cpucsv]  Debugging level set to %d\n" % debug_level)

    sys.stderr.flush()

    Logger.Init(name='cpucsv')

    if mode == Modes.Info:
        Logger.Info('Info')
    elif mode == Modes.Parse:
        Logger.Info('Parsing')
        for filename in filenames:
            do_work(filename)
    else:
        Logger.Info('Nothing to do')

    return 0

#--------------------------------------------------------------------------

if __name__ == '__main__' or __name__ == sys.argv[0]:
    try:
        sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        print("[cpucsv]  Interrupted!")

#--------------------------------------------------------------------------

"""
Revision History:

     Date     Who   Description
   --------   ---   ------------------------------------------------------------
   20031014   plh   Initial implementation
   20111101   plh   Add in Enums for modal behaviour
   20130220   plh   Reconstructed performiq module

Problems to fix:

To Do:

Issues:

"""