Difference between revisions of "Graphing with R"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) |
PeterHarding (talk | contribs) |
||
(3 intermediate revisions by the same user not shown) | |||
Line 77: | Line 77: | ||
[[Image:Lnfsna51 20130814.png]] | [[Image:Lnfsna51 20130814.png]] | ||
[[File:Lnfsna51 20130814. | [[File:Lnfsna51 20130814.csv]] | ||
=Generating the CSV Data= | |||
The following script (CpuCsv.py) is used to select a subset of a VMSTAT output. [[File:CpuCsv.py]] | |||
<pre> | |||
#!/usr/bin/env python | |||
# | |||
# Author: Peter Harding <plh@performiq.com.au> | |||
# | |||
# PerformIQ Pty. Ltd. | |||
# Suite 230, | |||
# Level 2, | |||
# 1 Queens Road, | |||
# MELBOURNE, VIC, 3004 | |||
# | |||
# Mobile: 0418 375 085 | |||
# | |||
# | |||
# NAME | |||
# cpucsv.py - Skeleton python script | |||
# | |||
# SYNOPSIS | |||
# cpucsv.py [-dqv] | |||
# | |||
# PARAMETERS | |||
# See __doc__ below | |||
# | |||
# DESCRIPTION | |||
# ... | |||
# | |||
# RETURNS | |||
# 0 for successful completion, 1 for any error | |||
# | |||
# FILES | |||
# ... | |||
# | |||
#-------------------------------------------------------------------------- | |||
""" | |||
Usage: | |||
$ cpucsv.py [-dv] -p *.vmstat | |||
Parameters: | |||
-p Parse | |||
-d Increment Debug level | |||
-v Set Verbose | |||
""" | |||
#-------------------------------------------------------------------------- | |||
import os | |||
import re | |||
import sys | |||
import time | |||
import getopt | |||
import random | |||
import pickle | |||
import pprint | |||
import logging | |||
import urllib | |||
from datetime import datetime | |||
from performiq import Enum, Logger | |||
#-------------------------------------------------------------------------- | |||
# --- Version identification -------------------------------------------------
# Identification string in the "@(#)" style; the bracketed field is the
# version number, which the re.sub() below extracts.
__at_id__ = "@(#) cpucsv.py [2.3.01] 2013-02-20"
__version__ = re.sub(r'.*\[([0-9.]*)\].*', r'\1', __at_id__)

# --- Run-time switches (main() updates these from the command line) ---------
debug_level = 0
verbose_flg = False
quiet_flg = False

# --- Miscellaneous settings -------------------------------------------------
# None of the names in this group is referenced by the rest of the script as
# shown here; they are kept because other code may import them.
MAX_REQUESTS = 200
DSERVER_PORT = 9570
table_name = "people"
LOG_DIR = "/tmp"
log = None
lf = None

# Filled in by main() from the HOME environment variable.
home_dir = None

# --- Shared helper objects --------------------------------------------------
p_hour = re.compile(r'([0-9]*):')   # digits directly before a ':' (the hour)
p_crlf = re.compile(r'[\r\n]*')     # runs of CR/LF characters
pp = pprint.PrettyPrinter(indent=3)
#========================================================================== | |||
class Vmstats:
    """One row of timestamped vmstat output, reduced to its CPU columns.

    The row is split on whitespace.  Column 0 is kept as the date and
    column 1 as the time; columns 12 to 18 are kept, in that order, as
    Interrupts, ContextSwitches, User, System, Idle, WaitIO and st.
    Every column is stored as the text found in the row (no conversion).

    Instance attributes:
        Hour: int taken from the digits directly before the first ':' in
            the time column, or None when the time column contains no
            such digits.
        Date, Time, Interrupts, ContextSwitches, User, System, Idle,
        WaitIO, st: column text as described above.

    Class attributes:
        TotalCount: number of rows handed to the constructor so far,
            including rows that were then rejected.
    """

    TotalCount = 0

    # A usable row needs column indices 0 through 18.
    _MIN_COLUMNS = 19

    # One or more digits immediately followed by ':'.  Requiring at least
    # one digit means int() below is never handed an empty string.
    _HOUR_RE = re.compile(r'([0-9]+):')

    #--------------------------------------------------------------------

    @classmethod
    def count_row(cls):
        """Add one to the class-wide row counter."""
        cls.TotalCount += 1

    #--------------------------------------------------------------------

    def __init__(self, row):
        """Parse one line of vmstat output.

        Args:
            row: the line, with or without its end-of-line marker.

        Raises:
            ValueError: when the row has fewer than 19 whitespace-separated
                columns (for example a blank line).
        """
        Vmstats.count_row()

        cols = row.split()

        if len(cols) < self._MIN_COLUMNS:
            raise ValueError(
                "vmstat row has %d columns, expected at least %d: %r"
                % (len(cols), self._MIN_COLUMNS, row))

        # Always define Hour so callers can test it without risking an
        # AttributeError on rows whose time column is not HH:MM:SS-like.
        m = self._HOUR_RE.search(cols[1])
        self.Hour = int(m.group(1)) if m else None

        self.Date = cols[0]
        self.Time = cols[1]
        self.Interrupts = cols[12]
        self.ContextSwitches = cols[13]
        self.User = cols[14]
        self.System = cols[15]
        self.Idle = cols[16]
        self.WaitIO = cols[17]
        self.st = cols[18]

    #--------------------------------------------------------------------

    def __str__(self):
        """Return the row as one CSV record.

        The first field is "Date Time" (joined by a space), followed by
        Interrupts, ContextSwitches, User, System, Idle and WaitIO.  The
        st column is not included.
        """
        fields = (
            "%s %s" % (self.Date, self.Time),
            self.Interrupts,
            self.ContextSwitches,
            self.User,
            self.System,
            self.Idle,
            self.WaitIO,
        )
        return ",".join(fields)

    #--------------------------------------------------------------------
#========================================================================== | |||
# And here is the real work... | |||
def do_work(fname, start_hour=13, end_hour=17):
    """Convert FNAME.vmstat into HOSTNAME_RUNDATE.csv.

    The host name and run date are taken from *fname*, which must look
    like HOSTNAME_DIGITS_DIGITS.  The first three lines of the input are
    skipped, as is any later line containing "procs" or "free" (repeated
    vmstat headers).  Every remaining line is parsed with Vmstats and
    echoed to standard output.

    Rows whose hour is >= *start_hour* are written to the CSV file.
    Reading stops after the first row whose hour is > *end_hour*; note
    that this boundary row is itself written before reading stops.

    Args:
        fname: input file name without the ".vmstat" extension.
        start_hour: first hour of the day to export (default 13).
        end_hour: last hour of the day to export (default 17).

    Returns:
        None.  A file name that does not match the expected pattern is
        reported on standard output and skipped.

    Raises:
        SystemExit: with status 1 when the input or output file cannot
            be opened.
    """
    Logger.Info("[do_work]")

    m = re.search(r'([^_]*)_([0-9]*)_([0-9]*)', fname)

    if not m:
        # Without a match there is no host name or run date to build the
        # output file name from, so give up on this file only.
        print("Bad file name - %s" % fname)
        return

    hostname = m.group(1)
    run_date = m.group(2)
    print("%s %s" % (hostname, run_date))

    fname_in = "%s.vmstat" % fname
    fname_out = "%s_%s.csv" % (hostname, run_date)

    try:
        f_in = open(fname_in, 'r')
    except IOError as msg:
        sys.stderr.write(fname_in + ': cannot open: ' + repr(msg) + '\n')
        sys.exit(1)

    # The with-blocks close both files on every exit path, including the
    # sys.exit() below and any exception raised while copying rows.
    with f_in:
        try:
            f_out = open(fname_out, 'w+')
        except IOError as msg:
            sys.stderr.write(fname_out + ': cannot open: ' + repr(msg) + '\n')
            sys.exit(1)

        with f_out:
            # Skip header lines
            for _ in range(3):
                f_in.readline()

            f_out.write("DateTime,Interrupts,ContextSwitches,User,System,Idle,WaitIO\n")

            for line in f_in:
                # Truncate EoL markers from end of line
                line = p_crlf.sub('', line)

                # vmstat repeats its two header lines periodically.
                if 'procs' in line or 'free' in line:
                    continue

                try:
                    stats = Vmstats(line)
                except (IndexError, ValueError):
                    # Too few columns (e.g. a blank or truncated line).
                    sys.stderr.write("%s: skipping malformed row: %r\n"
                                     % (fname_in, line))
                    continue

                print(stats)

                # Hour may be missing or None when the time column could
                # not be parsed; such a row cannot be placed in the window.
                hour = getattr(stats, 'Hour', None)
                if hour is None:
                    sys.stderr.write("%s: skipping row without an hour: %r\n"
                                     % (fname_in, line))
                    continue

                if hour >= start_hour:
                    f_out.write("%s\n" % (stats, ))

                if hour > end_hour:
                    break
#========================================================================= | |||
def usage():
    """Print the module docstring (the usage text) to standard output."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(__doc__)
#------------------------------------------------------------------------- | |||
def main(argv):
    """Parse the command line and convert the requested vmstat files.

    Options handled:
        -?, -h, --help          print the usage text and return 0
        -d, --debug             add one to the debug level
        -D N, --debug-level=N   set the debug level to the integer N
        -f NAME, --file=NAME    select Parse mode and queue NAME, which is
                                passed to do_work() unchanged
        -p                      select Parse mode
        -q, --quiet             set the quiet flag
        -v, --verbose           set the verbose flag
        -V, --version           print the version and return 0; the short
                                form is printed only if -q came earlier

    Positional arguments ending in ".vmstat" are queued with that
    extension removed; any other positional argument is reported as a
    bad file name and ignored.

    Args:
        argv: the command-line arguments without the program name.

    Returns:
        0 on success, 1 on a command-line error.

    Raises:
        SystemExit: with status 1 when the HOME environment variable is
            not set.
    """
    global debug_level
    global quiet_flg
    global verbose_flg
    global home_dir

    try:
        home_dir = os.environ['HOME']
    except KeyError:
        print("Set HOME environment variable and re-run")
        # A missing prerequisite is an error, so do not report success.
        sys.exit(1)

    Modes = Enum(["Info", "Parse", ])
    mode = Modes.Info
    filenames = []

    try:
        opts, args = getopt.getopt(argv, "dD:f:hpqvV?",
              ("debug", "debug-level=", "file=", "help", "quiet", "verbose", "version"))
    except getopt.error as msg:
        sys.stderr.write("[cpucsv] %s\n" % msg)
        usage()
        return 1

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            return 0
        elif opt in ('-d', '--debug'):
            debug_level += 1
        elif opt in ('-D', '--debug-level'):
            try:
                debug_level = int(arg)
            except ValueError:
                sys.stderr.write("[cpucsv] Bad debug level [%s]\n" % arg)
                usage()
                return 1
        elif opt in ('-f', '--file'):
            mode = Modes.Parse
            filenames.append(arg)
        elif opt in ('-p',):
            mode = Modes.Parse
        elif opt in ('-q', '--quiet'):
            quiet_flg = True
        elif opt in ('-v', '--verbose'):
            verbose_flg = True
        elif opt in ('-V', '--version'):
            if quiet_flg:
                print(__version__)
            else:
                print("[cpucsv] Version: %s" % __version__)
            # Printing the version is a successful run.
            return 0
        else:
            usage()
            return 1

    # The dot is escaped and the pattern anchored so that only names that
    # really end in ".vmstat" are accepted.
    p_vmstat = re.compile(r'(.*)\.vmstat$')

    for arg in args:
        m = p_vmstat.search(arg)
        if m:
            filenames.append(m.group(1))
        else:
            print("Bad filename [%s]" % arg)

    sys.stderr.write("[cpucsv] Working directory is %s\n" % os.getcwd())

    if debug_level > 0:
        sys.stderr.write("[cpucsv] Debugging level set to %d\n" % debug_level)

    sys.stderr.flush()

    Logger.Init(name='cpucsv')

    if mode == Modes.Info:
        Logger.Info('Info')
    elif mode == Modes.Parse:
        Logger.Info('Parsing')
        for filename in filenames:
            do_work(filename)
    else:
        Logger.Info('Nothing to do')

    return 0
#-------------------------------------------------------------------------- | |||
# Script entry point: run main() on the command-line arguments and use its
# return value as the process exit status.
if __name__ == '__main__' or __name__ == sys.argv[0]:
    try:
        sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        print("[cpucsv] Interrupted!")
        # An interrupted run did not complete, so report failure.
        sys.exit(1)
#-------------------------------------------------------------------------- | |||
""" | |||
Revision History: | |||
Date Who Description | |||
-------- --- ------------------------------------------------------------ | |||
20031014 plh Initial implementation | |||
20111101 plh Add in Enums for modal behaviour | |||
20130220 plh Reconstructed performiq module | |||
Problems to fix: | |||
To Do: | |||
Issues: | |||
""" | |||
</pre> | |||
[[Category:R]] | [[Category:R]] |
Latest revision as of 17:17, 17 September 2013
Processing Data on Windows 7
I use the following BAT file to run R in batch mode over a folder containing multiple data sets.
@echo off set R_TERM="C:\Apps\R\R-3.0.1\bin\i386\Rterm.exe" set FILES=*.csv for %%F in (%FILES%) DO ( ECHO Processing %%F %R_TERM% --no-restore --no-save --args %%F < plot.r > plot.out 2>&1 )
R Script File
args <- commandArgs(trailingOnly = TRUE) print(args) name <- args[1] regexp <- "([^_]*)_([[:digit:]]{4})([[:digit:]]{2})([[:digit:]]{2}).*" host <- sub(pattern=regexp, replacement="\\1", x=name) year <- sub(pattern=regexp, replacement="\\2", x=name) month <- sub(pattern=regexp, replacement="\\3", x=name) day <- sub(pattern=regexp, replacement="\\4", x=name) date <- sprintf("%s-%s-%s", year, month, day) rm(args) data_file <- sprintf("%s_%s%s%s.csv", host, year, month, day) print(data_file) data <- read.csv(data_file, header=T) # data$Timestamp <- strptime(paste(data$Date, data$Time), "%Y-%m-%d %H:%M:%S") data$Timestamp <- strptime(data$DateTime, "%Y-%m-%d %H:%M:%S") title <- sprintf("%s - %s - %% CPU Utilization", date, host) data$CPU <- data$User + data$System print(title) png(sprintf("%s_%s%s%s.png", host, year, month, day)) plot(data$Timestamp, data$CPU, main=title, type="h", col="light blue", xlab="Time", ylab="Ucpu%", lwd=1) points(data$Timestamp, data$CPU, col="blue") abline(h=mean(data$CPU), lty=2, col="red") #s <- spline(data$CPU) lines(smooth.spline(data$Timestamp, data$CPU, df = 10), lty = 3, col = "Dark Green") # #lines(s$x, s$y, type="b", pch=22, col="blue", lty=2) dev.off()
Generating the CSV Data
The following script (CpuCsv.py) selects a subset of the rows and columns of vmstat output and writes them to a CSV file. File:CpuCsv.py
#!/usr/bin/env python # # Author: Peter Harding <plh@performiq.com.au> # # PerformIQ Pty. Ltd. # Suite 230, # Level 2, # 1 Queens Road, # MELBOURNE, VIC, 3004 # # Mobile: 0418 375 085 # # # NAME # cpucsv.py - Skeleton python script # # SYNOPSIS # cpucsv.py [-dqv] # # PARAMETERS # See __doc__ below # # DESCRIPTION # ... # # RETURNS # 0 for successful completion, 1 for any error # # FILES # ... # #-------------------------------------------------------------------------- """ Usage: $ cpucsv.py [-dv] -p *.vmstat Parameters: -p Parse -d Increment Debug level -v Set Verbose """ #-------------------------------------------------------------------------- import os import re import sys import time import getopt import random import pickle import pprint import logging import urllib from datetime import datetime from performiq import Enum, Logger #-------------------------------------------------------------------------- __at_id__ = "@(#) cpucsv.py [2.3.01] 2013-02-20" __version__ = re.sub(r'.*\[([0-9.]*)\].*', r'\1', __at_id__) quiet_flg = False verbose_flg = False debug_level = 0 table_name = "people" DSERVER_PORT = 9570 MAX_REQUESTS = 200 lf = None log = None LOG_DIR = "/tmp" home_dir = None p_crlf = re.compile(r'[\r\n]*') p_hour = re.compile(r'([0-9]*):') pp = pprint.PrettyPrinter(indent=3) #========================================================================== class Vmstats: TotalCount = 0 #-------------------------------------------------------------------- @classmethod def count_row(cls): cls.TotalCount += 1 #-------------------------------------------------------------------- def __init__(self, row): Vmstats.count_row() cols = row.split() m = p_hour.search(cols[1]) if m: self.Hour = int(m.group(1)) self.Date = cols[0] self.Time = cols[1] self.Interrupts = cols[12] self.ContextSwitches = cols[13] self.User = cols[14] self.System = cols[15] self.Idle = cols[16] self.WaitIO = cols[17] self.st = cols[18] 
#-------------------------------------------------------------------- def __str__(self): return "%s %s,%s,%s,%s,%s,%s,%s" % ( self.Date, self.Time, self.Interrupts, self.ContextSwitches, self.User, self.System, self.Idle, self.WaitIO, ) #-------------------------------------------------------------------- #========================================================================== # And here is the real work... def do_work(fname): Logger.Info("[do_work]") p_name = re.compile(r'([^_]*)_([0-9]*)_([0-9]*)') m = p_name.search(fname) if m: hostname = m.group(1) run_date = m.group(2) print hostname, run_date else: print "Bad file name - %s" % fname fname_in = "%s.vmstat" % fname fname_out = "%s_%s.csv" % (hostname, run_date) try: f_in = open(fname_in, 'r') except IOError, msg: sys.stderr.write(fname_in + ': cannot open: ' + `msg` + '\n') sys.exit(1) try: f_out = open(fname_out, 'w+') except IOError, msg: sys.stderr.write(fname_out + ': cannot open: ' + `msg` + '\n') sys.exit(1) p_procs = re.compile(r'procs') p_hdr = re.compile(r'free') # Skip header lines f_in.readline() f_in.readline() f_in.readline() f_out.write("DateTime,Interrupts,ContextSwitches,User,System,Idle,WaitIO\n") while True: line = f_in.readline() if not line: break # Truncate EoL markers from end of line line = p_crlf.sub('', line) # or 'line = line[:-1]' m = p_procs.search(line) if m: continue if p_hdr.search(line): continue stats = Vmstats(line) print stats if stats.Hour >= 13: f_out.write("%s\n" % (stats, )) if stats.Hour > 17: break f_in.close() f_out.close() #========================================================================= def usage(): print __doc__ #------------------------------------------------------------------------- def main(argv): global debug_level global quiet_flg global verbose_flg global target global home_dir try: home_dir = os.environ['HOME'] except: print "Set HOME environment variable and re-run" sys.exit(0) Modes = Enum(["Info", "Parse", ]) mode = Modes.Info filename = "test" 
filenames = [] try: opts, args = getopt.getopt(argv, "dD:f:hpqvV?", ("debug", "debug-level=", "file=", "help", "quiet", "verbose", "version")) except getopt.error, msg: usage() return 1 for opt, arg in opts: if opt in ("-?", "-h", "--help"): usage() return 0 elif opt in ('-d', '--debug'): debug_level += 1 elif opt in ('-D', '--debug-level'): debug_level = int(arg) elif opt in ('-f', '--file'): mode = Modes.Parse filenames.append(arg) elif opt in ('-p',): mode = Modes.Parse elif opt in ('-q', '--quiet'): quiet_flg = True elif opt in ('-v', '--verbose'): verbose_flg = True elif opt in ('-V', '--version'): if quiet_flg: print __version__ else: print "[cpucsv] Version: %s" % __version__ return 1 else: usage() return 1 p_vmstat = re.compile(r'(.*).vmstat') for arg in args: m = p_vmstat.search(arg) if m: filenames.append(m.group(1)) else: print "Bad filename [%s]" % arg sys.stderr.write("[cpucsv] Working directory is %s\n" % os.getcwd()) if (debug_level > 0): sys.stderr.write("[cpucsv] Debugging level set to %d\n" % debug_level) sys.stderr.flush() Logger.Init(name='cpucsv') if mode == Modes.Info: Logger.Info('Info') elif mode == Modes.Parse: Logger.Info('Parsing') for filename in filenames: do_work(filename) else: Logger.Info('Nothing to do') return 0 #-------------------------------------------------------------------------- if __name__ == '__main__' or __name__ == sys.argv[0]: try: sys.exit(main(sys.argv[1:])) except KeyboardInterrupt, e: print "[cpucsv] Interrupted!" #-------------------------------------------------------------------------- """ Revision History: Date Who Description -------- --- ------------------------------------------------------------ 20031014 plh Initial implementation 20111101 plh Add in Enums for modal behaviour 20130220 plh Reconstructed performiq module Problems to fix: To Do: Issues: """