Graphing with R
Jump to navigation
Jump to search
Processing Data on Windows 7
I use the following BAT file to run R in batch mode over every data set (CSV file) in a folder.
@echo off
REM Run plot.r once for every CSV data set in the current folder.
REM Each file name is handed to the R script after --args, and the console
REM output of all runs is collected in plot.out.

set R_TERM="C:\Apps\R\R-3.0.1\bin\i386\Rterm.exe"
set FILES=*.csv

REM Start from an empty log. Output is appended per file below, because
REM overwriting the log on each pass would keep only the last file's output.
if exist plot.out del plot.out

for %%F in (%FILES%) DO (
    ECHO Processing %%F
    %R_TERM% --no-restore --no-save --args "%%F" < plot.r >> plot.out 2>&1
)
R Script File
# Plot CPU utilisation (User + System) over time for one CSV data set.
#
# Invocation: Rterm --no-restore --no-save --args <host>_<YYYYMMDD>.csv < plot.r
# Reads:      <host>_<YYYYMMDD>.csv  (uses the DateTime, User and System columns)
# Writes:     <host>_<YYYYMMDD>.png

args <- commandArgs(trailingOnly = TRUE)
print(args)
if (length(args) < 1) {
  stop("no data file given; pass the CSV file name after --args")
}
name <- args[1]

# The file name encodes the host and the run date.  The pattern is anchored
# and checked up front: sub() returns its input unchanged when nothing
# matches, which would otherwise produce a nonsensical host/date/file name.
regexp <- "^([^_]*)_([[:digit:]]{4})([[:digit:]]{2})([[:digit:]]{2}).*"
if (!grepl(regexp, name)) {
  stop(sprintf("file name '%s' is not of the form <host>_<YYYYMMDD>*", name))
}

host <- sub(pattern=regexp, replacement="\\1", x=name)
year <- sub(pattern=regexp, replacement="\\2", x=name)
month <- sub(pattern=regexp, replacement="\\3", x=name)
day <- sub(pattern=regexp, replacement="\\4", x=name)
date <- sprintf("%s-%s-%s", year, month, day)
rm(args)

data_file <- sprintf("%s_%s%s%s.csv", host, year, month, day)
print(data_file)
data <- read.csv(data_file, header=TRUE)

# If the CSV has separate Date and Time columns instead of DateTime, use:
#   strptime(paste(data$Date, data$Time), "%Y-%m-%d %H:%M:%S")
# strptime() yields POSIXlt; convert to POSIXct, the form intended for
# storage in a data frame column.
data$Timestamp <- as.POSIXct(strptime(data$DateTime, "%Y-%m-%d %H:%M:%S"))

title <- sprintf("%s - %s - %% CPU Utilization", date, host)
data$CPU <- data$User + data$System
print(title)

png(sprintf("%s_%s%s%s.png", host, year, month, day))
# One vertical bar plus a point per sample.
plot(data$Timestamp, data$CPU, main=title, type="h", col="light blue", xlab="Time", ylab="Ucpu%", lwd=1)
points(data$Timestamp, data$CPU, col="blue")
# Dashed red line at the mean, dotted green smoothing spline as the trend.
abline(h=mean(data$CPU), lty=2, col="red")
lines(smooth.spline(data$Timestamp, data$CPU, df = 10), lty = 3, col = "Dark Green")
dev.off()
Generating the CSV Data
The following script (CpuCsv.py) extracts the timestamp and CPU-related columns from a vmstat output file, for a fixed window of hours, and writes them to a CSV file. File:CpuCsv.py
#!/usr/bin/env python
#
# Author: Peter Harding <plh@performiq.com.au>
#
# PerformIQ Pty. Ltd.
# Suite 230,
# Level 2,
# 1 Queens Road,
# MELBOURNE, VIC, 3004
#
# Mobile: 0418 375 085
#
#
# NAME
# cpucsv.py - Convert timestamped vmstat output into a CSV of CPU statistics
#
# SYNOPSIS
# cpucsv.py [-dhqvV] [-D level] [-f name] [-p] [file.vmstat ...]
#
# PARAMETERS
# See __doc__ below
#
# DESCRIPTION
# ...
#
# RETURNS
# 0 for successful completion, 1 for any error
#
# FILES
# ...
#
#--------------------------------------------------------------------------
"""
Usage:
$ cpucsv.py [-dv] -p *.vmstat
Parameters:
-p Parse
-d Increment Debug level
-v Set Verbose
"""
#--------------------------------------------------------------------------
import os
import re
import sys
import time
import getopt
import random
import pickle
import pprint
import logging
import urllib
from datetime import datetime
from performiq import Enum, Logger
#--------------------------------------------------------------------------
# --- Script identity ------------------------------------------------------
# Identification string; the version is the text between the square brackets.
__at_id__ = "@(#) cpucsv.py [2.3.01] 2013-02-20"
__version__ = re.sub(r'.*\[([0-9.]*)\].*', r'\1', __at_id__)

# --- Run-time switches (updated by main() from the command line) ----------
debug_level = 0
quiet_flg = False
verbose_flg = False
home_dir = None

# --- Settings not referenced by the code visible in this file -------------
# NOTE(review): these look like leftovers from the script skeleton - confirm
# before removing.
table_name = "people"
DSERVER_PORT = 9570
MAX_REQUESTS = 200
LOG_DIR = "/tmp"
lf = None
log = None

# --- Shared helpers -------------------------------------------------------
# Matches any run of CR/LF characters (used to strip end-of-line markers).
p_crlf = re.compile(r'[\r\n]*')
# Captures the digits immediately before the first ':' of a time field.
p_hour = re.compile(r'([0-9]*):')
pp = pprint.PrettyPrinter(indent=3)
#==========================================================================
class Vmstats:
    """One row of vmstat output that is prefixed with a date and a time.

    The row is split on whitespace.  Fields 0 and 1 are taken as the date
    and time, and fields 12-18 as interrupts, context switches, user,
    system, idle, wait-I/O and ``st``.  All values are kept as strings
    except ``Hour``, which is an ``int`` (or ``None`` when the time field
    does not start with digits followed by ``:``).

    ``str(obj)`` gives one CSV record:
    ``"<Date> <Time>,<Interrupts>,<ContextSwitches>,<User>,<System>,<Idle>,<WaitIO>"``.

    Raises:
        IndexError: if the row has fewer than 19 whitespace-separated fields.
    """

    # Number of rows handed to the constructor so far (valid or not).
    TotalCount = 0

    # Field indices 0..18 are read by __init__.
    _MIN_COLUMNS = 19

    # Digits immediately before the first ':' of the time field.  Kept on
    # the class so it does not depend on a module-level pattern.
    _HOUR_RE = re.compile(r'([0-9]*):')

    #--------------------------------------------------------------------

    @classmethod
    def count_row(cls):
        """Increment the class-wide count of rows seen."""
        cls.TotalCount += 1

    #--------------------------------------------------------------------

    def __init__(self, row):
        """Parse ``row`` (one line of text) into named attributes."""
        Vmstats.count_row()

        cols = row.split()
        if len(cols) < self._MIN_COLUMNS:
            # Same exception type a bare cols[n] lookup would raise, but
            # with enough context to find the offending line.
            raise IndexError(
                "vmstat row has %d fields, expected at least %d: %r"
                % (len(cols), self._MIN_COLUMNS, row))

        # Always define Hour so callers can test it; an unparseable time
        # yields None instead of a missing attribute or int('') failure.
        m = self._HOUR_RE.search(cols[1])
        self.Hour = int(m.group(1)) if m and m.group(1) else None

        self.Date = cols[0]
        self.Time = cols[1]
        self.Interrupts = cols[12]
        self.ContextSwitches = cols[13]
        self.User = cols[14]
        self.System = cols[15]
        self.Idle = cols[16]
        self.WaitIO = cols[17]
        self.st = cols[18]

    #--------------------------------------------------------------------

    def __str__(self):
        """Return the row as a CSV record (``st`` is not included)."""
        return "%s %s,%s,%s,%s,%s,%s,%s" % (
            self.Date,
            self.Time,
            self.Interrupts,
            self.ContextSwitches,
            self.User,
            self.System,
            self.Idle,
            self.WaitIO,
        )

    #--------------------------------------------------------------------
#==========================================================================
# And here is the real work...
def do_work(fname, start_hour=13, end_hour=17):
    """Convert ``<fname>.vmstat`` into ``<host>_<date>.csv``.

    ``fname`` is the input file name without its ``.vmstat`` suffix and
    must look like ``<host>_<digits>_<digits>``; the first two parts become
    the host name and run date used to name the output file.

    The first three input lines are skipped unconditionally, as is any
    later line containing ``procs`` or ``free`` (repeated vmstat headers).
    Every parsed row is echoed to stdout.  Rows whose hour is at least
    ``start_hour`` are written to the CSV; processing stops after the first
    row whose hour is greater than ``end_hour`` (that boundary row is
    itself still written).

    Blank lines, rows with too few fields and rows without a parseable
    hour are reported on stderr and skipped instead of aborting the run.

    Args:
        fname: input file name minus the ``.vmstat`` extension.
        start_hour: first hour of the day to include (default 13).
        end_hour: last full hour of the day to include (default 17).

    Exits the process with status 1 if either file cannot be opened.
    Returns None; a name that does not match the expected pattern is
    reported on stdout and ignored.
    """
    Logger.Info("[do_work]")

    m = re.search(r'([^_]*)_([0-9]*)_([0-9]*)', fname)
    if not m:
        # Without host and date there is no output name to build, so stop
        # here rather than failing later on undefined variables.
        print("Bad file name - %s" % fname)
        return

    hostname = m.group(1)
    run_date = m.group(2)
    print("%s %s" % (hostname, run_date))

    fname_in = "%s.vmstat" % fname
    fname_out = "%s_%s.csv" % (hostname, run_date)

    try:
        f_in = open(fname_in, 'r')
    except IOError as msg:
        sys.stderr.write(fname_in + ': cannot open: ' + repr(msg) + '\n')
        sys.exit(1)

    # The with-blocks guarantee both files are closed on every exit path,
    # including sys.exit() and unexpected exceptions.
    with f_in:
        try:
            f_out = open(fname_out, 'w')
        except IOError as msg:
            sys.stderr.write(fname_out + ': cannot open: ' + repr(msg) + '\n')
            sys.exit(1)

        with f_out:
            # Skip header lines
            for _ in range(3):
                f_in.readline()

            f_out.write("DateTime,Interrupts,ContextSwitches,User,System,Idle,WaitIO\n")

            for line in f_in:
                # Truncate EoL markers from end of line
                line = p_crlf.sub('', line)

                if not line.strip():
                    continue
                if 'procs' in line or 'free' in line:
                    continue

                try:
                    stats = Vmstats(line)
                except (IndexError, ValueError) as err:
                    sys.stderr.write("%s: skipping malformed row: %s\n" % (fname_in, err))
                    continue

                print(stats)

                # Hour may be absent or None when the time field is odd.
                hour = getattr(stats, 'Hour', None)
                if hour is None:
                    sys.stderr.write("%s: skipping row without an hour: %s\n" % (fname_in, line))
                    continue

                if hour >= start_hour:
                    f_out.write("%s\n" % (stats, ))

                if hour > end_hour:
                    break
#=========================================================================
def usage():
    """Print the module docstring, which holds the command-line help text."""
    print(__doc__)
#-------------------------------------------------------------------------
def main(argv):
    """Parse the command line and run the requested mode.

    Options ``-f NAME`` (a base name without ``.vmstat``) and ``-p`` select
    parse mode; every positional argument ending in ``.vmstat`` is added to
    the list of files with the suffix removed.  In parse mode each file is
    passed to ``do_work``; otherwise only an informational log entry is
    written.

    Args:
        argv: command-line arguments without the program name.

    Returns:
        0 on success (including ``--help`` and ``--version``), 1 on any
        error: HOME not set, an invalid option, or a non-integer ``-D``
        value.
    """
    global debug_level
    global quiet_flg
    global verbose_flg
    global home_dir

    try:
        home_dir = os.environ['HOME']
    except KeyError:
        # A missing HOME is an error, so report a non-zero status.
        print("Set HOME environment variable and re-run")
        return 1

    Modes = Enum(["Info", "Parse", ])
    mode = Modes.Info
    filenames = []

    try:
        opts, args = getopt.getopt(argv, "dD:f:hpqvV?",
            ("debug", "debug-level=", "file=", "help", "quiet", "verbose", "version"))
    except getopt.error as msg:
        # Tell the user what was wrong before showing the help text.
        sys.stderr.write("[cpucsv] %s\n" % msg)
        usage()
        return 1

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            return 0
        elif opt in ('-d', '--debug'):
            debug_level += 1
        elif opt in ('-D', '--debug-level'):
            try:
                debug_level = int(arg)
            except ValueError:
                sys.stderr.write("[cpucsv] Debug level must be an integer: %s\n" % arg)
                return 1
        elif opt in ('-f', '--file'):
            mode = Modes.Parse
            filenames.append(arg)
        elif opt in ('-p',):
            mode = Modes.Parse
        elif opt in ('-q', '--quiet'):
            quiet_flg = True
        elif opt in ('-v', '--verbose'):
            verbose_flg = True
        elif opt in ('-V', '--version'):
            if quiet_flg:
                print(__version__)
            else:
                print("[cpucsv] Version: %s" % __version__)
            # Printing the version is a successful outcome.
            return 0
        else:
            usage()
            return 1

    # Accept only names that really end in ".vmstat"; the dot is escaped
    # and the pattern anchored so e.g. "a.vmstat.bak" is rejected.
    p_vmstat = re.compile(r'(.*)\.vmstat$')
    for arg in args:
        m = p_vmstat.search(arg)
        if m:
            filenames.append(m.group(1))
        else:
            print("Bad filename [%s]" % arg)

    sys.stderr.write("[cpucsv] Working directory is %s\n" % os.getcwd())
    if debug_level > 0:
        sys.stderr.write("[cpucsv] Debugging level set to %d\n" % debug_level)
    sys.stderr.flush()

    Logger.Init(name='cpucsv')

    if mode == Modes.Info:
        Logger.Info('Info')
    elif mode == Modes.Parse:
        Logger.Info('Parsing')
        for filename in filenames:
            do_work(filename)
    else:
        Logger.Info('Nothing to do')

    return 0
#--------------------------------------------------------------------------
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__' or __name__ == sys.argv[0]:
    try:
        sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        print("[cpucsv] Interrupted!")
        # An interrupted run did not complete, so do not report success.
        sys.exit(1)
#--------------------------------------------------------------------------
"""
Revision History:
Date Who Description
-------- --- ------------------------------------------------------------
20031014 plh Initial implementation
20111101 plh Add in Enums for modal behaviour
20130220 plh Reconstructed performiq module
Problems to fix:
To Do:
Issues:
"""
