Graphing with R

From PeformIQ Upgrade
Revision as of 17:17, 17 September 2013 by PeterHarding (talk | contribs) (→‎Generating the CSV Data)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Processing Data on Windows 7

I use the following BAT file to run R in batch mode over each of the CSV data sets in a folder.


@echo off

rem Run plot.r once for every CSV data set in the current folder.
rem Each CSV file name is handed to R after --args, where plot.r reads it
rem back with commandArgs(trailingOnly = TRUE).

set R_TERM="C:\Apps\R\R-3.0.1\bin\i386\Rterm.exe"

set FILES=*.csv

rem Start each batch run with a fresh log.  The loop below appends, so the
rem R output of every data set is kept rather than only that of the last one.
if exist plot.out del plot.out

for %%F in (%FILES%) DO  (
    ECHO Processing %%F
    rem Quote the file name so that names containing spaces reach R intact.
    %R_TERM% --no-restore --no-save --args "%%F" < plot.r >> plot.out 2>&1
)

File:Plot.bat script

R Script File


# Plot %CPU utilisation (User + System) over time for one CSV data set and
# save the chart as a PNG next to it.
#
# The CSV file name is taken from the first argument following --args on the
# R command line and is expected to look like <host>_<YYYYMMDD>... .

# Only the arguments that follow --args on the command line.
args <- commandArgs(trailingOnly = TRUE)

print(args)

name <- args[1]

# Capture groups: 1 = host (everything before the first underscore),
# 2 = four-digit year, 3 = two-digit month, 4 = two-digit day.
# Anything after the date is matched by the trailing .* and discarded.
regexp <- "([^_]*)_([[:digit:]]{4})([[:digit:]]{2})([[:digit:]]{2}).*"

# NOTE(review): sub() returns its input unchanged when the pattern does not
# match, so a file name of a different shape is not rejected here.
host <- sub(pattern=regexp, replacement="\\1", x=name)
year <- sub(pattern=regexp, replacement="\\2", x=name)
month <- sub(pattern=regexp, replacement="\\3", x=name)
day <- sub(pattern=regexp, replacement="\\4", x=name)

# ISO style date used in the chart title.
date <- sprintf("%s-%s-%s", year, month, day)

rm(args)

# The input name is rebuilt from the parsed pieces rather than taken directly
# from the argument, so the file read is always <host>_<YYYYMMDD>.csv.
data_file <- sprintf("%s_%s%s%s.csv", host, year, month, day)

print(data_file)

data <- read.csv(data_file, header=T)


# data$Timestamp <- strptime(paste(data$Date, data$Time), "%Y-%m-%d %H:%M:%S")

# The DateTime column holds "YYYY-MM-DD HH:MM:SS" text; convert it to a
# date-time so it can be used as the x axis.
data$Timestamp <- strptime(data$DateTime, "%Y-%m-%d %H:%M:%S")

# "%%" yields a literal percent sign in the title.
title <- sprintf("%s - %s - %% CPU Utilization", date, host)

# Total CPU is the sum of the User and System columns.
data$CPU <- data$User + data$System

print(title)

# Open the PNG device; every drawing call up to dev.off() goes to this file.
png(sprintf("%s_%s%s%s.png", host, year, month, day))

# Base layer: one vertical line per sample (type="h").
plot(data$Timestamp, data$CPU, main=title, type="h", col="light blue", xlab="Time", ylab="Ucpu%", lwd=1)

# Mark each sample value on top of its vertical line.
points(data$Timestamp, data$CPU, col="blue")

# Horizontal dashed red line at the mean CPU value.
abline(h=mean(data$CPU), lty=2, col="red")

#s <- spline(data$CPU)

# Dotted dark green trend line: smoothing spline with 10 degrees of freedom.
lines(smooth.spline(data$Timestamp, data$CPU, df = 10), lty = 3, col = "Dark Green")
#
#lines(s$x, s$y, type="b", pch=22, col="blue", lty=2)

# Close the device, which completes the PNG file.
dev.off()

File:Plot.r

Lnfsna51 20130814.png File:Lnfsna51 20130814.csv

Generating the CSV Data

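The following script (CpuCsv.py) extracts a subset of the columns of timestamped vmstat output (interrupts, context switches and the CPU percentages) and writes them to the CSV file read by the R script above. File:CpuCsv.py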

#!/usr/bin/env python
#
#       Author:  Peter Harding  <plh@performiq.com.au>
#
#                PerformIQ Pty. Ltd.
#                Suite 230,
#                Level 2,
#                1 Queens Road,
#                MELBOURNE, VIC, 3004
#
#                Mobile:  0418 375 085
#
#
# NAME
#   cpucsv.py - Convert vmstat output into a CSV file of CPU statistics
#
# SYNOPSIS
#   cpucsv.py [-dqv] -p FILE.vmstat ...
#
# PARAMETERS
#   See __doc__ below
#
# DESCRIPTION
#   ...
#
# RETURNS
#   0 for successful completion, 1 for any error
#
# FILES
#   ...
#
#--------------------------------------------------------------------------
"""
Usage:

   $ cpucsv.py [-dv] -p *.vmstat


Parameters:

   -p              Parse
   -d              Increment Debug level
   -v              Set Verbose

"""
#--------------------------------------------------------------------------

import os
import re
import sys
import time
import getopt
import random
import pickle
import pprint
import logging
import urllib

from datetime import datetime

from performiq import Enum, Logger

#--------------------------------------------------------------------------

# Identification string; the version number is the part inside the square
# brackets, extracted below into __version__ ("2.3.01").
__at_id__     = "@(#)  cpucsv.py  [2.3.01]  2013-02-20"
__version__   = re.sub(r'.*\[([0-9.]*)\].*', r'\1', __at_id__)

# Command-line controlled settings; main() updates these from -q, -v, -d, -D.
quiet_flg     = False
verbose_flg   = False

debug_level   = 0

# NOTE(review): the next three values are not referenced anywhere in the
# visible file - presumably left over from the script template; confirm
# before removing.
table_name    = "people"
DSERVER_PORT  = 9570
MAX_REQUESTS  = 200

# NOTE(review): not referenced in the visible file.
lf            = None
log           = None

LOG_DIR       = "/tmp"
# Filled in by main() from the HOME environment variable.
home_dir      = None

# Any run of carriage-return / line-feed characters.
p_crlf        = re.compile(r'[\r\n]*')
# A (possibly empty) run of digits followed by a colon, e.g. the "13:" of
# "13:05:01".
p_hour        = re.compile(r'([0-9]*):')

# NOTE(review): not referenced in the visible file.
pp            = pprint.PrettyPrinter(indent=3)

#==========================================================================

class Vmstats:
    """One parsed row of vmstat output.

    The row is split on whitespace.  Column 0 is stored as the date and
    column 1 as the time; columns 12 to 17 are stored as Interrupts,
    ContextSwitches, User, System, Idle and WaitIO, and column 18 (when
    present) as st.  All column values are kept as strings.

    NOTE(review): assumes every data row starts with a date and a time
    column followed by the usual vmstat columns - confirm against the
    command that captured the data.

    Attributes:
        Hour    Hour of the sample as an int, or None when the time column
                does not start with "<digits>:".
        st      Column 18, or None when the row has only 18 columns.

    Raises:
        IndexError  if the row has fewer than 18 columns.
    """

    # Number of rows handed to the constructor so far (class wide).
    TotalCount = 0

    # Leading hour digits of an HH:MM:SS time.  At least one digit is
    # required so that int() is never handed an empty string.
    _HOUR_RE = re.compile(r'([0-9]+):')

    # Columns 0..17 are always read; column 18 is optional.
    _MIN_COLUMNS = 18

    #--------------------------------------------------------------------

    @classmethod
    def count_row(cls):
        """Increment the class-wide row counter."""
        cls.TotalCount += 1

    #--------------------------------------------------------------------

    def __init__(self, row):
        """Parse one whitespace separated vmstat row (a string)."""
        Vmstats.count_row()

        cols = row.split()

        if len(cols) < self._MIN_COLUMNS:
            raise IndexError(
                "vmstat row has %d columns, expected at least %d: %r"
                % (len(cols), self._MIN_COLUMNS, row))

        self.Date             = cols[0]
        self.Time             = cols[1]

        # Always define Hour so callers can test it without risking an
        # AttributeError on rows whose time column is malformed.
        m = self._HOUR_RE.match(self.Time)
        self.Hour             = int(m.group(1)) if m else None

        self.Interrupts       = cols[12]
        self.ContextSwitches  = cols[13]
        self.User             = cols[14]
        self.System           = cols[15]
        self.Idle             = cols[16]
        self.WaitIO           = cols[17]

        # The last column is stored but never written out, so its absence
        # is tolerated rather than treated as an error.
        self.st               = cols[18] if len(cols) > 18 else None

    #--------------------------------------------------------------------

    def __str__(self):
        """Return the row as one CSV record: "Date Time,in,cs,us,sy,id,wa"."""
        return "%s %s,%s,%s,%s,%s,%s,%s" % (
                   self.Date,
                   self.Time,
                   self.Interrupts,
                   self.ContextSwitches,
                   self.User,
                   self.System,
                   self.Idle,
                   self.WaitIO,
              )

    #--------------------------------------------------------------------

#==========================================================================
# And here is the real work...

def do_work(fname, start_hour=13, end_hour=17):
    """Convert <fname>.vmstat into <hostname>_<run_date>.csv.

    Parameters:
        fname        Input file name WITHOUT the ".vmstat" extension.  It
                     must look like <hostname>_<digits>_<digits>; the first
                     two parts name the output file.
        start_hour   Rows whose hour is below this value are not written
                     (default 13, the value previously hard-coded).
        end_hour     Processing stops after the first row whose hour is
                     above this value (default 17, as before).

    Behaviour:
        * A file name that does not match the expected pattern is reported
          and skipped.
        * The first three lines of the input are skipped, as is any line
          containing "procs" or "free" (repeated vmstat headers).
        * Blank lines and rows that cannot be parsed are skipped; parse
          failures are reported on stderr.
        * Every parsed row is echoed to stdout.

    Exits the process with status 1 if either file cannot be opened.

    Written to run on Python 2.6+ (and to stay valid under Python 3).
    """
    Logger.Info("[do_work]")

    m = re.search(r'([^_]*)_([0-9]*)_([0-9]*)', fname)

    if not m:
        # Without a match there is no host name or date to build the output
        # file name from, so there is nothing useful left to do.
        print("Bad file name - %s" % fname)
        return

    hostname = m.group(1)
    run_date = m.group(2)
    print("%s %s" % (hostname, run_date))

    fname_in  = "%s.vmstat" % fname
    fname_out = "%s_%s.csv" % (hostname, run_date)

    try:
        f_in = open(fname_in, 'r')
    except IOError as msg:
        sys.stderr.write(fname_in + ': cannot open: ' + repr(msg) + '\n')
        sys.exit(1)

    # The with-blocks close both files even when an error escapes.
    with f_in:
        try:
            f_out = open(fname_out, 'w')
        except IOError as msg:
            sys.stderr.write(fname_out + ': cannot open: ' + repr(msg) + '\n')
            sys.exit(1)

        with f_out:
            f_out.write("DateTime,Interrupts,ContextSwitches,User,System,Idle,WaitIO\n")

            for line_no, line in enumerate(f_in):
                # Skip the three header lines at the top of the file.
                if line_no < 3:
                    continue

                #  Truncate EoL markers from end of line
                line = p_crlf.sub('', line)

                if not line.strip():
                    continue

                # vmstat repeats its two header lines periodically.
                if 'procs' in line or 'free' in line:
                    continue

                try:
                    stats = Vmstats(line)
                except IndexError:
                    # Too few columns, e.g. a truncated final line.
                    sys.stderr.write("%s: skipping malformed line %d\n"
                                     % (fname_in, line_no + 1))
                    continue

                print(stats)

                # Hour may be missing or None when the time column did not
                # parse; such a row cannot be placed in the time window.
                hour = getattr(stats, 'Hour', None)

                if hour is None:
                    continue

                if hour >= start_hour:
                    f_out.write("%s\n" % (stats, ))

                # NOTE(review): as in the original, the first row past
                # end_hour has already been written above before the loop
                # stops - confirm whether that extra sample is wanted.
                if hour > end_hour:
                    break

#=========================================================================

def usage():
    """Print the command-line help (the module docstring) to stdout."""
    print(__doc__)

#-------------------------------------------------------------------------

def main(argv):
    """Parse the command line and run the selected mode.

    Parameters:
        argv   Argument list without the program name (sys.argv[1:]).

    Options:
        -d / --debug            increment the debug level
        -D / --debug-level N    set the debug level to N
        -f / --file NAME        add NAME (without ".vmstat") and parse
        -p                      parse the *.vmstat files given as arguments
        -q / --quiet            set the quiet flag
        -v / --verbose          set the verbose flag
        -V / --version          print the version and return
        -h / -? / --help        print usage and return

    Returns:
        0 on success (including help and version requests), 1 for a
        command-line error.  Exits the process with status 1 when the HOME
        environment variable is not set.
    """
    global debug_level
    global quiet_flg
    global verbose_flg
    global home_dir

    try:
        home_dir = os.environ['HOME']
    except KeyError:
        # A missing HOME is an error, so report it with a non-zero status.
        print("Set HOME environment variable and re-run")
        sys.exit(1)

    Modes      = Enum(["Info", "Parse", ])

    mode       = Modes.Info
    filenames  = []

    try:
        opts, args = getopt.getopt(argv, "dD:f:hpqvV?",
                ("debug", "debug-level=", "file=", "help", "quiet", "verbose", "version"))
    except getopt.error:
        usage()
        return 1

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            return 0
        elif opt in ('-d', '--debug'):
            debug_level += 1
        elif opt in ('-D', '--debug-level'):
            try:
                debug_level = int(arg)
            except ValueError:
                # A non-numeric level is a usage error, not a traceback.
                usage()
                return 1
        elif opt in ('-f', '--file'):
            mode = Modes.Parse
            filenames.append(arg)
        elif opt in ('-p',):
            mode = Modes.Parse
        elif opt in ('-q', '--quiet'):
            quiet_flg = True
        elif opt in ('-v', '--verbose'):
            verbose_flg = True
        elif opt in ('-V', '--version'):
            # Only a -q given BEFORE -V shortens the version output.
            if quiet_flg:
                print(__version__)
            else:
                print("[cpucsv]  Version: %s" % __version__)
            # Printing the version is a successful outcome.
            return 0
        else:
            usage()
            return 1

    # Accept only names that END in a literal ".vmstat"; do_work() expects
    # the name without that extension.
    p_vmstat = re.compile(r'(.*)\.vmstat$')

    for arg in args:
        m = p_vmstat.search(arg)
        if m:
            filenames.append(m.group(1))
        else:
            print("Bad filename [%s]" % arg)

    sys.stderr.write("[cpucsv]  Working directory is %s\n" % os.getcwd())

    if debug_level > 0:
        sys.stderr.write("[cpucsv]  Debugging level set to %d\n" % debug_level)

    sys.stderr.flush()

    Logger.Init(name='cpucsv')

    if mode == Modes.Info:
        Logger.Info('Info')
    elif mode == Modes.Parse:
        Logger.Info('Parsing')
        for filename in filenames:
            do_work(filename)
    else:
        Logger.Info('Nothing to do')

    return 0

#--------------------------------------------------------------------------

# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__' or __name__ == sys.argv[0]:
    try:
        sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        print("[cpucsv]  Interrupted!")
        # An interrupted run did not complete, so do not report success.
        sys.exit(1)

#--------------------------------------------------------------------------

"""
Revision History:

     Date     Who   Description
   --------   ---   ------------------------------------------------------------
   20031014   plh   Initial implementation
   20111101   plh   Add in Enums for modal behaviour
   20130220   plh   Reconstructed performiq module

Problems to fix:

To Do:

Issues:


"""