# R script to create a histogram pdf file from a predict-2nd bychain file # Usage: R --vanilla --quiet --args 'fname' < bychain2hist.r >& R.bychain2hist.log # input file: fname.by-chain # output file: fname.by-chain.hist.pdf infile.bychain.suffix <- ".by-chain" outfile.hist.pdf.suffix <- ".by-chain.hist.pdf" # find the fname argument after --args my.args <- commandArgs() my.argnum <- which(my.args == "--args") fname <- NULL fname <- my.args[my.argnum+1] if (!length(fname)) { print("Error: expecting --args 'fname'") quit() } file.prefix <- fname infile.bychain <- paste (file.prefix, infile.bychain.suffix, sep="") outfile.hist.pdf <- paste (file.prefix, outfile.hist.pdf.suffix, sep="") bychain.data <- read.table( infile.bychain, header=FALSE, as.is=TRUE) # expected format of by-chain file: ## chainID Bits_saved IDBits Q7 SOV object count SOV_E SOV_B SOV_G ... #1ollA 1.1881 ------- 0.6543 0.6722 2.1785 188 0.8434 1.0000 0.1667 ... #1jnrA 0.8839 ------- 0.6199 0.5452 1.7765 642 0.6928 0.0000 0.1071 ... bits.saved <- bychain.data$V2 objective <- bychain.data$V6 # sanity check bits.saved[1:5] objective [1:5] # ############################################################# # # PLOTTED OUTPUT BEGINS HERE # # ############# # histogram plots # function for plotting histograms with labels containing mean,sd my.plothist <- function (data, data.nm) { # arguments # data: data to be plotted # data.nm character string of name of data # get integer min and max of data to compute breaks # as.integer() truncates towards zero, need to round down for negative min if (min(data,na.rm=TRUE) < 0) data.min <- as.integer(min(data,na.rm=TRUE)) -1 if (min(data,na.rm=TRUE) >= 0) data.min <- as.integer(min(data,na.rm=TRUE)) data.max <- as.integer(max(data,na.rm=TRUE)) + 1 data.brk <- seq(data.min,data.max,0.1) data.mean <- mean(data,na.rm=TRUE) data.sd <- sd (data,na.rm=TRUE) data.sem <- data.sd/sqrt((length(data)-1)) data.main <- paste ("Histogram of ",data.nm, sep="") data.xlab <- paste ("mean=", formatC(data.mean,format="f",digits=2), " std.dev=", formatC(data.sd, format="f",digits=2), " std.err.mean=", formatC(data.sem, format="f",digits=3), sep="") hist (data, breaks = data.brk, main = data.main, xlab = data.xlab) } # end of function # set output device and filename pdf(outfile.hist.pdf) my.plothist (bits.saved, "bits.saved") my.plothist (objective, "objective") # restore output device to default dev.off() quit()