### Title: ECOL592 Introduction to R Lecture 11 ### Date created: 20140428 ### Last updated: 20140505 ### ### ### Author: Michael Koontz ### ### ### Intention: Script file for ECOL592 Intro to R Lecture 11. Finish matplot(), introduce par(mfrow=), user-defined functions, and source() file.choose() # Find the beetles.csv file beetles <- read.csv("/Users/mikoontz/Documents/R/ECOL592 Introduction to R/Lectures/ECOL592 Intro to R Lecture 10/beetles.csv") head(beetles) ### We need to transpose this data frame in order to use matplot() such that each column represents a single series ### We can use the t() function ### Here I'm only transposing the columns that have the actual population sizes so that our resulting matrix is entirely numeric transposed.beetles <- t(beetles[, 3:ncol(beetles)]) # In parts: 3:ncol(beetles) beetles[, 3:ncol(beetles)] t(beetles[, 3:ncol(beetles)]) colnames(transposed.beetles) <- beetles$ID head(transposed.beetles) ### By default, matplot() will plot each new series in a new color and a new plotting character, which is very slow with even moderate-sized data sets. Be sure to fix the type, color, and line type as a first pass ### Pass a vector specifying your x axis as the first argument, then the matrix to plot in the second argument matplot(x=0:7, y=transposed.beetles, type="l", col=1, lty=1) ### You can still use the indexing from the original (non transposed) data frame to inform your plot. Just remember that rows match up with columns and columns match up with rows. # The following line returns our logical indexing vector for when the Environment is "fluctuating." Normally we put this in the row indexing area in bracket notation, but we want to specify columns in our transposed data frame instead beetles$Environment=="fluctuating" fluctuating.idx <- beetles$Environment=="fluctuating" # Normally beetles[fluctuating.idx, ] # With the transposed data frame transposed.beetles[, fluctuating.idx] matplot(x=0:7, y=transposed.beetles[, fluctuating.idx], col="red", type="l", lty=1) ### matplot() has an add= argument that we use to add data to an existing plot stable.idx <- beetles$Environment=="stable" stable.idx matplot(x=0:7, y=transposed.beetles[, stable.idx], col="blue", type="l", lty=1, add=TRUE) ### Try using the par(mfrow=) parameter specification to fit multiple plots onto one window ### The mfrow argument takes a 2 element numeric vector corresponding to the number of rows and the number of columns you want your plots to take up par(mfrow=c(1,2)) ### Also note I've taken away the add=TRUE argument in the second call to matplot() and specified the y axis limits so they are on the same scale matplot(x=0:7, y=transposed.beetles[, fluctuating.idx], col="red", type="l", lty=1, ylim=c(0,120)) matplot(x=0:7, y=transposed.beetles[, stable.idx], col="blue", type="l", lty=1, ylim=c(0,120)) ### ### ### User-defined functions ### ### # The logit function ("log odds") takes a proportion on the interval (0,1) and puts it on the interval (-Inf, +Inf) x <- 0.1 logit.x <- log(x/(1-x)) logit.x curve(log(x/(1-x)), from=0, to=1) # As you might expect, the inverse logit function takes a number on the interval (-Inf, +Inf) and puts it on the interval (0,1) x <- 1.2 inv.logit.x <- exp(x)/(1+exp(x)) inv.logit.x curve(exp(x)/(1+exp(x)), from=-4, to=4) # These calculations are useful especially when doing logistic regressions, but there aren't any built-in functions in R to do them # So we can build our own! # This really gets to the heart of programming in R. Most things you (especially if you do them a lot) should be put into a function. # Remember the 3 parts of a function: # 1) It takes in arguments # 2) It does something to those arguments # 3) It returns ONE thing # Here, our new function takes in an "x" argument, calculates the logit of that "x," and returns the result as a numeric variable logit <- function(x) { logit.x <- log(x/(1-x)) logit.x } logit(0.6) proportion <- 0.6 logit.proportion <- logit(proportion) logit.proportion # Vectorization still works how it does with native functions proportions <- c(0.6, 0.7, 0.1) logit.proportions <- logit(proportions) logit.proportions # In class check: Create a function that calculates the inverse logit of the argument passed to it, and returns it ### ### ### Error bars in a function ### ### # Some of you have used the arrows() function to make error bars around a plot. Let's build that into a function. # Create "theoretical" data (credit: Clint Leach) set.seed(50) population.size <- matrix(data=0, nrow=15, ncol=10) population.size[,1] <- rpois(15, lambda=20) head(population.size) # These parameters are for our population growth model R <- 1.5 #Intrinsic, density independent rate of growth alpha <- 0.002 #Density dependent parameter for (i in 2:ncol(population.size)) { lambda <- population.size[,i-1] * R * exp(-alpha*population.size[,i-1]) # The mean of the next population size will depend on the previous population size population.size[,i] <- rpois(15, lambda=lambda) # We use 25 draws from a Poisson distribution, one draw for each replicate population } # Take a look head(population.size) # Here, I've caluclated the standard error for the population size at each time step by using the apply() function by column (MARGIN=2) SE <- apply(population.size, MARGIN=2, FUN=function(x) sd(x)/sqrt(length(x))) SE # Calculate the mean population size pop.means <- apply(population.size, MARGIN=2, FUN=mean) pop.means par(mfrow=c(1,1)) #Reset our graphics parameters so we only have 1 graph per Quartz window # Here we can look at all of the iterations at once matplot(1:10, t(population.size), type="l", lty=1, col=1) # Remember to transpose the matrix by using the t() function so that each series is contained in a separate column (we had each series in a separate row before) # Or we can just look at the means plot(1:10, pop.means, type="l", ylim=c(0,175)) # With arrows(), you specify where you want the arrow to start (x0, y0) and where you want it to stop (x1, y1). We can use vectors for each of those 4 coordinates. So we want to keep the original x values (1:10) for both the start and stop, and for the y values, we want to start at 1 standard error below the mean and finish 1 standard error above the mean # The code= argument specifies whether you don't want an arrow head (code=0), whether you want it on the head or the the tail of the arrow, or both. If you do use the arrow head, use the angle=90 to make it flat, and the length= argument to adjust the length arrows(x0=1:10, y0=pop.means-SE, x1=1:10, y1=pop.means+SE, code=0) # Or... plot(1:10, pop.means, type="l", ylim=c(0,175)) arrows(x0=1:10, y0=pop.means-SE, x1=1:10, y1=pop.means+SE, code=3, length=0.1, angle=90) # Put it in a function! error.bars <- function(x, y, offset) { arrows(x0=x, y0=y-offset, x1=x, y1=y+offset, code=3, length=0.1, angle=90) } plot(1:10, pop.means, type="l", ylim=c(0,175)) error.bars(x=1:10, y=pop.means, offset=SE) # Save the function in a different file and use the source() function in any other script to be able to use those user-defined functions. file.choose() source() #################### End of Lecture 11 ### Also cool, but we won't talk about it par(mar=c(7,10,3,4)+0.1, mgp=c(5,1.5,0)) matplot(1:10, t(population.size), type="l", lty=1, col=1, las=1, main="Ricker Population Dynamics", ylab="N", cex.main=3, cex.axis=2, cex.lab=3, xlab="Generation") # Our matrix plot like usual polygon(x=c(1:10, 10:1), y=c(pop.means+SE, rev(pop.means-SE)), col="gray", lty=0) # Use the polygon function to add a polygon to an existing plot. It takes two important arguments, x= and y= . You have to specify all of the vertices of the closed polygon. We can "shade" the standard error region by using all of the x values with the y values plus the standard error, and then IN REVERSE DIRECTION the same x values but the y values minus the standard error. lines(1:10, pop.means, col="red", lwd=3) # The parts: # For x: c(1:10, 10:1) # For y: (note the rev() function reverses the order of the vector) c(pop.means+SE, rev(pop.means-SE)) ### ### ### More bonus plotting ### ### matplot(1:10, t(population.size), type="l", lty=1, col=1, las=1, main="Ricker Population Dynamics", ylab=NA, cex.main=3, cex.axis=2, cex.lab=3, xlab="Generation") # Recreate ### ### The las= argument in mtext(). "Margin text" ### mtext(side=2, las=1, text="N", line=6, cex=3) # Use the las=1 argument to turn the label so it is parallel to the y axis # Use the paste() function when you want a variable in with your character string. For instance in your plot title, let's add the number of generations, but use a variable so when that number changes so will our title matplot(1:10, t(population.size), type="l", lty=1, col=1, las=1, main=paste("Ricker Dynamics for", ncol(population.size), "Generations", sep=" "), ylab=NA, cex.main=3, cex.axis=2, cex.lab=3, xlab="Generation") # Use the expression label for "pretty print." You can do all sorts of things with this. Greek letters, fractions, subscripts, superscripts, summation notation, product notation, etc. mtext(side=2, las=1, text=expression(N[t]), line=6, cex=3) # Use text() to add text within the plot window. You can use "pretty print" here too # Use expression() and paste() together to combine pretty print with character text text(x=1, y=150, labels=expression(paste(N[t+1]," = ", RN[t]*e^paste(-alpha,N[t]) )) , pos=4, cex=1.5) # Use bquote() when you want pretty print, variables, AND text. Anything wrapped in a .() gets evaluated, anything else is treated as an expression. Use ~ to denote a space text(x=1, y=132, labels=bquote(alpha~"="~.(alpha)), pos=4, cex=1.5) text(x=1, y=114, labels=bquote(R~"="~.(R)), pos=4, cex=1.5) polygon(x=c(1:10, 10:1), y=c(pop.means+SE, rev(pop.means-SE)), col="gray", lty=0) lines(1:10, pop.means, col="red", lwd=3) ### ### ### Even more advanced plotting ### ### ### Especially for journal publication, there are very specific expectations for figures ### Use the extra arguments in the pdf() function to specify the size of plots in inches, the font size (remember to go back and erase any cex= arguments you used), and the font type names(pdfFonts()) # To get a list of fonts available ### You can use layout() in place of par(mfrow=) to get multiple panel plots of different sizes layout.matrix <- matrix(data=c(1,2,1,3), nrow=2, ncol=2) layout.matrix par(mar=c(2,10,3,2) + 0.1) layout(mat=layout.matrix, respect=TRUE) matplot(1:10, t(population.size), type="l", lty=1, col=1, las=1, main="Ricker Population Dynamics", ylab=NA, cex.main=3, cex.axis=2, cex.lab=3, xlab=NA) # par(mar=c(6,10,1,0) + 0.1) plot(1:10, pop.means, type="l", xlab=NA, ylim=c(0,175), ylab=NA, las=1, cex.axis=2) error.bars(x=1:10, y=pop.means, offset=SE) par(mar=c(6,8,1,2) + 0.1) plot(1:10, pop.means, type="l", xlab=NA, ylim=c(0,175), ylab=NA, las=1, cex.axis=2) polygon(x=c(1:10, 10:1), y=c(pop.means+SE, rev(pop.means-SE)), col="gray", lty=0) lines(1:10, pop.means, col="red", lwd=3) ### By specifying the outer= argument to be true in your mtext() function call, you can make a shared axis label for a paneled plot mtext(side=1, text="Generation", outer=TRUE, line=-1, cex=3, at=0.55) mtext(side=2, text=expression(N[t]), outer=TRUE, line=-25, las=1, cex=3) ### ### ### Bonus data manipulation tools ### ### # match() takes in a vector and a scalar that you are trying to find in the vector. It returns the index value of the FIRST instance of the thing you are trying to find test <- c(1, 435, 32, 1, 5, 12, 54, 2, 4, 1, 5, 2) match(2, test) # Just the population sizes in the beetles data frame head(beetles) head(beetles[,3:ncol(beetles)]) # Index value, per row, of the first time the population was 0. That is, the generation the population went extinct apply(beetles[,3:ncol(beetles)], MARGIN=1, FUN=function(x) match(0, x)) # Another example... set.seed(11) species.list <- c("QUEAGR", "QUEGAR", "QUEKEL", "QUEDEC", "AMOCAL", "PIEN", "PICO") plotting.colors <- rainbow(length(species.list)) species <- unlist(replicate(10, sample(species.list), simplify=FALSE)) saplings <- rnbinom(70, size=20, mu=50) trees <- rnbinom(70, size=10, mu=12) survey <- data.frame(species, saplings, trees) head(survey) ## par(mfrow=c(1,1), mar=c(4,4,4,4)+0.1) # Using match(), we can plot all groups by different colors in 1 line! plot(survey$saplings, survey$trees, col=plotting.colors[match(survey$species, species.list)], pch=19, main="Tree Count vs. Sapling Count") legend("topright", bty="n", legend=species.list, col=plotting.colors, pch=19) ### The parts: plotting.colors[match(survey$species, species.list)] match(survey$species, species.list) # Returns the index value of the first instance where each element in turn of survey$species is found in species.list. The species are in a random order in the survey data frame, but that doesn't matter if we use match(). When we have the index value of where each row's species occurs in species.list, we can just use that to index on the plotting.colors variable. ### ### ### ### Know how to use %in% ! It is like a compound 'or' statement, taking the place of many conditional statements linked together with | ### head(CO2) # I want all of the rows where the conc column is 95, 250, 500, or 675 # We could write out our logical indexing vector as: CO2$conc == 95 | CO2$conc == 250 | CO2$conc == 500 | CO2$conc == 675 # Or we could do the same thing with %in% CO2$conc %in% c(95, 250, 500, 675) indexing <- CO2$conc %in% c(95, 250, 500, 675) CO2[indexing, ] ### Additional functions to check out axis() # Suppress the axis from being drawn in the original plot() or matplot() call using xaxt="n" or yaxt="n" and then add your own afterwards. This way you can put the tick marks whereever you like, and label them whatever you like rainbow() # Takes in a number argument and creates a vector of that number of unique colors. Great for plotting when you don't want to hard code how many colors you are going to need. As you add more species, for instance, rainbow(number.of.species) will keep adding new colors for you jitter() # Offsets plotting characters by a random component so that all of the points don't stack up on each other