### Title: ECOL592 Introduction to R Lecture 2
### Date created: 20140105
### Last updated: 20140126
###
###
### Author: Michael Koontz
###
###
### Intention: Script file for ECOL592 Intro to R Lecture 2. Data structures, accessing data, describing data
# Remove the objects listed by ls() from the current workspace before starting
rm(list = ls())
###
###
### Part 1: Data structures
###
###
num.of.tree.species <- 21 # a single number (class "numeric")
fruit.1 <- "apples" # a single piece of text (class "character")
c(1, 2, 3, 6, 86, 10) # a vector built with c() ('concatenate'), which links its arguments together into one vector
1:10 # a vector of the whole numbers from 1 through 10
years.active <- 1985:2013 # Stored under a name, so this vector can be retrieved later by calling 'years.active'. How?
# A matrix: the values 1 through 9 arranged in 3 rows and 3 columns
plot.layout <- matrix(data = 1:9, nrow = 3, ncol = 3)
### Ask R what kind of variable something is with class()
class(num.of.tree.species)
class(plot.layout)
### 'Coercing' variables to be a different class (that you know they should be)
foo <- "46" # the quotes make this text (class "character"), not a number
class(x = foo)
# foo is text, so arithmetic on it is an error ("non-numeric argument to binary operator").
# try() still prints that error message, but lets the rest of the script keep running
# when the whole file is run at once instead of line by line.
try(foo * 5)
bar <- as.numeric(x = foo) # convert the text "46" into the number 46
bar * 5 # now the multiplication works
class(x = bar)
### Or, to overwrite, how would you redefine foo?
###
### Function recap
###
# Takes in arguments
# Does something to those arguments
# Returns ONE thing
#
# We looked at log() and exp()
# Function that RETURNS a vector
seq(from = 1, to = 10, by = 2) # returns the vector 1 3 5 7 9
?seq # Put a ? in front of a function name to open its help page
# Functions that accept a whole vector as their argument
mean(years.active)
sd(years.active)
###
###
### Part 2: Vectorization
###
###
### Vectorization operations in R -- Super important concept
# R can be wicked fast in calculating vector algebra and matrix algebra
years.active - 1900 # 1900 is subtracted from every element of the vector
height <- c(62, 69, 71)
weight <- c(130, 185, 205)
# Division is done element by element: 1st weight / 1st height, 2nd / 2nd, 3rd / 3rd
wgt.hgt.ratio <- weight / height
wgt.hgt.ratio
####################
### Common Error!###
####################
# The two vectors have different lengths (2 heights, 3 weights)
height2 <- c(65, 67)
weight2 <- c(140, 150, 160)
# R reuses ('recycles') the shorter vector and warns that the lengths do not match up
wgt.hgt.ratio2 <- weight2 / height2
###
###
### Part 3: The most important data structure
###
###
### ToothGrowth is a dataset built in to R. It describes the length of guinea pig teeth at three doses of vitamin C, with two different delivery methods (orange juice or ascorbic acid)
ToothGrowth # Print the whole variable to look at it!
# ToothGrowth is a data.frame: a data structure with many columns, where each column can hold a different data type
# It is the structure most often used for analysis, so most of our time will be spent working with it
###
###
### Part 4: Descriptions of data structures
###
###
# Which argument(s) does str() take? The help page will tell you
?str
str(object = ToothGrowth) # A compact description of the structure of your variable
str(object = fruit.1)
str(object = years.active)
# length(): give it a vector, get back a single number -- how many elements the vector has
# How long is 'years.active' ?
length(x = years.active)
# dim(): give it a data.frame, get back a 2-element vector holding the number of rows
# and the number of columns of that data.frame
dim(x = ToothGrowth) # number of rows comes first, number of columns second
### In class check
# What is wrong with dim(x=years.active)?
names(x = ToothGrowth) # returns the column names of the data.frame
# The same function can also set the names: assign a vector of new names to names(ToothGrowth)
### In class check
# Use the assignment operator and the c() function to change all of the names in the ToothGrowth data frame to be 'length', 'delivery', and 'dose'
# Is the data.frame too long to read? head() shows the first 6 rows and tail() shows the last 6
# A quick way to get a sense of what the data and the column names look like
head(x = ToothGrowth)
tail(x = ToothGrowth)
###
###
### Part 5: Accessing data structures
###
###
## Bracket notation
years.active # the whole vector, for reference
years.active[4] # a single position inside the brackets: the 4th element
years.active[5:7] # a range of positions: the 5th, 6th, and 7th elements
years.active[c(2, 5, 10)] # a vector of positions: the 2nd, 5th, and 10th elements
### Teaser for subsetting:
years.active[years.active < 1994] # a conditional statement inside the brackets keeps only the elements for which it is TRUE
### In class check:
# What will dim(ToothGrowth)[1] return?
# Brackets for data frames: [row, column]
ToothGrowth[3, 3] # accesses the element in the 3rd row and the 3rd column
ToothGrowth[3, ] # accesses the entire 3rd row (all columns)
ToothGrowth[, 2] # accesses the entire 2nd column
# $ followed by column name
# The built-in column names are 'len', 'supp', and 'dose'. The examples below use the
# names from the Part 4 in-class check, so that renaming is applied here; without it,
# both lines would return NULL because no column has those names.
names(ToothGrowth) <- c("length", "delivery", "dose")
ToothGrowth$delivery # accesses the whole 'delivery' column
ToothGrowth$length[4] # accesses the 4th element in the length column
### CO2 is another dataset built in to R. It describes the carbon dioxide uptake of different plants in different conditions
# Peek at the first rows of the variable
head(x = CO2)
# Describe its structure and its size
str(object = CO2)
dim(x = CO2)
# Create a new column with $: the single value 1 is repeated down every row
CO2$block <- 1
head(x = CO2)
# A new column can also be calculated from existing columns, element by element
CO2$proportion <- CO2$uptake / CO2$conc
head(x = CO2)
########### End of second lecture