# Sunday, August 23, 2015
#
# R cheatsheet: data type and data structure

# mode represents how an object is stored in memory
# (numeric, character, list and function)
#
# class represents its abstract type.


# mode(): report the mode of an R object ----

# Odd numbers starting at 1 and not exceeding 10.
num.obj <- seq(1, 10, by = 2)
mode(num.obj)            # "numeric"

# Two TRUEs followed by three FALSEs.
logical.obj <- rep(c(TRUE, FALSE), times = c(2, 3))
mode(logical.obj)        # "logical"

# The first three lower-case letters.
character.obj <- letters[1:3]
mode(character.obj)      # "character"

# Type predicates answer the same question as a yes/no test.
is.numeric(num.obj)      # TRUE

is.character(num.obj)    # FALSE
is.logical(logical.obj)  # TRUE
is.numeric(logical.obj)  # FALSE: a logical vector is not numeric

# Functions are objects too and have a mode of their own.
mode(mean)               # "function"
is.function(mean)        # TRUE

# class(): report the class attribute (abstract type) of an R object ----
class(num.obj)        # "numeric"
class(logical.obj)    # "logical"
class(character.obj)  # "character"

# typeof() reports the internal storage type instead.
typeof(num.obj)       # "double"

# A 3 x 3 matrix filled column by column with uniform random draws.
mat.obj <- matrix(runif(9), nrow = 3)
mode(mat.obj)         # "numeric": the mode of the underlying data
class(mat.obj)        # "matrix" (followed by "array" from R 4.0.0 on)
print(mat.obj)
print(num.obj)

# factor: the type used for categorical data ----
character.obj
is.factor(character.obj)     # FALSE: still a plain character vector
is.character(character.obj)  # TRUE

# Convert the character vector; each distinct string becomes a level.
factor.obj <- factor(character.obj)
is.factor(factor.obj)        # TRUE
mode(factor.obj)             # "numeric": levels are stored as integer codes
class(factor.obj)            # "factor"

# Factor built from the numbers 1 to 9; the levels are their string forms.
factor1 <- factor(as.numeric(1:9))
factor1
levels(factor1)  # "1" ... "9"
# labels() is the generic name/index accessor, not the factor's level labels:
# for an unnamed vector it returns the positions as strings.
labels(factor1)  # "1" ... "9"

# Same values, but the levels are relabelled "a" ... "i" via `labels =`.
factor2 <- factor(as.numeric(1:9), labels = letters[1:9])
factor2
levels(factor2)  # "a" ... "i"
labels(factor2)  # still "1" ... "9" (positions, not the level labels)

# data frame: a list of equal-length columns, possibly of different types ----

# Two numeric columns.
var1 <- as.numeric(101:105)
var2 <- c(25, 22, 29, 34, 33)

# A character column, spelled out by indexing into the two possible values.
var3 <- c("Non-Diabetic", "Diabetic")[c(1, 2, 1, 1, 2)]

# A factor column.
var4 <- factor(c("male", "female")[c(1, 1, 2, 2, 1)])

# Column names are taken from the variable names.
# Note: from R 4.0.0 on var3 stays character; earlier versions converted
# strings to factors by default.
diab.dat <- data.frame(var1 = var1, var2 = var2, var3 = var3, var4 = var4)
diab.dat
summary(diab.dat)
class(diab.dat)   # "data.frame"
mode(diab.dat)    # "list"
typeof(diab.dat)  # "list"

# matrices: every cell shares a single type ----

# diab.dat mixes numeric and non-numeric columns, so the conversion has to
# fall back to a common type for all cells.
mat.diab <- as.matrix(x = diab.dat)
class(mat.diab)   # "matrix" (followed by "array" from R 4.0.0 on)
mode(mat.diab)    # "character"
typeof(mat.diab)  # "character"

# A 3 x 3 numeric matrix of standard-normal draws (filled column by column).
num.mat <- matrix(rnorm(3 * 3), ncol = 3)
num.mat
class(num.mat)  # "matrix" (followed by "array" from R 4.0.0 on)
mode(num.mat)   # "numeric"

# Transpose, then the matrix product of the transpose with the original.
t(num.mat)
t(num.mat) %*% num.mat

# array: can have any number of dimensions ----

# Start from a 2 x 2 x 3 numeric array of missing values, then fill each of
# the three 2 x 2 slices along the third dimension with standard-normal draws.
mat.array <- array(NA_real_, dim = c(2, 2, 3))
mat.array[, , 1] <- rnorm(4)
mat.array[, , 2] <- rnorm(4)
mat.array[, , 3] <- rnorm(4)

mat.array

# list: a container whose elements may each have a different type and shape.
obj.list <- list(
  elem1 = var1,
  elem2 = var2,
  elem3 = var3,
  elem4 = var4,
  elem5 = diab.dat,
  elem6 = mat.array
)

obj.list[1]       # single bracket: a one-element list holding elem1
obj.list[[1]]     # double bracket: the element itself (the var1 vector)
obj.list[[1]][2]  # second value of that vector

# Date variables ----

# A Date is stored as the number of days since 1970-01-01.
as.Date("1970-01-01")
as.numeric(as.Date("1970-01-01"))  # 0

as.numeric(as.Date("1970-01-02"))  # 1

# Non-ISO input needs an explicit format; %b (abbreviated month name) is
# matched using the current locale.
as.Date("Jan-01-1970", format = "%b-%d-%Y")

Sys.time()

# Extract the date and the time-of-day parts with explicit format strings
# rather than by character position in the default string rendering.
format(Sys.time(), "%Y-%m-%d")
format(Sys.time(), "%H:%M:%S")

# number of seconds since 1 January 1970
unclass(Sys.time())

# POSIXct: the signed number of seconds since the beginning of 1970 as a numeric vector
# more convenient for including in dataframes

# POSIXlt: a named list of vectors
# representing seconds, minutes, hours, days, months and years

# Current time broken down into calendar components (POSIXlt).
date <- as.POSIXlt(x = Sys.time())
date
date$wday      # day of the week, 0-6 starting on Sunday
date$yday      # day of the year, 0-365
mode(date)     # "list"
class(date)    # "POSIXlt" "POSIXt"
typeof(date)   # "list"

# Dropping the class shows the underlying named list of components.
unclass(date)

# Flatten that list into one named atomic vector.
# NOTE(review): the resulting type depends on the components present in this
# R version (a character time-zone component forces a character vector).
date2 <- unlist(x = unclass(date))
class(date2)
mode(date2)
typeof(date2)

# Compare the length of the date-time object with that of the flat vector.
length(date)
length(date2)

# Current time as a single number of seconds since 1970 (POSIXct).
date <- as.POSIXct(x = Sys.time())
date

# Without the class only the numeric value (plus attributes) is left;
# unlist() on that atomic value changes nothing.
unclass(date)
unlist(x = unclass(date))

class(date)   # "POSIXct" "POSIXt"
mode(date)    # "numeric"
typeof(date)  # "double"
length(date)  # 1

# Parse a date-only string; no tz is given, so the session time zone is used.
date.str <- "2015-12-02"
date4 <- as.POSIXct(x = date.str, format = "%Y-%m-%d")

# Parse a full date-time string and interpret it in the "EST" time zone.
datetime.str <- "2015-12-02 16:30:00"
date5 <- as.POSIXct(
  x = datetime.str,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "EST"
)
date5