Mahbubul Majumder, PhD
Sep 16, 2014
x <- c(3,5,9,2,8,7)
x
[1] 3 5 9 2 8 7
x[3]
[1] 9
x[-3]
[1] 3 5 2 8 7
i <- c(1,3,5)
x[i]
[1] 3 9 8
y <- matrix(x,ncol=3)
y
[,1] [,2] [,3]
[1,] 3 9 8
[2,] 5 2 7
y[2,3]
[1] 7
y[2,]
[1] 5 2 7
j <- sample(1:3,size=9,T)
y[2,j] = ?
x > 5
[1] FALSE FALSE TRUE FALSE TRUE TRUE
y
[,1] [,2] [,3]
[1,] 3 9 8
[2,] 5 2 7
i <- y > 5
i
[,1] [,2] [,3]
[1,] FALSE TRUE TRUE
[2,] FALSE FALSE TRUE
y[i]
[1] 9 8 7
k <- c(T,F,T)
y[,k]
[,1] [,2]
[1,] 3 8
[2,] 5 7
k <- y[2,] < 7
y[,k]
[,1] [,2]
[1,] 3 9
[2,] 5 2
df <- data.frame(y)
df
X1 X2 X3
1 3 9 8
2 5 2 7
names(df)
[1] "X1" "X2" "X3"
df[,c("X1","X3")]
X1 X3
1 3 8
2 5 7
ncol() nrow() colMeans() rowMeans()
df[,c(T,F,T)]
X1 X3
1 3 8
2 5 7
df$X1
[1] 3 5
df[df$X1==3,]
X1 X2 X3
1 3 9 8
df[[3]]
[1] 8 7
What is the difference between df[1] and df[[1]]?
df[1]
X1
1 3
2 5
df[[1]]
[1] 3 5
str(df[1])
'data.frame': 2 obs. of 1 variable:
$ X1: num 3 5
What is the difference between df[1:2], df[1:2,] and df[,1:2]?
df[1:2]
X1 X2
1 3 9
2 5 2
df[1:2,]
X1 X2 X3
1 3 9 8
2 5 2 7
df[,1:2]
X1 X2
1 3 9
2 5 2
z <- list(x,y,df)
z
[[1]]
[1] 3 5 9 2 8 7
[[2]]
[,1] [,2] [,3]
[1,] 3 9 8
[2,] 5 2 7
[[3]]
X1 X2 X3
1 3 9 8
2 5 2 7
z[[2]]
[,1] [,2] [,3]
[1,] 3 9 8
[2,] 5 2 7
z[[3]][3]
X3
1 8
2 7
z[[3]][[3]]
[1] 8 7
unlist(z)
X11 X12 X21 X22 X31 X32
3 5 9 2 8 7 3 5 9 2 8 7 3 5 9 2 8 7
tips <- read.csv("http://www.ggobi.org/book/data/tips.csv")
n <- 15 #sample size
rows <- nrow(tips)
indx <- sample(seq(rows),n)
tips[indx,]
obs totbill tip sex smoker day time size
66 66 20.08 3.15 M No Sat Night 3
31 31 9.55 1.45 M No Sat Night 2
195 195 16.58 4.00 M Yes Thu Day 2
35 35 17.78 3.27 M No Sat Night 2
217 217 28.15 3.00 M Yes Sat Night 5
79 79 22.76 3.00 M No Thu Day 2
202 202 12.74 2.01 F Yes Thu Day 2
211 211 30.06 2.00 M Yes Sat Night 3
231 231 24.01 2.00 M Yes Sat Night 4
82 82 16.66 3.40 M No Thu Day 2
166 166 24.52 3.48 M No Sun Night 3
86 86 34.83 5.17 F No Thu Day 4
216 216 12.90 1.10 F Yes Sat Night 2
76 76 10.51 1.25 M No Sat Night 2
32 32 18.35 2.50 M No Sat Night 4
myColumns <- c("tip", "day", "size")
tips[indx, myColumns]
tip day size
66 3.15 Sat 3
31 1.45 Sat 2
195 4.00 Thu 2
35 3.27 Sat 2
217 3.00 Sat 5
79 3.00 Thu 2
202 2.01 Thu 2
211 2.00 Sat 3
231 2.00 Sat 4
82 3.40 Thu 2
166 3.48 Sun 3
86 5.17 Thu 4
216 1.10 Sat 2
76 1.25 Sat 2
32 2.50 Sat 4
df
X1 X2 X3
1 3 9 8
2 5 2 7
subset(df,X1==3)
X1 X2 X3
1 3 9 8
subset(df,select=c('X1','X3'))
X1 X3
1 3 8
2 5 7
subset(tips, size > 5, select = myColumns)
tip day size
126 4.2 Thu 6
142 6.7 Thu 6
144 5.0 Thu 6
157 5.0 Sun 6
We prefer to use [] for subsetting, but code written that way can be hard to read and may cause confusion.
The function subset() makes the code easier to read. Be cautious when using it inside iterative functions.
? '['
? subset
x <- c("A","B","A","D")
indx <- which(x=="A")
indx
[1] 1 3
x[indx]
[1] "A" "A"
x[which(x!="A")]
[1] "B" "D"
id | age | weight | height | walk |
---|---|---|---|---|
1 | 35 | 160 | 67 | 25 |
2 | 56 | 155 | 59 | 40 |
id | variable | value |
---|---|---|
1 | age | 35 |
2 | age | 56 |
1 | weight | 160 |
2 | weight | 155 |
1 | height | 67 |
2 | height | 59 |
1 | walk | 25 |
2 | walk | 40 |
install.packages("reshape2")
library(reshape2)
melt()
changes data from wide to long
dcast() / acast()
change data from long to wide (these are reshape2's replacements for cast() in the reshape package)
wdf
id age weight height walk
1 1 35 160 67 25
2 2 56 155 59 40
lgf <- melt(wdf, id='id')
lgf
id variable value
1 1 age 35
2 2 age 56
3 1 weight 160
4 2 weight 155
5 1 height 67
6 2 height 59
7 1 walk 25
8 2 walk 40
dcast(lgf, id~variable)
id age weight height walk
1 1 35 160 67 25
2 2 56 155 59 40
# Sum of squares of a numeric vector.
# Passed to dcast() below as the aggregation function applied to the
# values that fall into each cell.
foo <- function(x) {
  squared <- x^2
  sum(squared)
}
dcast(lgf, id~variable,foo)
id age weight height walk
1 1 1225 25600 4489 625
2 2 3136 24025 3481 1600
? acast()
head(data.frame(USArrests))
Murder Assault UrbanPop Rape
Alabama 13.2 236 58 21.2
Alaska 10.0 263 48 44.5
Arizona 8.1 294 80 31.0
Arkansas 8.8 190 50 19.5
California 9.0 276 91 40.6
Colorado 7.9 204 78 38.7
wf <- data.frame(State=row.names(USArrests),USArrests)
mdat <- melt(wf, id='State')
mdat[sample(1:200,size=7),]
State variable value
152 Alaska Rape 44.5
163 Illinois Rape 24.0
105 California UrbanPop 91.0
95 Vermont Assault 48.0
59 Florida Assault 335.0
42 Tennessee Murder 13.2
76 Montana Assault 109.0
smiths
subject time age weight height
1 John Smith 1 33 90 1.87
2 Mary Smith 1 NA NA 1.54
mdat <- melt(smiths,id='subject', na.rm=T)
mdat
subject variable value
1 John Smith time 1.00
2 Mary Smith time 1.00
3 John Smith age 33.00
5 John Smith weight 90.00
7 John Smith height 1.87
8 Mary Smith height 1.54
dcast(mdat, subject~variable) ?
Reference: the reshape2 package for R. For the earlier reshape package, see http://had.co.nz/reshape/
Hadley Wickham (2007). Reshaping data with the reshape package. Journal of Statistical Software, 21(12):1–20. http://www.jstatsoft.org/v21/i12