## -----------------------------------------------------------------------------
# Read the hsb2 example data set over HTTP, then keep a tiny slice of it
# (rows 1 to 3; columns 1, 2, 8 and 9) to use in the reshape examples.
dd <- read.csv("https://stats.idre.ucla.edu/stat/data/hsb2.csv")
d <- dd[1:3, c(1, 2, 8, 9)]
d


## -----------------------------------------------------------------------------
# Columns to be stacked into a single "score" column.
wvars <- c("write", "math")
# Wide -> long. Neither `timevar` nor `times` is supplied here, so reshape()
# uses its defaults for the column that tells the stacked values apart.
x <- reshape(
  data = d,
  direction = "long",
  varying = wvars,
  v.names = "score"
)
x


## -----------------------------------------------------------------------------
# Same wide -> long reshape, but now the distinguishing column is named
# "subject" and is filled with the original column names from `wvars`.
x <- reshape(
  data = d,
  direction = "long",
  varying = wvars,
  v.names = "score",
  timevar = "subject",
  times = wvars
)
x


## -----------------------------------------------------------------------------
# Discard the row names produced by reshape() so the rows are simply numbered.
rownames(x) <- NULL
x


## -----------------------------------------------------------------------------
# Long -> wide: one row per (id, female) pair and one "score" column per
# distinct value of `subject`.
w <- reshape(
  data = x,
  direction = "wide",
  idvar = c("id", "female"),
  timevar = "subject"
)
w


## -----------------------------------------------------------------------------
# Remove the "score." prefix from the column names of the widened table.
# The pattern is anchored with `^` and the dot is escaped so that only a
# literal leading "score." is stripped. An unescaped "score." is a regular
# expression in which `.` matches any character, and without the anchor it
# could also match in the middle of a column name.
cn <- colnames(w)
cn <- sub("^score\\.", "", cn)
colnames(w) <- cn
w


## -----------------------------------------------------------------------------
# `a`: every record, restricted to the first three columns.
a <- dd[, c(1, 2, 3)]
# `b`: a random sample of 100 records, keeping columns 1 and 7 to 10.
# The seed is fixed so the same rows are drawn on every run.
set.seed(1)
b <- dd[sample.int(nrow(dd), 100), c(1, 7, 8, 9, 10)]


## -----------------------------------------------------------------------------
# Quick look at `a`: its dimensions, its first rows, and the frequency of
# each value in the `female` and `race` columns.
dim(a)
head(a, n = 6L)
table(a[["female"]])
table(a[["race"]])


## -----------------------------------------------------------------------------
# Quick look at `b`: its dimensions and its first rows.
dim(b)
head(b, n = 6L)


## -----------------------------------------------------------------------------
# Join `a` and `b` on the shared `id` column. No `all*` argument is given, so
# only ids that occur in both tables end up in the result.
ab <- merge(x = a, y = b, by = "id")
head(ab, n = 6L)


## -----------------------------------------------------------------------------
# Dimensions of the joined table.
dim(ab)


## -----------------------------------------------------------------------------
# Join again, this time keeping every row of `a` (all.x = TRUE); rows of `a`
# without a match in `b` get NA in the columns that come from `b`.
ab <- merge(x = a, y = b, by = "id", all.x = TRUE)
dim(ab)
head(ab, n = 6L)


## -----------------------------------------------------------------------------
# Compare the four join flavours on the first three rows of each table:
# matching ids only, all rows of `a`, all rows of `b`, and all rows of both.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
merge(a[1:3, ], b[1:3, ], by = "id")
merge(a[1:3, ], b[1:3, ], by = "id", all.x = TRUE)
merge(a[1:3, ], b[1:3, ], by = "id", all.y = TRUE)
merge(a[1:3, ], b[1:3, ], by = "id", all = TRUE)


## -----------------------------------------------------------------------------
# Mean of `read` within each level of `female`, then within each level of
# `race`. Missing values in `read` are dropped before averaging.
tapply(X = ab[["read"]], INDEX = ab[["female"]], FUN = mean, na.rm = TRUE)
tapply(X = ab[["read"]], INDEX = ab[["race"]], FUN = mean, na.rm = TRUE)


## -----------------------------------------------------------------------------
# A random permutation of 1..10, shown as drawn and then sorted.
x <- sample.int(10)
x
sort(x)


## -----------------------------------------------------------------------------
# order() returns the index positions that put `x` in ascending order, so
# indexing `x` with them gives the same result as sort(x).
i <- order(x, decreasing = FALSE)
i
x[i]


## -----------------------------------------------------------------------------
# Draw 10 rows of `a` at random; the seed is fixed for reproducibility.
set.seed(0)
x <- a[sample.int(nrow(a), 10), ]
x


## -----------------------------------------------------------------------------
# Row positions that sort the sampled rows by ascending `id`, and the
# reordered copy of the data.
oid <- order(x[["id"]], decreasing = FALSE)
y <- x[oid, ]
y


## -----------------------------------------------------------------------------
# Sort by several keys: `race` first, ties broken by `female`, then by `id`.
oid <- order(x[["race"]], x[["female"]], x[["id"]])
x[oid, ]