# Worked examples of reshaping, merging, aggregating and sorting data frames
# with base R. Bare object names on their own line print the object.

## -----------------------------------------------------------------------------
# Download the example data set (needs network access).
dd <- read.csv('https://stats.idre.ucla.edu/stat/data/hsb2.csv')
# Keep a tiny subset (3 rows, 4 columns) so the reshaped output is easy to read.
# NOTE(review): columns 1:2 and 8:9 are expected to be id, female, write and
# math, because the calls below refer to them by those names -- confirm.
d <- dd[1:3, c(1:2, 8:9)]
d

## -----------------------------------------------------------------------------
# Wide -> long: stack the "write" and "math" columns into one "score" column.
# No `times` is given here, so reshape() numbers the stacked variables itself.
wvars <- c("write", "math")
x <- reshape(d, varying = wvars, v.names = "score", direction = "long")
x

## -----------------------------------------------------------------------------
# Same reshape, but label each stacked row with the name of the source column
# (`times = wvars`) and store that label in a column called "subject".
x <- reshape(
  d,
  varying = wvars,
  v.names = "score",
  times = wvars,
  timevar = "subject",
  direction = "long"
)
x

## -----------------------------------------------------------------------------
# Drop the row names generated by reshape() in favour of the default 1..n.
rownames(x) <- NULL
x

## -----------------------------------------------------------------------------
# Long -> wide: one row per id/female, one "score.<subject>" column per subject.
w <- reshape(
  x,
  idvar = c("id", "female"),
  timevar = "subject",
  direction = "wide"
)
w

## -----------------------------------------------------------------------------
# Strip the "score." prefix that reshape() added to the wide column names.
# The pattern is anchored and the dot escaped so that only a literal leading
# "score." is removed (an unescaped "." would match any character).
cn <- colnames(w)
cn <- sub("^score\\.", "", cn)
colnames(w) <- cn
w

## -----------------------------------------------------------------------------
# Build two tables that share the "id" key: `a` has every record, `b` only a
# random subset of them.
a <- dd[, 1:3]
# random sample of 100 records
set.seed(1)
b <- dd[sample(nrow(dd), 100), c(1, 7:10)]

## -----------------------------------------------------------------------------
dim(a)
head(a)
table(a$female)
table(a$race)

## -----------------------------------------------------------------------------
dim(b)
head(b)

## -----------------------------------------------------------------------------
# Default merge keeps only the ids that are present in both tables.
ab <- merge(a, b, by = 'id')
head(ab)

## -----------------------------------------------------------------------------
dim(ab)

## -----------------------------------------------------------------------------
# all.x = TRUE keeps every row of `a`; rows without a match in `b` get NA in
# the columns that come from `b`.
ab <- merge(a, b, by = 'id', all.x = TRUE)
dim(ab)
head(ab)

## -----------------------------------------------------------------------------
# The four join flavours side by side on three rows from each table:
# matches only, all of x, all of y, and all rows from both.
merge(a[1:3, ], b[1:3, ], by = 'id')
merge(a[1:3, ], b[1:3, ], by = 'id', all.x = TRUE)
merge(a[1:3, ], b[1:3, ], by = 'id', all.y = TRUE)
merge(a[1:3, ], b[1:3, ], by = 'id', all = TRUE)

## -----------------------------------------------------------------------------
# Group means of `read`; na.rm = TRUE because the merge above left NA in `read`
# for records that were not sampled into `b`.
tapply(ab$read, ab$female, mean, na.rm = TRUE)
tapply(ab$read, ab$race, mean, na.rm = TRUE)

## -----------------------------------------------------------------------------
# sort() returns the sorted values of a vector.
x <- sample(10)
x
sort(x)

## -----------------------------------------------------------------------------
# order() returns the index permutation that sorts the vector, so x[i] is
# the sorted vector.
i <- order(x)
i
x[i]

## -----------------------------------------------------------------------------
# Ten randomly chosen rows of `a`, used to demonstrate sorting a data frame.
set.seed(0)
x <- a[sample(nrow(a), 10), ]
x

## -----------------------------------------------------------------------------
# Sort the rows by a single column.
oid <- order(x$id)
y <- x[oid, ]
y

## -----------------------------------------------------------------------------
# Sort by several columns: race first, ties broken by female, then by id.
oid <- order(x$race, x$female, x$id)
x[oid, ]