# Data-cleaning and exploration walkthrough on a small example data frame.
#
# Steps: build `d`, recode the -999 sentinel in `score2` to NA, harmonise the
# spelling and capitalisation of `name`, turn placeholder names ("???", "")
# into NA, then summarise and plot the two score columns.

## -----------------------------------------------------------------------------
d <- data.frame(
  id = 1:10,
  name = c("Bob", "Bobby", "???", "Bob", "Bab", "Jim", "Jim", "jim", "", "Jim"),
  score1 = c(8, 10, 7, 9, 2, 5, 1, 6, 3, 4),
  score2 = c(3, 4, 5, -999, 5, 5, -999, 2, 3, 4),
  stringsAsFactors = FALSE
)
d

## -----------------------------------------------------------------------------
summary(d)

## -----------------------------------------------------------------------------
# which values in score2 are -999?
i <- d$score2 == -999
# set these to NA
d$score2[i] <- NA
summary(d)

## -----------------------------------------------------------------------------
unique(d$name)
table(d$name)

## -----------------------------------------------------------------------------
d$name[d$name %in% c("Bab", "Bobby")] <- "Bob"
table(d$name)

## -----------------------------------------------------------------------------
# get the first letters
first <- substr(d$name, 1, 1)
# get the remainder
remainder <- substr(d$name, 2, nchar(d$name))
# assure that the first letter is upper case
first <- toupper(first)
# combine
name <- paste0(first, remainder)
# assign back to the variable
d$name <- name
table(d$name)

## -----------------------------------------------------------------------------
# `%in%` never yields NA, so the mask stays a clean TRUE/FALSE vector even
# when `name` already contains missing values.
d$name[d$name %in% "???"] <- NA
# by default table() silently drops NA
table(d$name)

## -----------------------------------------------------------------------------
table(d$name, useNA = "ifany")

## -----------------------------------------------------------------------------
# row 9 still holds an empty string rather than NA
d$name[9]

## -----------------------------------------------------------------------------
# `name` holds an NA at this point, so `==` would give an NA in the mask;
# `%in%` avoids that.
d$name[d$name %in% ""] <- NA
table(d$name, useNA = "ifany")

## -----------------------------------------------------------------------------
table(d[c("name", "score2")])

## -----------------------------------------------------------------------------
quantile(d$score1)
range(d$score1)
mean(d$score1)

## -----------------------------------------------------------------------------
# score2 now contains NA: quantile() refuses NA input unless na.rm = TRUE.
# try() still shows that error but lets the rest of the script run.
try(quantile(d$score2))
# range() does not fail, it returns NA NA
range(d$score2)
quantile(d$score2, na.rm = TRUE)
range(d$score2, na.rm = TRUE)

## ----plot1, fig.cap=""--------------------------------------------------------
# 2 x 2 panel layout; keep the previous settings so they can be restored
old_par <- par(mfrow = c(2, 2))
plot(d$score1, d$score2)
boxplot(d[, c("score1", "score2")])
plot(sort(d$score1))
hist(d$score2)
par(old_par)