## ----setup, include=FALSE------------------------------------------------ library(knitr) ## ----sup_learn_1--------------------------------------------------------- x <- c(35, 47.2, 20.7, 27.1, 42.5, 40.1, 28.2, 35.8, 47.4, 21.3, 39, 30.6, 18.7, 8.4, 45.8, 14, 47.3, 31.5, 38, 32) y <- c(9, 11.5, 7.6, 8.8, 9.8, 11, 8.2, 8.6, 11.4, 7.4, 11.1, 7.9, 6, 4.6, 12.3, 6.1, 11.4, 8.8, 10.3, 9.4) plot(x, y) ## ----sup_learn_10-------------------------------------------------------- mlr <- lm(y~x) plot(x, y, pch=20, cex=1.5, las=1) abline(mlr, col="red", lwd=3) summary(mlr) ## ----sup_learn_20-------------------------------------------------------- # create the function from the data sf <- splinefun(x, y) # x prediction points px <- seq(1, 50, 0.25) # predictions py <- sf(px) plot(x, y, pch=20, cex=2, las=1) abline(mlr, col="red", lwd=3) lines(px, py, col="blue", lwd=3) ## ----sup_learn_30-------------------------------------------------------- rmse <- function(obs, prd, na.rm=FALSE) { sqrt(mean((obs - prd)^2, na.rm=na.rm)) } ## ----sup_learn_40-------------------------------------------------------- # no difference, perfect model rmse(1:10, 1:10) # a small difference, not bad rmse(1:10, 2:11) # rather different, much worse than the above rmse(1:10, 10:1) ## ----sup_learn_50-------------------------------------------------------- null <- rmse(1:10, mean(1:10)) null ## ----sup_learn_60-------------------------------------------------------- rmsenull <- function(obs, pred, na.rm=FALSE) { r <- rmse(obs, pred, na.rm=na.rm) null <- rmse(obs, mean(obs)) (null - r) / null } ## ------------------------------------------------------------------------ rmsenull(1:10, 1:10) rmsenull(1:10, 2:11) rmsenull(1:10, 10:1) ## ----sup_learn_70-------------------------------------------------------- plr <- predict(mlr, data.frame(x=x)) rmsenull(y, plr) ## ----sup_learn_80-------------------------------------------------------- psf <- sf(x) rmsenull(y, psf) ## ----sup_learn_90-------------------------------------------------------- n <- length(x) set.seed(321) i <- sample(n, 0.5 * n) i xa <- x[i] ya <- y[i] xb <- x[-i] yb <- y[-i] ## ----sup_learn_100------------------------------------------------------- mlr_a <- lm(ya~xa) sf_a <- splinefun(xa, ya) plr_a <- predict(mlr_a, data.frame(xa=xb)) psf_a <- sf_a(xb) rmsenull(yb, plr_a) rmsenull(yb, psf_a) ## ----sup_learn_110------------------------------------------------------- mlr_b <- lm(yb~xb) sf_b <- splinefun(xb, yb) plr_b <- predict(mlr_b, data.frame(xb=xa)) psf_b <- sf_b(xa) rmsenull(ya, psf_b) rmsenull(ya, plr_b) ## ----sup_learn_120, echo=FALSE, include=FALSE---------------------------- e <- c(rmsenull(yb, plr_a), rmsenull(ya, plr_b), rmsenull(yb, psf_a), rmsenull(ya, psf_b), NA, NA) e[5] <- mean(e[1:2]) e[6] <- mean(e[3:4]) e <- round(e,2) ## ----sup_learn_130, fig.width=10, fig.height=10-------------------------- #set up the plot plot(x, y, las=1, cex=0, xlim=c(0,80), ylim=c(3,15)) # original models abline(mlr, col="red", lwd=2, lty=3) lines(px, py, col="blue", lwd=2, lty=3) # sample A models abline(mlr_a, lwd=2, col="red") lines(px, sf_a(px), lwd=2, col="blue") # sample B models abline(mlr_b, lwd=2, col="red", lty=2) lines(px, sf_b(px), lwd=2, col="blue", lty=2) # sample A and B points points(xa, ya, cex=1.5) points(xb, yb, cex=1.5, pch=19) # a complex legend legend("bottomright", pch=c(1,19,rep(NA, 10)), lty=c(NA, NA, rep(c(NA, NA,1,2,3),2)), lwd=2, , pt.cex=1.5, bg="white", legend=c("Sample A", "Sample B", "", "Linear regression", "sample a", "sample b", "all data", "", "Spline model", "sample a", "sample b", "all data"), col=c("black", "black", "white", "white", rep("red", 3), "white", "white", rep("blue", 3)) ) ## ----sup_learn_140------------------------------------------------------- f <- function(x) x/10 + sin(x) + sqrt(x) X <- seq(1,50,0.1) plot(X, f(X), type="l") ## ----sup_learn_150------------------------------------------------------- set.seed(2) sx <- sample(X, 20) sy <- f(sx) ## ----sup_learn_160------------------------------------------------------- f <- function(x) {x - x^2 + x^3*sin(x)} X <- seq(1,50,0.1) Y <- f(X) plot(X, Y, type="l") ## ----sup_learn_170------------------------------------------------------- set.seed(2) xa <- sample(X, 40) ya <- f(xa) xb <- sample(X, 40) yb <- f(xb) ## ----sup_learn_180------------------------------------------------------- mlr <- lm(ya~xa) sf <- splinefun(xa, ya) rmsenull(yb, predict(mlr, data.frame(xa=xb))) rmsenull(yb, sf(xb)) mlr <- lm(yb~xb) sf <- splinefun(xb, yb) rmsenull(ya, predict(mlr, data.frame(xb=xa))) rmsenull(ya, sf(xa)) ## ----sup_learn_190------------------------------------------------------- px <- seq(1, 50, 0.25) py <- sf(px) plot(X, Y, type="l", lwd=3, las=1) points(xa, ya, cex=1.5) points(xb, yb, pch=19, cex=1.5) abline(mlr, col="red", lwd=2) lines(px, py, col="blue", lwd=2) legend("bottomleft", pch=c(1,19,rep(NA, 10)), lty=c(NA, NA, rep(c(NA, NA,1,2,3),2)), lwd=2, , pt.cex=1.5, bg="white", legend=c("Sample A", "Sample B", "", "Linear regression", "sample a", "sample b", "all data", "", "Spline model", "sample a", "sample b", "all data"), col=c("black", "black", "white", "white", rep("red", 3), "white", "white", rep("blue", 3)) )