## ----setup, include=FALSE------------------------------------------------
library(knitr)



## ----sup_learn_1---------------------------------------------------------
x <- c(35, 47.2, 20.7, 27.1, 42.5, 40.1, 28.2, 35.8, 47.4, 
		21.3, 39, 30.6, 18.7, 8.4, 45.8, 14, 47.3, 31.5, 38, 32)
y <- c(9, 11.5, 7.6, 8.8, 9.8, 11, 8.2, 8.6, 11.4, 7.4, 
		11.1, 7.9, 6, 4.6, 12.3, 6.1, 11.4, 8.8, 10.3, 9.4)
plot(x, y)


## ----sup_learn_10--------------------------------------------------------
mlr <- lm(y~x)
plot(x, y, pch=20, cex=1.5, las=1)
abline(mlr, col="red", lwd=3) 
summary(mlr)


## ----sup_learn_20--------------------------------------------------------
# create the function from the data
sf <- splinefun(x, y)
# x prediction points
px <- seq(1, 50, 0.25)
# predictions
py <- sf(px)

plot(x, y, pch=20, cex=2, las=1)
abline(mlr, col="red", lwd=3) 
lines(px, py, col="blue", lwd=3)


## ----sup_learn_30--------------------------------------------------------
rmse <- function(obs, prd, na.rm=FALSE) {
	sqrt(mean((obs - prd)^2, na.rm=na.rm))
}


## ----sup_learn_40--------------------------------------------------------
# no difference, perfect model
rmse(1:10, 1:10)
# a small difference, not bad
rmse(1:10, 2:11)
# rather different, much worse than the above
rmse(1:10, 10:1)


## ----sup_learn_50--------------------------------------------------------
null <- rmse(1:10, mean(1:10))
null


## ----sup_learn_60--------------------------------------------------------
rmsenull <- function(obs, pred, na.rm=FALSE) {
	r <- rmse(obs, pred, na.rm=na.rm)
	null <- rmse(obs, mean(obs))
	(null - r) / null
}


## ------------------------------------------------------------------------
rmsenull(1:10, 1:10)
rmsenull(1:10, 2:11)
rmsenull(1:10, 10:1)


## ----sup_learn_70--------------------------------------------------------
plr <- predict(mlr, data.frame(x=x))
rmsenull(y, plr)


## ----sup_learn_80--------------------------------------------------------
psf <- sf(x)
rmsenull(y, psf)


## ----sup_learn_90--------------------------------------------------------
n <- length(x)
set.seed(321)
i <- sample(n, 0.5 * n)
i
xa <- x[i]
ya <- y[i]
xb <- x[-i]
yb <- y[-i]


## ----sup_learn_100-------------------------------------------------------
mlr_a <- lm(ya~xa)
sf_a <- splinefun(xa, ya)
plr_a <- predict(mlr_a, data.frame(xa=xb))
psf_a <- sf_a(xb)
rmsenull(yb, plr_a)
rmsenull(yb, psf_a)


## ----sup_learn_110-------------------------------------------------------
mlr_b <- lm(yb~xb)
sf_b <- splinefun(xb, yb)
plr_b <- predict(mlr_b, data.frame(xb=xa))
psf_b <- sf_b(xa)
rmsenull(ya, psf_b)
rmsenull(ya, plr_b)


## ----sup_learn_120, echo=FALSE, include=FALSE----------------------------
e <- c(rmsenull(yb, plr_a), rmsenull(ya, plr_b), rmsenull(yb, psf_a), rmsenull(ya, psf_b), NA, NA)
e[5] <- mean(e[1:2])
e[6] <- mean(e[3:4])
e <- round(e,2)


## ----sup_learn_130, fig.width=10, fig.height=10--------------------------
#set up the plot
plot(x, y, las=1, cex=0, xlim=c(0,80), ylim=c(3,15))

# original models
abline(mlr, col="red", lwd=2, lty=3) 
lines(px, py, col="blue", lwd=2, lty=3)

# sample A models
abline(mlr_a, lwd=2, col="red")
lines(px, sf_a(px), lwd=2, col="blue")

# sample B models
abline(mlr_b, lwd=2, col="red", lty=2)
lines(px, sf_b(px), lwd=2, col="blue", lty=2)

# sample A and B points
points(xa, ya, cex=1.5)
points(xb, yb, cex=1.5, pch=19)

# a complex legend
legend("bottomright", pch=c(1,19,rep(NA, 10)), lty=c(NA, NA, rep(c(NA, NA,1,2,3),2)), lwd=2, , pt.cex=1.5, bg="white",
	legend=c("Sample A", "Sample B", "", "Linear regression", "sample a", "sample b", "all data", "", "Spline model", "sample a", "sample b", "all data"), 
	col=c("black", "black", "white", "white", rep("red", 3), "white", "white", rep("blue", 3))
)



## ----sup_learn_140-------------------------------------------------------
f <- function(x) x/10 + sin(x) + sqrt(x)
X <- seq(1,50,0.1)
plot(X, f(X), type="l")


## ----sup_learn_150-------------------------------------------------------
set.seed(2)
sx <- sample(X, 20)
sy <- f(sx)


## ----sup_learn_160-------------------------------------------------------
f <- function(x) {x - x^2 + x^3*sin(x)}
X <- seq(1,50,0.1)
Y <- f(X) 
plot(X, Y, type="l")


## ----sup_learn_170-------------------------------------------------------
set.seed(2)
xa <- sample(X, 40)
ya <- f(xa) 
xb <- sample(X, 40)
yb <- f(xb) 


## ----sup_learn_180-------------------------------------------------------
mlr <- lm(ya~xa)
sf <- splinefun(xa, ya)
rmsenull(yb, predict(mlr, data.frame(xa=xb)))
rmsenull(yb, sf(xb))

mlr <- lm(yb~xb)
sf <- splinefun(xb, yb)
rmsenull(ya, predict(mlr, data.frame(xb=xa)))
rmsenull(ya, sf(xa))


## ----sup_learn_190-------------------------------------------------------
px <- seq(1, 50, 0.25)
py <- sf(px)
plot(X, Y, type="l", lwd=3, las=1)
points(xa, ya, cex=1.5)
points(xb, yb, pch=19, cex=1.5)
abline(mlr, col="red", lwd=2) 
lines(px, py, col="blue", lwd=2)

legend("bottomleft", pch=c(1,19,rep(NA, 10)), lty=c(NA, NA, rep(c(NA, NA,1,2,3),2)), lwd=2, , pt.cex=1.5, bg="white",
	legend=c("Sample A", "Sample B", "", "Linear regression", "sample a", "sample b", "all data", "", "Spline model", "sample a", "sample b", "all data"), 
	col=c("black", "black", "white", "white", rep("red", 3), "white", "white", rep("blue", 3))
)