Modeling mortality¶
Introduction¶
To design and evaluate an index based insurance contract, we need to understand the relationship between the index and losses incurred. Here we develop such a relationship using the z-scored NDVI to predict livestock mortality, using survey data from Marsabit.
Data¶
Mortality¶
Mortality was computed from survey data collected in Marsabit between 2008 and 2015. First read the data.
mort <- agrodata::data_ibli("marsabit_mortality.rds")
mort[1:5, 1:7]
## hhid season year sublocation type stock loss
## 1 1002 SRSD 2008 DAKABARICHA Camel 0.000000 0
## 2 1002 SRSD 2008 DAKABARICHA Cattle 13.000000 0
## 3 1002 SRSD 2008 DAKABARICHA Shoat 4.000000 0
## 4 1002 SRSD 2008 DAKABARICHA TLU 13.400000 0
## 5 1002 SRSD 2008 DAKABARICHA TLU per capita 1.914286 0
Load data with causes of death.
cause <- agrodata::data_ibli("marsabit_losses")
cause <- cause[,c("hhid", "sublocation", "cause","month", "year")]
colnames(cause)[4] <- "season"
s <- cause$season
cause$season <- "SRSD"
cause$season[s >=3 & s <=9] <-"LRLD"
cause <- na.omit(cause)
head(cause, n=2)
## hhid sublocation cause season year
## 342 1002 DAKABARICHA Starvation/Drought LRLD 2009
## 343 1002 DAKABARICHA Starvation/Drought LRLD 2009
Merge this table with mortality table and extract mortality causes by starvation/drought.
mort <- merge(mort, cause, by=c("hhid","sublocation", "season", "year"))
mort <- mort[mort$cause == "Starvation/Drought", ]
We mainly care about the mortality rates. Compute aggregates by year, season and sublocation.
mort <- mort[mort$type == "TLU", ]
amort <- aggregate(mort[,c("stock_beginning", "loss")], mort[ , c("year", "season", "sublocation")], sum, na.rm=TRUE)
head(amort, n=2)
## year season sublocation stock_beginning loss
## 1 2009 LRLD BUBISA 8936.4 2072.0
## 2 2010 LRLD BUBISA 520.3 40.4
amort$mortality_rate = 1 - ((amort$stock_beginning - amort$loss) / amort$stock_beginning )
amort$mortality_rate[amort$mortality_rate==0] <- NA
NDVI¶
Load the zNDVI data for 2000–2015 and income data for Marsabit that we computed in the previous section. We reshape the data from “wide” to “long” format.
LR <- agrodata::data_ibli("marsabit_zLRLD")
SR <- agrodata::data_ibli("marsabit_zSRSD")
LR[1:3, 1:6]
## sublocation LRLD2000 LRLD2001 LRLD2002 LRLD2003 LRLD2004
## 1 ARAPAL -2.0961337 0.3242631 -0.3526237 1.1115084 0.3370314
## 2 BADASA -0.6657791 0.2393530 0.6495006 0.9258476 0.6766376
## 3 BALESA -1.0856171 0.1699284 -0.2237794 1.4356138 -0.3569806
lr <- reshape(LR, direction="long", varying = 2:ncol(LR), v.names="NDVI", timevar="year", times=2000:2015)
lr$id <- "LRLD"
sr <- reshape(SR, direction="long", varying = 2:ncol(LR), v.names="NDVI", timevar="year", times=2000:2015)
sr$id <- "SRSD"
ndvi <- rbind(lr, sr)
colnames(ndvi)[4] <- "season"
ndvi$year_season <- apply(ndvi[, c("year", "season")], 1, function(x) paste(x, collapse="_"))
head(ndvi, n=2)
## sublocation year NDVI season year_season
## 1.2000 ARAPAL 2000 -2.0961337 LRLD 2000_LRLD
## 2.2000 BADASA 2000 -0.6657791 LRLD 2000_LRLD
Merge mortality and NDVI¶
d <- merge(ndvi, amort, by=c("year", "season", "sublocation"))
Explore the relation between mortality and NDVI.
cols <- c("red", "blue")
seas <- c("SRSD", "LRLD")
pcols <- cols[match(d$season, seas)]
plot(mortality_rate~NDVI, data=d, xlab="zndvi", ylab="mortality", col=pcols, pch=20)
legend("topright", seas, col=cols, pch=20)
The relationship is a bit noisy (as to be expected). There is a lot of variability at zNDVI < -1. That could be a problem; as predictions of mortality at low zNDVI would be quite uncertain.
Mortality model¶
Build a local regression model.
dd <- na.omit(d[, c("NDVI", "mortality_rate")])
# add two "anchering points"
dd <- rbind(dd, c(-2.5, 0.9))
dd <- rbind(dd, c(-4, 1))
saveRDS(dd, "ndvi_mort.rds")
library(msir)
## Warning: package 'msir' was built under R version 4.3.1
## Package 'msir' version 1.3.3
## Type 'citation("msir")' for citing this R package in publications.
# Calculate and plot a 1.96 * SD prediction band
# that is a 95% prediction band
m <- loess.sd(dd)
plot(dd, cex=.5, col="blue", xlab="zNDVI", ylab="Mortality rate")
lines(m$x, m$y, col="red")
lines(m$x, m$upper, lty=2)
lines(m$x, m$lower, lty=2)
Same thing after a little generalization.
x <- dd
x$NDVI = round(x$NDVI, 1)
a <- aggregate(x[, "mortality_rate", drop=FALSE], x[, "NDVI", drop=FALSE], mean)
ma <- loess.sd(a)
plot(a, cex=.5, col="blue", xlab="zNDVI", ylab="Mortality rate")
lines(ma$x, ma$y, col="red")
lines(ma$x, ma$upper, lty=2)
lines(ma$x, ma$lower, lty=2)
Compare predicted with observed mortality rates
ml <- loess(mortality_rate ~ NDVI, data=dd)
p <- predict(ml, d)
plot(d$mortality_rate, p, xlab="mortality rate", ylab="predicted")
abline(0,1)
x <- na.omit(cbind(d$mortality_rate, p))
cor(x[,1], x[,2])
## [1] 0.5567935
d$predicted_mortality <- p
saveRDS(d, "pred_mort1.rds")
Predict mortality in other years based on their zNDVI.
ndvi$predicted_mortality <- predict(ml, ndvi)
par(mai=c(1,2,1,1))
boxplot(ndvi$predicted_mortality ~ ndvi$year_season, las=1, cex.axis=.5, horizontal=TRUE, xlab="zNDVI predicted Mortality rate", ylab="", cex=.25, col=rainbow(32))
saveRDS(ndvi, "pred_mort2.rds")