## ----------------------------------------------------------------------------- library(agro) ff <- get_data_from_uri("https://doi.org/10.7910/DVN/SNQZTT", ".") ff ## ----import table------------------------------------------------------------- library(readxl) # Import table of data wdata <- read_excel(ff[1]) # Check column names to see the variable provided colnames(wdata) ## ----subset data-------------------------------------------------------------- wdata1 <- wdata[, !(colnames(wdata) %in% c("Year", "location", "Ave yield control", "Ave yield stress"))] ## ----------------------------------------------------------------------------- # Make a multiple linear regression model with all explanatory variables and yield reduction as the response variable # Load packages library(car) # Using the dot notation for shorter code, which means everything else in the data frame model_full <- lm(`% yield reduction` ~ ., wdata1) # Start with all variables and develop a good model using regsubsets() from the leaps package. Select the model with the minimum BIC # Load packages library(leaps) # Using regsubset model_regsub <- regsubsets(`% yield reduction` ~ ., data = wdata1) # View results summary(model_regsub) # Visually determine best model by BIC (subset( ) are bic, cp, adjr2, and rss.) summary(model_regsub)$bic subsets(model_regsub, statistic = "bic", legend = F) # Creat the best model model_reduced <- lm(`% yield reduction` ~ `Rainfall 60-100 DAS` + `ave tensiom 60-100` + `max tensiom 60-100` + `aver water table 60-100 DAS` + `bd 5-10 cm` + pH + `% Clay` + `depth of max penetrom`, data = wdata1) summary(model_reduced) # Compare the best model with the full model anova(model_full, model_reduced) ## ----------------------------------------------------------------------------- library(MASS) # Stepwise Regression model_step <- stepAIC(model_full, direction = "both") # Display results model_step$anova # Final model model_final <- lm(`% yield reduction` ~ `Rainfall 60-100 DAS` + `ave tensiom 60-100` + `max tensiom 60-100` + `aver water table 60-100 DAS` + `bd 5-10 cm` + `bd 25-30 cm` + pH + `Avail-P ppm` + `Exch-K ppm` + `% Clay`, data = wdata1) # Compare models (best reduced model from the previous analysis vs. the final model from this analysis) anova(model_reduced, model_final) ## ----------------------------------------------------------------------------- # Perform a PCA on the 20 varaibles using the correlation matrix # Load package library(stats) # Pricipal Component Analysis (When scale=TRUE, the correlation matrix is used, omit data wich is NA) sr.pca <- prcomp(na.omit(wdata1), scale = TRUE) # Display results summary(sr.pca) # Using a scree plot to check how many PC's to retain (find elbow) plot(sr.pca, type = "l", main ="PCA of dataset") # Check eigenvalues for each PC summary(sr.pca)$sdev^2 # Check Eigenvector values for each PC's re <- eigen(cor(na.omit(wdata1))) ## ----------------------------------------------------------------------------- # Make a biplot biplot(sr.pca)