## ----setup, include=FALSE------------------------------------------- library(BKP) library(tgp) library(gplite) library(kernlab) library(mlbench) library(pROC) library(gridExtra) library(mdhglm) library(ggplot2) library(rnaturalearth) library(rnaturalearthdata) library(sf) options(prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE) ## ----Examples using BKP, include=FALSE------------------------------ # ============================================================== # # ========================= BKP Examples ======================= # # ============================================================== # #-------------------------- 1D Example --------------------------- #-------------------------- Example 1 ---------------------------- set.seed(123) # Define true success probability function true_pi_fun <- function(x) { 1/(1+exp(-3*x)) } # Data points n <- 7 Xbounds <- matrix(c(-2,2), nrow = 1) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- sample(100, n, replace = TRUE) y <- rbinom(n, size = m, prob = true_pi) # Fit BKP model BKP_model_1D_1 <- fit_BKP(X, y, m, Xbounds = Xbounds) # Print BKP model print(BKP_model_1D_1) # New data points Xnew = matrix(seq(-2, 2, length = 100), ncol = 1) true_pi <- true_pi_fun(Xnew) # Plot results pdf("ex1.pdf", width = 6, height = 6) plot(BKP_model_1D_1) lines(Xnew,true_pi, col = "black", lwd = 2) legend(x = -2.24,y = 0.89, legend = "True Probability", col = "black", lwd = 2, bty = "n", inset = 0.02) dev.off() # Simulate sim <- simulate(BKP_model_1D_1, nsim = 3, Xnew = Xnew) pdf("ex1_sim.pdf", width = 6, height = 6) matplot(Xnew, sim$samples, type = "l", lty = 1, lwd = 1.5, col = rainbow(ncol(sim$samples)), xlab = "x", ylab = "Probability", main = "Simulated Probability Curves") legend("topleft", legend = paste("sample", 1:ncol(sim$samples)), col = rainbow(ncol(sim$samples)), lty = 1, lwd = 2, bty = "n") dev.off() ### Classification set.seed(123) # Data points n <- 20 Xbounds <- matrix(c(-2,2), nrow = 1) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- rep(1,n) y <- as.numeric(true_pi > 0.5) # Fit BKP model with r0 = 0.01 BKP_model_1D_1_class_1 <- fit_BKP( X, y, m, Xbounds = Xbounds, prior = "fixed", r0 = 0.01, loss = "log_loss") # New data points Xnew = matrix(seq(-2, 2, length = 100), ncol = 1) true_pi <- true_pi_fun(Xnew) # Plot results pdf("ex1class001.pdf", width = 6, height = 6) plot(BKP_model_1D_1_class_1) lines(Xnew,true_pi, col = "black", lwd = 2) dev.off() # Fit BKP model with r0 = 2 BKP_model_1D_1_class_2 <- fit_BKP( X, y, m, Xbounds = Xbounds, prior = "fixed", r0 = 2, loss = "log_loss") # Plot results pdf("ex1class2.pdf", width = 6, height = 6) plot(BKP_model_1D_1_class_2) lines(Xnew,true_pi, col = "black", lwd = 2) dev.off() #-------------------------- Example 2 ---------------------------- set.seed(123) # Define true success probability function true_pi_fun <- function(x) { (1 + exp(-x^2) * cos(10 * (1 - exp(-x)) / (1 + exp(-x)))) / 2 } # Data points n <- 30 Xbounds <- matrix(c(-2,2), nrow = 1) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- sample(100, n, replace = TRUE) y <- rbinom(n, size = m, prob = true_pi) # Fit BKP model BKP_model_1D_2 <- fit_BKP(X, y, m, Xbounds = Xbounds) # New data points Xnew = matrix(seq(-2, 2, length = 100), ncol=1) true_pi <- true_pi_fun(Xnew) # Plot results pdf("ex2.pdf", width = 6, height = 6) plot(BKP_model_1D_2) lines(Xnew,true_pi, col = "black", lwd = 2) legend(x = -2.24,y = 0.89, legend = "True Probability", col = "black", lwd = 2, bty = "n", inset = 0.02) dev.off() # Simulate sim <- simulate(BKP_model_1D_2, nsim = 3, Xnew = Xnew) pdf("ex2_sim.pdf", width = 6, height = 6) matplot(Xnew, sim$samples, type = "l", lty = 1, lwd = 1.5, col = rainbow(ncol(sim$samples)), xlab = "x", ylab = "Probability", main = "Simulated Probability Curves") legend("topleft", legend = paste("sample", 1:ncol(sim$samples)), col = rainbow(ncol(sim$samples)), lty = 1, lwd = 2, bty = "n") dev.off() #-------------------------- 2D Example --------------------------- #-------------------------- Example 3 ---------------------------- set.seed(123) # Define 2D latent function and probability transformation true_pi_fun <- function(X) { if(is.null(nrow(X))) X <- matrix(X, nrow=1) m <- 8.6928 s <- 2.4269 x1 <- 4*X[,1]- 2 x2 <- 4*X[,2]- 2 a <- 1 + (x1 + x2 + 1)^2 * (19- 14*x1 + 3*x1^2- 14*x2 + 6*x1*x2 + 3*x2^2) b <- 30 + (2*x1- 3*x2)^2 * (18- 32*x1 + 12*x1^2 + 48*x2- 36*x1*x2 + 27*x2^2) f <- log(a*b) f <- (f- m)/s return(pnorm(f)) # Transform to probability } # Data points n <- 100 Xbounds <- matrix(c(0, 0, 1, 1), nrow = 2) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- sample(100, n, replace = TRUE) y <- rbinom(n, size = m, prob = true_pi) # Fit BKP model BKP_model_2D <- fit_BKP(X, y, m, Xbounds=Xbounds) # Print BKP model print(BKP_model_2D) # Plot results pdf("ex3.pdf", width = 9, height = 8) plot(BKP_model_2D) dev.off() # Plot True distribution pdf("ex3_true.pdf", width = 4.5, height = 4) Xnew1 <- seq(Xbounds[1,1], Xbounds[1,2], length.out = 200) Xnew2 <- seq(Xbounds[2,1], Xbounds[2,2], length.out = 200) Xnew <- expand.grid(Xnew1 = Xnew1, Xnew2 = Xnew2) true_pi <- true_pi_fun(Xnew) df <- data.frame(x1 = Xnew$Xnew1, x2 = Xnew$Xnew2, True = true_pi) print(BKP:::my_2D_plot_fun("True", title = "True Probability", data = df)) dev.off() # Predict via BKP model Xnew <- lhs(n = 10, rect = Xbounds) predict(BKP_model_2D, Xnew) # ============================================================ # Execution mode: # run_elapsed_time = FALSE means using the pre-computed average # elapsed time matrix, without re-running the time measurement # experiments. # # Setting run_elapsed_time = TRUE will run the full benchmarking # procedure for 5 methods × 5 n values × 20 repetitions. # This can take many hours (potentially tens of hours) depending # on your machine's performance. # # It is recommended to keep this FALSE unless you need to # fully reproduce the timing experiments. # ============================================================ run_elapsed_time = FALSE if(run_elapsed_time){ n_vals <- c(200, 500, 1000, 2000, 5000) elapsed_time_given <- matrix(NA, nrow = 20, ncol = 5) elapsed_time_single_start <- matrix(NA, nrow = 20, ncol = 5) elapsed_time_multi_start <- matrix(NA, nrow = 20, ncol = 5) elapsed_time_given_gp <- matrix(NA, nrow = 20, ncol = 5) elapsed_time_optim_gp <- matrix(NA, nrow = 20, ncol = 5) for (i in 1:20) { for (j in 1:5) { set.seed((i - 1) * 5 + j) cat("i =", i, "j =", j, ":") n <- n_vals[j] Xbounds <- matrix(c(0, 0, 1, 1), nrow = 2) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- sample(100, n, replace = TRUE) y <- rbinom(n, size = m, prob = true_pi) elapsed_time_given[i, j] <- system.time({ fit <- fit_BKP(X, y, m, Xbounds = Xbounds, theta = 1) })["elapsed"] cat(" BKP_given =", elapsed_time_given[i, j]) elapsed_time_single_start[i, j] <- system.time({ fit <- fit_BKP(X, y, m, Xbounds = Xbounds, n_multi_start = 1) })["elapsed"] cat(", BKP_single_start =", elapsed_time_single_start[i, j]) elapsed_time_multi_start[i, j] <- system.time({ fit <- fit_BKP(X, y, m, Xbounds = Xbounds) })["elapsed"] cat(", BKP_multi_start =", elapsed_time_multi_start[i, j]) elapsed_time_given_gp[i, j] <- system.time({ gp <- gp_init(cf = cf_sexp(), lik = lik_binomial()) gp <- gp_fit(gp, X, y, trials = m) })["elapsed"] cat(", GP_given =", elapsed_time_given_gp[i, j]) elapsed_time_optim_gp[i, j] <- system.time({ gp <- gp_init(cf = cf_sexp(), lik = lik_binomial()) gp <- gp_optim(gp, X, y, trials = m, verbose = FALSE) })["elapsed"] cat(", GP_optim =", elapsed_time_optim_gp[i, j], "\n") } } # ---- Compute average time for each n ---- avg_time <- matrix(NA, nrow = 5, ncol = 5) colnames(avg_time) <- paste0("n=", n_vals) rownames(avg_time) <- c("given", "single_start", "multi_start", "given_gp", "optim_gp") avg_time[1, ] <- colMeans(elapsed_time_given) avg_time[2, ] <- colMeans(elapsed_time_single_start) avg_time[3, ] <- colMeans(elapsed_time_multi_start) avg_time[4, ] <- colMeans(elapsed_time_given_gp) avg_time[5, ] <- colMeans(elapsed_time_optim_gp) }else{ n_vals <- c(200, 500, 1000, 2000, 5000) avg_time <- matrix(c(0.0030, 0.0215, 0.0745, 0.3555, 2.2010, # given 0.3095, 1.8790, 6.6080, 24.3170, 144.4830, # single_start 5.7440, 31.2160, 132.6125, 547.2550, 3110.1650, # multi_start 0.0140, 0.1740, 1.2235, 9.0255, 145.7435, # given_gp 0.7500, 8.6575, 73.4760, 471.0000, 6619.0835 # optim_gp ), nrow = 5, byrow = TRUE) rownames(avg_time) <- c("given", "single_start", "multi_start", "given_gp", "optim_gp") colnames(avg_time) <- c("n=200", "n=500", "n=1000", "n=2000", "n=5000") } pdf("elapsed_time.pdf", width = 10, height = 5) par(mfrow = c(1, 2), mar = c(4,4,3,1)) # Adjust margins for better appearance cols <- c("blue", "skyblue", "darkblue", "red", "orange") pchs <- c(16, 17, 16, 15, 15) # Left plot: given and given_gp plot(n_vals, avg_time["given", ], log = "xy", type = "b", col = cols[1], pch = pchs[1], lwd = 2, xlab = "n", ylab = "Time (seconds, log scale)", main = "Fixed Hyperparameter", ylim = range(avg_time[c("given", "given_gp"), ])) lines(n_vals, avg_time["given_gp", ], type = "b", col = cols[4], pch = pchs[4], lwd = 2) # Complexity reference lines ref_bkp <- avg_time["given", 1] * (n_vals / n_vals[1])^2 ref_gp <- avg_time["given_gp", 1] * (n_vals / n_vals[1])^3 lines(n_vals, ref_bkp, col = "blue", lty = 2) lines(n_vals, ref_gp, col = "red", lty = 4) legend("topleft", bty = "n", legend = c("BKP", "LGP", "O(n^2)", "O(n^3)"), col = c(cols[c(1,4)], "blue", "red"), pch = c(pchs[c(1,4)], NA, NA), lty = c(1,1,2,4), lwd = c(2,2,1,1), bg = "white") # Right plot: single_start, multi_start, optim_gp plot(n_vals, avg_time["single_start", ], log = "xy", type = "b", col = cols[2], pch = pchs[2], lwd = 2, xlab = "n", ylab = "Time (seconds, log scale)", main = "Optimization-based Methods", ylim = range(avg_time[c("single_start", "multi_start", "optim_gp"), ])) lines(n_vals, avg_time["multi_start", ], type = "b", col = cols[3], pch = pchs[3], lwd = 2) lines(n_vals, avg_time["optim_gp", ], type = "b", col = cols[5], pch = pchs[5], lwd = 2) # Complexity reference lines ref_bkp2 <- avg_time["single_start", 1] * (n_vals / n_vals[1])^2 ref_gp2 <- avg_time["optim_gp", 1] * (n_vals / n_vals[1])^3 lines(n_vals, ref_bkp2, col = "skyblue", lty = 2) lines(n_vals, ref_gp2, col = "orange", lty = 4) legend("topleft", bty = "n", legend = c("BKP (single_start)", "BKP (multi_start)", "LGP", "O(n^2)", "O(n^3)"), col = c(cols[c(2,3,5)], "skyblue", "orange"), pch = c(pchs[c(2,3,5)], NA, NA), lty = c(1,1,1,2,4), lwd = c(2,2,2,1,1), bg = "white") # Restore default plotting parameters (optional) par(mfrow = c(1,1)) dev.off() #-------------------------- Classification ------------------------ #-------------------------- Example 4 ---------------------------- set.seed(123) # Data points n <- 250 n_train <- 200 n_test <- 50 data <- mlbench.spirals(n, cycles = 2, sd = 0.05) X_train <- data$x[1:n_train, ] y_train <- as.numeric(data$classes[1:n_train]) - 1 # Convert to 0/1 for BKP X_test <- data$x[(n_train + 1):n, ] y_test <- as.numeric(data$classes[(n_train + 1):n]) - 1 # Convert to 0/1 for BKP m <- rep(1, n_train) Xbounds <- rbind(c(-1.7, 1.7), c(-1.7, 1.7)) # Fit BKP model BKP_model_Class <- fit_BKP( X_train, y_train, m, Xbounds = Xbounds, prior = "fixed", r0 = 0.1, loss = "log_loss") # Predict via BKP prediction <- predict(BKP_model_Class, X_test) # Plot ROC pdf("ex4roc.pdf", width = 6, height = 6) roc_curve_BKP <- roc(y_test, prediction$mean) plot(roc_curve_BKP, main = paste("ROC curve for BKP (AUC = ", round(auc(roc_curve_BKP), 3), ")", sep = ""), col = "#1c61b6") dev.off() pdf("ex4.pdf", width = 13, height = 6) plot(BKP_model_Class) dev.off() # Fit LGP model with exponential approx gp <- gp_init(cf = cf_sexp(), lik = lik_bernoulli()) gp <- gp_optim(gp, X_train, y_train, method = method_full(), approx = approx_ep(), verbose = FALSE) # Predict via LGP prediction_gp <- gp_pred(gp, as.matrix(X_test), transform = TRUE) # Plot ROC pdf("ex4rocLGP.pdf", width = 6, height = 6) roc_curve_LGP <- roc(y_test, prediction_gp$mean) plot(roc_curve_LGP, main = paste("ROC curve for LGP (AUC = ", round(auc(roc_curve_LGP), 3), ")", sep = ""), col = "#1c61b6") dev.off() # Generate the grid for predictions x1_seq <- seq(-1.7, 1.7, length.out = 80) x2_seq <- seq(-1.7, 1.7, length.out = 80) grid <- expand.grid(x1 = x1_seq, x2 = x2_seq) # Predict the grid for plot prediction <- gp_pred(gp, as.matrix(grid), transform = TRUE, var = TRUE) df <- data.frame(x1 = grid$x1, x2 = grid$x2, Mean = prediction$mean, Variance = prediction$var) p1 <- BKP:::my_2D_plot_fun("Mean", "Predictive Mean", df, X = X_train, y = y_train) p2 <- BKP:::my_2D_plot_fun("Variance", "Predictive Variance", df, X = X_train, y = y_train) # Plot results of LGP pdf("ex4LGP.pdf", width = 13, height = 6) gridExtra::grid.arrange(p1, p2, ncol = 2) dev.off() #----------------------------------------------------------------- # ============================================================== # # ========================= DKP Examples ======================= # # ============================================================== # #-------------------------- 1D Example --------------------------- #-------------------------- Example 5 ---------------------------- set.seed(123) # Define true class probability function (3-class) true_pi_fun <- function(X) { p1 <- 1/(1+exp(-3*X)) p2 <- (1 + exp(-X^2) * cos(10 * (1 - exp(-X)) / (1 + exp(-X)))) / 2 return(matrix(c(p1/2, p2/2, 1 - (p1+p2)/2), nrow = length(p1))) } # Data points n <- 30 Xbounds <- matrix(c(-2, 2), nrow = 1) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- sample(150, n, replace = TRUE) Y <- t(sapply(1:n, function(i) rmultinom(1, size = m[i], prob = true_pi[i, ]))) # Fit DKP model DKP_model_1D <- fit_DKP(X, Y, Xbounds = Xbounds) # New data points Xnew <- matrix(seq(-2,2, length = 100), ncol=1) true_pi <- true_pi_fun(Xnew) # Plot results pdf("ex5.pdf", width = 8,height = 8) plot(DKP_model_1D) plot(Xnew, true_pi[, 1], type = "l", col = "black", xlab = "x", ylab = "Probability", ylim = c(0, 1), main = "True Probability", lwd = 2) lines(Xnew, true_pi[, 2], col = "red", lwd = 2) lines(Xnew, true_pi[, 3], col = "blue", lwd = 2) legend("topright", bty = "n", legend = c("Class 1", "Class 2", "Class 3"), col = c("black", "red", "blue"), lty = 1, lwd = 2) dev.off() #-------------------------- 2D Example --------------------------- #-------------------------- Example 6 ---------------------------- set.seed(123) # Define 2D latent function and probability transformation (3-class) true_pi_fun <- function(X){ if(is.null(nrow(X))) X <- matrix(X, nrow=1) m <- 8.6928 s <- 2.4269 x1 <- 4*X[,1]- 2 x2 <- 4*X[,2]- 2 a <- 1 + (x1 + x2 + 1)^2 * (19- 14*x1 + 3*x1^2- 14*x2 + 6*x1*x2 + 3*x2^2) b <- 30 + (2*x1- 3*x2)^2 * (18- 32*x1 + 12*x1^2 + 48*x2- 36*x1*x2 + 27*x2^2) f <- (log(a*b)- m)/s p1 <- pnorm(f) # Transform to probability p2 <- sin(pi * X[,1]) * sin(pi * X[,2]) return(matrix(c(p1/2, p2/2, 1 - (p1+p2)/2), nrow = length(p1))) } # Data points n <- 100 Xbounds <- matrix(c(0, 0, 1, 1), nrow = 2) X <- lhs(n = n, rect = Xbounds) true_pi <- true_pi_fun(X) m <- sample(150, n, replace = TRUE) Y <- t(sapply(1:n, function(i) rmultinom(1, size = m[i], prob = true_pi[i, ]))) # Fit DKP model DKP_model_2D <- fit_DKP(X, Y, Xbounds=Xbounds) # Plot results pdf(file = "ex6_class%d.pdf", width = 9, height = 8, onefile = FALSE) plot(DKP_model_2D) dev.off() # New data points Xnew1 <- seq(Xbounds[1,1], Xbounds[1,2], length.out = 200) Xnew2 <- seq(Xbounds[2,1], Xbounds[2,2], length.out = 200) Xnew <- expand.grid(Xnew1 = Xnew1, Xnew2 = Xnew2) true_pi <- true_pi_fun(Xnew) df <- data.frame(x1 = Xnew$Xnew1, x2 = Xnew$Xnew2, True1 = true_pi[,1], True2 = true_pi[,2], True3 = true_pi[,3]) # Plot True distribution pdf("ex6_class1_true.pdf", width = 4.5, height = 4) print(BKP:::my_2D_plot_fun("True1", title = "True Probability", data = df)) dev.off() pdf("ex6_class2_true.pdf", width = 4.5, height = 4) print(BKP:::my_2D_plot_fun("True2", title = "True Probability", data = df)) dev.off() pdf("ex6_class3_true.pdf", width = 4.5, height = 4) print(BKP:::my_2D_plot_fun("True3", title = "True Probability", data = df)) dev.off() #-------------------------- Classification ------------------------ #-------------------------- Example 7 ---------------------------- set.seed(123) data(iris) X <- as.matrix(iris[, 1:2]) Xbounds <- rbind(c(4.2, 8), c(1.9, 4.5)) labels <- iris$Species Y <- model.matrix(~ labels - 1) # expand factors to a set of dummy variables train_indices <- sample(1:nrow(iris), 0.7 * nrow(iris)) X_train <- X[train_indices, ] Y_train <- Y[train_indices, ] labels_train <- labels[train_indices] X_test <- X[-train_indices, ] Y_test <- Y[-train_indices, ] labels_test <- labels[-train_indices] # Fit DKP model DKP_model_Class <- fit_DKP( X_train, Y_train, Xbounds = Xbounds, loss = "log_loss", prior = "fixed", r0 = 0.1, p0 = rep(1/3, 3)) pdf("ex7.pdf", width = 13, height = 6) plot(DKP_model_Class) dev.off() # Test dkp_pred_probs <- predict(DKP_model_Class, X_test)$mean class_levels <- levels(labels_test) colnames(dkp_pred_probs) <- class_levels multiclass_roc_dkp <- multiclass.roc(labels_test, dkp_pred_probs) all_rocs <- list() for (class_name in class_levels) { # convert to binary label labels_binary <- ifelse(labels_test == class_name, 1, 0) probabilities <- dkp_pred_probs[, class_name] roc_curve <- roc(labels_binary, probabilities) all_rocs[[class_name]] <- roc_curve } # Plot ROC curve pdf("ex7roc.pdf", width = 6, height = 6) plot(all_rocs[[1]], col = "blue", lwd = 2, lty = 1, main = paste("One-vs-Rest ROC curve for BKP (AUC =", round(auc(multiclass_roc_dkp), 3), ")", sep = "")) lines(all_rocs[[2]], col = "red", lwd = 2, lty = 2) lines(all_rocs[[3]], col = "black", lwd = 2, lty = 4) legend("bottomright", legend = class_levels, col = c("blue", "red", "black"), lwd = 2, lty = c(1,2,4), cex = 1.2) dev.off() ### LGP model iris_data <- data.frame( Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width, Species = iris$Species ) iris_train <- iris_data[train_indices, ] iris_test <- iris_data[-train_indices, ] # Fit LGP model gausspr_model <- gausspr(Species ~ ., data = iris_train, kernel = "rbfdot", kpar = "automatic") # Test LGP lgp_pred_probs <- predict(gausspr_model, newdata = iris_test, type = "probabilities") multiclass_roc_lgp <- multiclass.roc(iris_test$Species, lgp_pred_probs) # Plot ROC all_rocs <- list() for (class_name in class_levels) { # binary label labels_binary <- ifelse(labels_test == class_name, 1, 0) probabilities <- lgp_pred_probs[, class_name] roc_curve <- roc(labels_binary, probabilities) all_rocs[[class_name]] <- roc_curve } pdf("ex7rocLGP.pdf", width = 6, height = 6) plot(all_rocs[[1]], col = "blue", lwd = 2, lty = 1, main = paste("One-vs-Rest ROC curve for LGP (AUC =", round(auc(multiclass_roc_lgp), 3), ")", sep = "")) lines(all_rocs[[2]], col = "red", lwd = 2, lty = 2) lines(all_rocs[[3]], col = "black", lwd = 2, lty = 4) legend("bottomright", legend = class_levels, col = c("blue", "red", "black"), lty = c(1,2,4), lwd = 2, cex = 1.2) dev.off() # New data points for plot grid <- expand.grid( Sepal.Length = seq(Xbounds[1, 1], Xbounds[1, 2], length.out = 80), Sepal.Width = seq(Xbounds[2, 1], Xbounds[2, 2], length.out = 80) ) grid_predictions <- predict(gausspr_model, newdata = grid, type = "prob") class <- max.col(grid_predictions) df<- data.frame(x1 = grid$Sepal.Length,x2=grid$Sepal.Width, class = class, max_prob = apply(grid_predictions, 1, max)) p1 <- BKP:::my_2D_plot_fun_class("class", "Predicted Classes", df, X_train, Y_train) p2 <- BKP:::my_2D_plot_fun_class("max_prob", "Maximum Predicted Probability", df, X_train, Y_train, classification = FALSE) pdf("ex7LGP.pdf", width = 13, height = 6) grid.arrange(p1, p2, ncol = 2) dev.off() ## ----Working Example, include=FALSE--------------------------------- # Load the data data("Loaloa") # Extract input variables (X), response variable (y), and trial counts (m) X <- as.matrix(Loaloa[, 1:2]) rownames(X) <- NULL y <- Loaloa$npos m <- Loaloa$ntot # Randomly split into training (70%) and testing (30%) sets set.seed(123) train_idx <- sample(1:nrow(Loaloa), 0.7 * nrow(Loaloa)) X_train <- X[train_idx, ] y_train <- y[train_idx] m_train <- m[train_idx] X_test <- X[-train_idx, ] y_test <- y[-train_idx] m_test <- m[-train_idx] p <- y / m # Infection rate Xbounds <- matrix(c(7.8, 15.3, 3.1, 7.0), ncol = 2, byrow = TRUE) df <- data.frame( lon = X[,1], lat = X[,2], p = p, n = m, set = ifelse(1:nrow(Loaloa) %in% train_idx, "Train", "Test") ) # Obtain African map data (sf object) africa <- ne_countries(continent = "africa", scale = "medium", returnclass = "sf") pmap <- ggplot() + geom_sf(data = africa, fill = "gray95", color = "gray60") + geom_point(data = df, aes(x = lon, y = lat, color = p, size = n, shape = set), alpha = 0.8) + scale_color_viridis_c(name = "Proportion y/m") + scale_size_continuous(name = "Trial count m") + scale_shape_manual(name = "Dataset", values = c("Train" = 16, "Test" = 17)) + coord_sf(xlim = Xbounds[1, ], ylim = Xbounds[2, ], expand = FALSE) + theme_minimal() + # theme(aspect.ratio = 1) + labs(title = "Loaloa infection proportion (y/m): Train vs Test", x = "Longitude", y = "Latitude") ggsave("Loaloa_map.pdf", plot = pmap, width = 10, height = 5) # Model fitting Loaloa_bkp_model <- fit_BKP( X_train, y_train, m_train, Xbounds, loss = "brier", prior = "adaptive", r0 = mean(m_train)) # Print the model summary summary(Loaloa_bkp_model) # Plot the fitted model pdf("Loaloa_bkp.pdf", width = 12, height = 12) plot(Loaloa_bkp_model) dev.off() # LGP model for comparison Loaloa_gp_model <- gp_init(cf = cf_sexp(), lik = lik_binomial()) Loaloa_gp_model <- gp_optim( Loaloa_gp_model, X_train, y_train, trials = m_train, method = method_full(), approx = approx_ep(), verbose = FALSE) # Plot the fitted LGP model pdf("Loaloa_gp.pdf", width = 12, height = 12) # Generate the grid for predictions x1_seq <- seq(Xbounds[1,1], Xbounds[1,2], length.out = 80) x2_seq <- seq(Xbounds[2,1], Xbounds[2,2], length.out = 80) grid <- expand.grid(x1 = x1_seq, x2 = x2_seq) # Predict the grid for plot prediction <- gp_pred(Loaloa_gp_model, as.matrix(grid), transform = TRUE, var = TRUE, quantiles = c(0.025,0.975)) df <- data.frame(x1 = grid$x1, x2 = grid$x2, Mean = prediction$mean, Upper = prediction$quantiles[,2], Lower = prediction$quantiles[,1], Variance = prediction$var) dims <- c(1,2) # Width = prediction$upper - prediction$lower) p1 <- BKP:::my_2D_plot_fun("Mean", "Predictive Mean", df, dims= dims) p3 <- BKP:::my_2D_plot_fun("Variance", "Predictive Variance", df, dims= dims) p2 <- BKP:::my_2D_plot_fun("Upper", "95% CI Upper", df, dims= dims) p4 <- BKP:::my_2D_plot_fun("Lower", "95% CI Lower", df, dims= dims) # Arrange into 2×2 layout grid.arrange(p1, p2, p3, p4, ncol = 2) dev.off() # Predict the mean infection rate for the test data predict_on_test_bkp <- predict(Loaloa_bkp_model, Xnew = X_test) predict_on_test_gp <- gp_pred(Loaloa_gp_model, xnew = X_test, transform = TRUE, var = TRUE) # Empirical success rate for the test data pi_tilde_test <- y_test / m_test # Mean Squared Error (Brier Score) mse_bkp <- mean((predict_on_test_bkp$mean - pi_tilde_test)^2) mse_gp <- mean((predict_on_test_gp$mean - pi_tilde_test)^2)