In "Best Subset, Forward Stepwise or Lasso? Analysis and Recommendations Based on Extensive Comparisons" by Hastie, Tibshirani, and Tibshirani (2020), it is suggested that the relaxed lasso performs as well as or better than the other regression methods considered, across nearly all settings.
But this can look like an unfair match, since the relaxed lasso has two hyperparameters (lambda and gamma) while the other methods have only one.
We will therefore compare the elastic net, which also has two hyperparameters (lambda and alpha), with the relaxed lasso in terms of accuracy, using the same simulation design.
Before showing the simulation results, we look at some essential properties of the elastic net, mostly related to the grouping effect.
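As a minimal illustration of the grouping effect (a toy sketch of my own, not taken from the paper): when two predictors are nearly identical, the lasso tends to select only one of them, while the elastic net tends to assign them similar coefficients.
set.seed(1)
z = rnorm(100)
x1 = z + rnorm(100, sd = 0.01)   # x1 and x2 are near-duplicates of z
x2 = z + rnorm(100, sd = 0.01)
X_demo = cbind(x1, x2, matrix(rnorm(100 * 3), 100, 3))
y_demo = 3 * z + rnorm(100)
coef(glmnet::glmnet(X_demo, y_demo, alpha = 1), s = 0.5)    # lasso: typically only one of x1, x2 is nonzero
coef(glmnet::glmnet(X_demo, y_demo, alpha = 0.2), s = 0.5)  # elastic net: weight is spread over both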
The following is the R code for the simulation comparing the performance of the elastic net and the relaxed lasso.
# AR(1)-type covariance matrix: Sigma[i, j] = rho^|i - j|
generator_Sigma = function(p, rho){
  Sigma = matrix(0, p, p)
  for(i in 1:p){
    for(j in 1:p){
      Sigma[i, j] = rho^abs(i - j)
    }
  }
  return(Sigma)
}
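# Quick check (a toy size of my own choosing): the first row of the
# matrix below should be (1, 0.5, 0.25, 0.125).
generator_Sigma(4, 0.5)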
# Draw n rows from N(0, Sigma) (requires the mvtnorm package)
generator_X = function(n, Sigma){
  X = mvtnorm::rmvnorm(n = n, sigma = Sigma)
  return(X)
}
# Sparse coefficient vector: the first s entries are 1, the rest are 0
generator_beta = function(p, s){
  stopifnot(p >= s)
  beta = rep(0, p)
  beta[1:s] = 1
  dim(beta) = c(length(beta), 1)
  beta = Matrix::Matrix(beta, sparse = TRUE)
  return(beta)
}
# Gaussian response with the noise variance calibrated so that
# SNR = Var(x' beta) / sigma^2 = (beta' Sigma beta) / sigma^2
generator_y = function(X, beta, Sigma, snr){
  n = nrow(X)
  sigma_sq = sum(beta * (Sigma %*% beta)) / snr
  mu = X %*% beta
  y = matrix(rnorm(n, mean = as.numeric(mu), sd = sqrt(sigma_sq)), ncol = 1)
  return(y)
}
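# Quick end-to-end check of the generators (toy sizes of my own choosing):
Sigma_chk = generator_Sigma(10, 0.5)
X_chk = generator_X(5, Sigma_chk)
y_chk = generator_y(X_chk, generator_beta(10, 3), Sigma_chk, snr = 1)
dim(X_chk); dim(y_chk)   # 5 x 10 design, 5 x 1 response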
# Relative risk: (beta - beta_hat)' Sigma (beta - beta_hat) / (beta' Sigma beta),
# i.e. prediction error relative to the null (all-zero) estimate
RelativeRisk = function(beta, beta_hat, Sigma){
  diff = beta - beta_hat
  RR = sum(diff * (Sigma %*% diff)) / sum(beta * (Sigma %*% beta))
  return(RR)
}
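# Sanity check: the all-zero estimate has relative risk exactly 1, since then
# the numerator and the denominator are both beta' Sigma beta.
RelativeRisk(generator_beta(10, 3), 0 * generator_beta(10, 3), generator_Sigma(10, 0.5))   # 1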
# F1 score of support recovery: harmonic mean of precision (fraction of
# selected variables that are truly nonzero) and recall (fraction of
# true nonzeros that are selected)
F1score = function(beta, beta_hat){
  nonzero_ind0 = beta != 0
  nonzero_ind = beta_hat != 0
  precision = ifelse(sum(nonzero_ind) > 0,
                     sum(nonzero_ind0 * nonzero_ind) / sum(nonzero_ind), 0)
  recall = sum(nonzero_ind0 * nonzero_ind) / sum(nonzero_ind0)
  F1 = ifelse(precision == 0 & recall == 0, 0,
              2 * precision * recall / (precision + recall))
  return(F1)
}
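# Sanity check with hand-picked vectors: true support {1, 2} and estimated
# support {1, 3} give precision = recall = 1/2, so F1 = 0.5.
F1score(c(1, 1, 0, 0), c(1, 0, 1, 0))   # 0.5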
# From the glmnetUtils documentation: because cross-validation is often a
# statistically noisy procedure, it doesn't try to automatically choose
# alpha and lambda for you.
# Hence we define a helper that picks the (alpha, lambda) pair with the
# smallest cross-validation error.
bestcoef = function(cva){
  stopifnot(inherits(cva, "cva.glmnet"))
  # CV error of each alpha's model, evaluated at that model's lambda.min
  cverrors = rep(0, length(cva$alpha))
  for(i in 1:length(cva$alpha)){
    ind = cva$modlist[[i]]$index["min", "Lambda"]
    cverrors[i] = cva$modlist[[i]]$cvm[ind]
  }
  bestalpha_index = which.min(cverrors)
  # Request the lambda.min coefficients, to match the error used for selection
  result = list(coef = coef(cva, alpha = cva$alpha[bestalpha_index], s = "lambda.min"),
                alpha = cva$alpha[bestalpha_index],
                lambda = cva$modlist[[bestalpha_index]]$lambda.min)
  return(result)
}
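# Example usage on toy data (sizes and names of my own choosing):
set.seed(1)
X_ex = matrix(rnorm(40 * 10), 40, 10)
y_ex = X_ex[, 1] + rnorm(40)
cva_ex = glmnetUtils::cva.glmnet(X_ex, y_ex, alpha = c(0, 0.5, 1))
bestcoef(cva_ex)$alpha   # the alpha whose lambda.min attains the smallest CV error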
simulator = function(n, p, s, rho, snr, seed, Niter){
  set.seed(seed)
  RRrelax = rep(0, Niter); RRelastic = rep(0, Niter)
  F1relax = rep(0, Niter); F1elastic = rep(0, Niter)
  for(i in 1:Niter){
    # Generate one data set
    Sigma = generator_Sigma(p, rho)
    X = generator_X(n, Sigma)
    beta = generator_beta(p, s)
    y = generator_y(X, beta, Sigma, snr)
    # Relaxed lasso: cross-validate over both lambda and the mixing parameter gamma
    cvfit_relax = glmnet::cv.glmnet(X, y, intercept = FALSE,
                                    relax = TRUE, gamma = seq(from = 0, to = 1, length = 11))
    beta_hat_relax = coef(cvfit_relax, s = "lambda.min", gamma = "gamma.min")
    beta_hat_relax = Matrix::Matrix(beta_hat_relax[-1], sparse = TRUE)
    # Elastic net: cross-validate over both lambda and alpha
    # (the squared grid places more alpha values near the ridge end, alpha = 0)
    cvfit_elastic = glmnetUtils::cva.glmnet(X, y, intercept = FALSE,
                                            alpha = seq(from = 0, to = 1, length = 11)^2)
    beta_hat_elastic = bestcoef(cvfit_elastic)$coef
    beta_hat_elastic = Matrix::Matrix(beta_hat_elastic[-1], sparse = TRUE)
    # Record the accuracy metrics for this repetition
    RRrelax[i] = RelativeRisk(beta, beta_hat_relax, Sigma)
    RRelastic[i] = RelativeRisk(beta, beta_hat_elastic, Sigma)
    F1relax[i] = F1score(beta, beta_hat_relax)
    F1elastic[i] = F1score(beta, beta_hat_elastic)
  }
  result = list(RRrelax = c(mean(RRrelax), sd(RRrelax)),
                RRelastic = c(mean(RRelastic), sd(RRelastic)),
                F1relax = c(mean(F1relax), sd(F1relax)),
                F1elastic = c(mean(F1elastic), sd(F1elastic)))
  return(result)
}
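# Quick smoke test with small dimensions (numbers of my own choosing)
# before launching the full experiment:
str(simulator(n = 50, p = 20, s = 5, rho = 0.35, snr = 1, seed = 1, Niter = 2))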
# Log-spaced SNR grid from 0.05 to 6, as in the Hastie et al. comparisons
snrlist = round(exp(seq(log(0.05), log(6), length = 10)), 2)
# "High" setting: n = 50, p = 1000, s = 5, with correlation rho = 0.70
n = 50; p = 1000
s = 5; rho = 0.70
seed = 123
Niter = 30
# One row per SNR value: mean and sd of each metric over Niter repetitions
RRrelax_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
RRelastic_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
F1relax_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
F1elastic_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
for(i in 1:length(snrlist)){
  res = simulator(n, p, s, rho, snrlist[i], seed, Niter)
  RRrelax_results[i, ] = res$RRrelax
  RRelastic_results[i, ] = res$RRelastic
  F1relax_results[i, ] = res$F1relax
  F1elastic_results[i, ] = res$F1elastic
  cat("SNR setting", i, "done\n")
}
# Assemble long-format data frames for plotting: one row per (SNR, method)
snr = 1:length(snrlist)
method = rep(c("Relax", "Elastic"), each = length(snrlist))
RR_results = rbind(cbind(snr, RRrelax_results), cbind(snr, RRelastic_results))
RR_results = cbind(RR_results, method)
F1_results = rbind(cbind(snr, F1relax_results), cbind(snr, F1elastic_results))
F1_results = cbind(F1_results, method)
library(ggplot2)
# Error bars show mean +/- one standard deviation over the Niter repetitions
RRplot <- ggplot(RR_results, aes(x = snr, y = mean, group = method, color = method)) +
  geom_line() + geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                position = position_dodge(0.05)) +
  scale_x_continuous(name = "SNR", breaks = 1:10, labels = snrlist) +
  ylab("RelativeRisk") +
  ggtitle(paste("High setting : n =", n, ", p =", p, ", s =", s,
                "\n Correlation : rho =", rho)) +
  theme(plot.title = element_text(hjust = 0.5))
print(RRplot)
F1plot <- ggplot(F1_results, aes(x = snr, y = mean, group = method, color = method)) +
  geom_line() + geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                position = position_dodge(0.05)) +
  scale_x_continuous(name = "SNR", breaks = 1:10, labels = snrlist) +
  ylab("F1score") +
  ggtitle(paste("High setting : n =", n, ", p =", p, ", s =", s,
                "\n Correlation : rho =", rho)) +
  theme(plot.title = element_text(hjust = 0.5))
print(F1plot)