In "Best Subset, Forward Stepwise or Lasso? Analysis and Recommendations Based on Extensive Comparisons" by Hastie, Tibshirani, and Tibshirani (2020), it is suggested that the relaxed lasso performs as well as or better than the other regression methods considered, across nearly all settings.
But this can look like an unfair match, since the relaxed lasso has two hyperparameters (lambda and gamma) while the other methods have only one.
We will therefore compare the elastic net, which also has two hyperparameters (lambda and alpha), with the relaxed lasso in terms of accuracy, using the same simulation design.
Before showing the simulation results, we look at some essential properties of the elastic net, mostly related to the grouping effect.
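As a minimal illustration of the grouping effect (a toy sketch of my own, not taken from the paper): when two predictors are nearly identical, the lasso tends to select only one of them, while the elastic net tends to assign them similar coefficients.
set.seed(1)
z = rnorm(100)
x1 = z + rnorm(100, sd = 0.01)   # x1 and x2 are near-duplicates of z
x2 = z + rnorm(100, sd = 0.01)
X_demo = cbind(x1, x2, matrix(rnorm(100 * 3), 100, 3))
y_demo = 3 * z + rnorm(100)
coef(glmnet::glmnet(X_demo, y_demo, alpha = 1), s = 0.5)    # lasso: typically only one of x1, x2 is nonzero
coef(glmnet::glmnet(X_demo, y_demo, alpha = 0.2), s = 0.5)  # elastic net: weight is spread over both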
The following is the R code for the simulation comparing the performance of the elastic net and the relaxed lasso.
# AR(1)-type covariance matrix: Sigma[i, j] = rho^|i - j|
generator_Sigma = function(p, rho){
  Sigma = matrix(0, p, p)
  for(i in 1:p){
    for(j in 1:p){
      Sigma[i, j] = rho^abs(i - j)
    }
  }
  return(Sigma)
}
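# Quick check (a toy size of my own choosing): the first row of the
# matrix below should be (1, 0.5, 0.25, 0.125).
generator_Sigma(4, 0.5)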
# Draw n rows from N(0, Sigma) (requires the mvtnorm package)
generator_X = function(n, Sigma){
  X = mvtnorm::rmvnorm(n = n, sigma = Sigma)
  return(X)
}
# Sparse coefficient vector: the first s entries are 1, the rest are 0
generator_beta = function(p, s){
  stopifnot(p >= s)
  beta = rep(0, p)
  beta[1:s] = 1
  dim(beta) = c(length(beta), 1)
  beta = Matrix::Matrix(beta, sparse = TRUE)
  return(beta)
}
# Gaussian response with the noise variance calibrated so that
# SNR = Var(x' beta) / sigma^2 = (beta' Sigma beta) / sigma^2
generator_y = function(X, beta, Sigma, snr){
  n = nrow(X)
  sigma_sq = sum(beta * (Sigma %*% beta)) / snr
  mu = X %*% beta
  y = matrix(rnorm(n, mean = as.numeric(mu), sd = sqrt(sigma_sq)), ncol = 1)
  return(y)
}
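# Quick end-to-end check of the generators (toy sizes of my own choosing):
Sigma_chk = generator_Sigma(10, 0.5)
X_chk = generator_X(5, Sigma_chk)
y_chk = generator_y(X_chk, generator_beta(10, 3), Sigma_chk, snr = 1)
dim(X_chk); dim(y_chk)   # 5 x 10 design, 5 x 1 response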
# Relative risk: (beta - beta_hat)' Sigma (beta - beta_hat) / (beta' Sigma beta),
# i.e. prediction error relative to the null (all-zero) estimate
RelativeRisk = function(beta, beta_hat, Sigma){
  diff = beta - beta_hat
  RR = sum(diff * (Sigma %*% diff)) / sum(beta * (Sigma %*% beta))
  return(RR)
}
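# Sanity check: the all-zero estimate has relative risk exactly 1, since then
# the numerator and the denominator are both beta' Sigma beta.
RelativeRisk(generator_beta(10, 3), 0 * generator_beta(10, 3), generator_Sigma(10, 0.5))   # 1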
# F1 score of support recovery: harmonic mean of precision (fraction of
# selected variables that are truly nonzero) and recall (fraction of
# true nonzeros that are selected)
F1score = function(beta, beta_hat){
  nonzero_ind0 = beta != 0
  nonzero_ind = beta_hat != 0
  precision = ifelse(sum(nonzero_ind) > 0,
                     sum(nonzero_ind0 * nonzero_ind) / sum(nonzero_ind), 0)
  recall = sum(nonzero_ind0 * nonzero_ind) / sum(nonzero_ind0)
  F1 = ifelse(precision == 0 & recall == 0, 0,
              2 * precision * recall / (precision + recall))
  return(F1)
}
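# Sanity check with hand-picked vectors: true support {1, 2} and estimated
# support {1, 3} give precision = recall = 1/2, so F1 = 0.5.
F1score(c(1, 1, 0, 0), c(1, 0, 1, 0))   # 0.5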
# From the glmnetUtils documentation: because cross-validation is often a
# statistically noisy procedure, it doesn't try to automatically choose
# alpha and lambda for you.
# Hence we define a helper that picks the (alpha, lambda) pair with the
# smallest cross-validation error.
bestcoef = function(cva){
  stopifnot(inherits(cva, "cva.glmnet"))
  # CV error of each alpha's model, evaluated at that model's lambda.min
  cverrors = rep(0, length(cva$alpha))
  for(i in 1:length(cva$alpha)){
    ind = cva$modlist[[i]]$index["min", "Lambda"]
    cverrors[i] = cva$modlist[[i]]$cvm[ind]
  }
  bestalpha_index = which.min(cverrors)
  # Request the lambda.min coefficients, to match the error used for selection
  result = list(coef = coef(cva, alpha = cva$alpha[bestalpha_index], s = "lambda.min"),
                alpha = cva$alpha[bestalpha_index],
                lambda = cva$modlist[[bestalpha_index]]$lambda.min)
  return(result)
}
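# Example usage on toy data (sizes and names of my own choosing):
set.seed(1)
X_ex = matrix(rnorm(40 * 10), 40, 10)
y_ex = X_ex[, 1] + rnorm(40)
cva_ex = glmnetUtils::cva.glmnet(X_ex, y_ex, alpha = c(0, 0.5, 1))
bestcoef(cva_ex)$alpha   # the alpha whose lambda.min attains the smallest CV error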
simulator = function(n, p, s, rho, snr, seed, Niter){
  set.seed(seed)
  RRrelax = rep(0, Niter); RRelastic = rep(0, Niter)
  F1relax = rep(0, Niter); F1elastic = rep(0, Niter)
  for(i in 1:Niter){
    # Generate one data set
    Sigma = generator_Sigma(p, rho)
    X = generator_X(n, Sigma)
    beta = generator_beta(p, s)
    y = generator_y(X, beta, Sigma, snr)
    # Relaxed lasso: cross-validate over both lambda and the mixing parameter gamma
    cvfit_relax = glmnet::cv.glmnet(X, y, intercept = FALSE,
                                    relax = TRUE, gamma = seq(from = 0, to = 1, length = 11))
    beta_hat_relax = coef(cvfit_relax, s = "lambda.min", gamma = "gamma.min")
    beta_hat_relax = Matrix::Matrix(beta_hat_relax[-1], sparse = TRUE)
    # Elastic net: cross-validate over both lambda and alpha
    # (the squared grid places more alpha values near the ridge end, alpha = 0)
    cvfit_elastic = glmnetUtils::cva.glmnet(X, y, intercept = FALSE,
                                            alpha = seq(from = 0, to = 1, length = 11)^2)
    beta_hat_elastic = bestcoef(cvfit_elastic)$coef
    beta_hat_elastic = Matrix::Matrix(beta_hat_elastic[-1], sparse = TRUE)
    # Record the accuracy metrics for this repetition
    RRrelax[i] = RelativeRisk(beta, beta_hat_relax, Sigma)
    RRelastic[i] = RelativeRisk(beta, beta_hat_elastic, Sigma)
    F1relax[i] = F1score(beta, beta_hat_relax)
    F1elastic[i] = F1score(beta, beta_hat_elastic)
  }
  result = list(RRrelax = c(mean(RRrelax), sd(RRrelax)),
                RRelastic = c(mean(RRelastic), sd(RRelastic)),
                F1relax = c(mean(F1relax), sd(F1relax)),
                F1elastic = c(mean(F1elastic), sd(F1elastic)))
  return(result)
}
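# Quick smoke test with small dimensions (numbers of my own choosing)
# before launching the full experiment:
str(simulator(n = 50, p = 20, s = 5, rho = 0.35, snr = 1, seed = 1, Niter = 2))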
# Log-spaced SNR grid from 0.05 to 6, as in the Hastie et al. comparisons
snrlist = round(exp(seq(log(0.05), log(6), length = 10)), 2)
# "High" setting: n = 50, p = 1000, s = 5, with correlation rho = 0.70
n = 50; p = 1000
s = 5; rho = 0.70
seed = 123
Niter = 30
# One row per SNR value: mean and sd of each metric over Niter repetitions
RRrelax_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
RRelastic_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
F1relax_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
F1elastic_results = data.frame(mean = rep(0, length(snrlist)), sd = rep(0, length(snrlist)))
for(i in 1:length(snrlist)){
  res = simulator(n, p, s, rho, snrlist[i], seed, Niter)
  RRrelax_results[i, ] = res$RRrelax
  RRelastic_results[i, ] = res$RRelastic
  F1relax_results[i, ] = res$F1relax
  F1elastic_results[i, ] = res$F1elastic
  cat("SNR setting", i, "done\n")
}
# Assemble long-format data frames for plotting: one row per (SNR, method)
snr = 1:length(snrlist)
method = rep(c("Relax", "Elastic"), each = length(snrlist))
RR_results = rbind(cbind(snr, RRrelax_results), cbind(snr, RRelastic_results))
RR_results = cbind(RR_results, method)
F1_results = rbind(cbind(snr, F1relax_results), cbind(snr, F1elastic_results))
F1_results = cbind(F1_results, method)
library(ggplot2)
# Error bars show mean +/- one standard deviation over the Niter repetitions
RRplot <- ggplot(RR_results, aes(x = snr, y = mean, group = method, color = method)) +
  geom_line() + geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                position = position_dodge(0.05)) +
  scale_x_continuous(name = "SNR", breaks = 1:10, labels = snrlist) +
  ylab("RelativeRisk") +
  ggtitle(paste("High setting : n =", n, ", p =", p, ", s =", s,
                "\n Correlation : rho =", rho)) +
  theme(plot.title = element_text(hjust = 0.5))
print(RRplot)
F1plot <- ggplot(F1_results, aes(x = snr, y = mean, group = method, color = method)) +
  geom_line() + geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                position = position_dodge(0.05)) +
  scale_x_continuous(name = "SNR", breaks = 1:10, labels = snrlist) +
  ylab("F1score") +
  ggtitle(paste("High setting : n =", n, ", p =", p, ", s =", s,
                "\n Correlation : rho =", rho)) +
  theme(plot.title = element_text(hjust = 0.5))
print(F1plot)