| Title: | Sparse Projected Averaged Regression |
|---|---|
| Description: | A flexible framework combining variable screening and random projection techniques for fitting ensembles of predictive generalized linear models to high-dimensional data. Designed for extensibility, the package implements key techniques as S3 classes with user-friendly constructors, enabling easy integration and development of new procedures for high-dimensional applications. For more details see Parzer et al (2024a) <doi:10.48550/arXiv.2312.00130> and Parzer et al (2024b) <doi:10.48550/arXiv.2410.00971>. |
| Authors: | Laura Vana-Gür [aut, cre] (ORCID: <https://orcid.org/0000-0002-9613-7604>), Roman Parzer [aut] (ORCID: <https://orcid.org/0000-0003-0893-3190>), Peter Filzmoser [aut] (ORCID: <https://orcid.org/0000-0002-8014-4682>) |
| Maintainer: | Laura Vana-Gür <[email protected]> |
| License: | GPL-3 |
| Version: | 1.1.1 |
| Built: | 2026-05-26 07:47:58 UTC |
| Source: | https://github.com/lauravana/spareg |
'spar' ObjectExtracts coefficients from 'spar' object
## S3 method for class 'spar' coef( object, nummod = NULL, nu = NULL, aggregate = c("mean", "median", "none"), ... )## S3 method for class 'spar' coef( object, nummod = NULL, nu = NULL, aggregate = c("mean", "median", "none"), ... )
object |
result of spar function of class |
nummod |
number of models used to form coefficients; value with minimal
validation measure is used if not provided. |
nu |
threshold level used to form coefficients; value with minimal
validation measure is used if not provided. |
aggregate |
character one of c("mean", "median", "none"). If set to "none" the coefficients are not aggregated over the marginal models, otherwise the coefficients are aggregated using the specified method (mean or median). Defaults to mean aggregation. |
... |
further arguments passed to or from other methods |
object of class 'coefspar' which is a list with elements
intercept intercept value
beta vector of length p of averaged coefficients
nummod number of models based on which the coefficient is computed
nu threshold based on which the coefficient is computed
print.coefspar, summary.coefspar
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coef(spar_res) coef(spar_res, aggregate = "median") coef(spar_res, aggregate = "none") coef(spar_res, nummod = 5, nu = 0)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coef(spar_res) coef(spar_res, aggregate = "median") coef(spar_res, aggregate = "none") coef(spar_res, nummod = 5, nu = 0)
'spar.cv' ObjectExtract coefficients from 'spar.cv' object
## S3 method for class 'spar.cv' coef( object, nummod = NULL, nu = NULL, opt_par = c("best", "1se"), aggregate = c("mean", "median", "none"), ... )## S3 method for class 'spar.cv' coef( object, nummod = NULL, nu = NULL, opt_par = c("best", "1se"), aggregate = c("mean", "median", "none"), ... )
object |
result of spar.cv function of class |
nummod |
optional number of models used to form coefficients |
nu |
optional threshold level used to form coefficients |
opt_par |
one of |
aggregate |
character one of c("mean", "median", "none"). If set to "none" the coefficients are not aggregated over the marginal models, otherwise the coefficients are aggregated using the specified method (mean or median). Defaults to mean aggregation. |
... |
further arguments passed to or from other methods |
List with elements
intercept intercept value
beta vector of length p of averaged coefficients
nummod number of models based on which the coefficient is computed
nu threshold based on which the coefficient is computed
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, nummods = c(5, 10)) coef(spar_res)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, nummods = c(5, 10)) coef(spar_res)
'randomprojection' ObjectCreates an object of class 'randomprojection' using arguments passed by user.
constructor_randomprojection( name, generate_fun, update_fun = NULL, update_rpm_w_data = NULL, control = list() )constructor_randomprojection( name, generate_fun, update_fun = NULL, update_rpm_w_data = NULL, control = list() )
name |
character |
generate_fun |
function for generating the random projection matrix. This
function should have arguments rp, m, included_vector, x and y (see the example below). |
update_fun |
function for updating the |
update_rpm_w_data |
function for updating the random projection matrix with data.
This can be used for the case where a list of random projection matrices is
provided by argument |
control |
list of controls for random projection. Can include minimum and
maximum dimension for the projection defaults to
|
a function which in turn creates an object of class 'randomprojection'
generate_cauchy <- function(rp, m, included_vector, x = NULL, y = NULL) { p <- length(included_vector) control_rcauchy <- c(rp$control[names(rp$control) %in% names(formals(rcauchy))], attributes(rp)[names(attributes(rp)) %in% names(formals(rcauchy))]) control_rcauchy <- control_rcauchy[!duplicated(names(control_rcauchy))] vals <- do.call(function(...) rcauchy(m * p, ...), control_rcauchy) RM <- matrix(vals, nrow = m, ncol = p) return(RM) } rp_cauchy <- constructor_randomprojection("rp_cauchy", generate_fun = generate_cauchy) example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, rp = rp_cauchy(scale = 1/400)) spar_resgenerate_cauchy <- function(rp, m, included_vector, x = NULL, y = NULL) { p <- length(included_vector) control_rcauchy <- c(rp$control[names(rp$control) %in% names(formals(rcauchy))], attributes(rp)[names(attributes(rp)) %in% names(formals(rcauchy))]) control_rcauchy <- control_rcauchy[!duplicated(names(control_rcauchy))] vals <- do.call(function(...) rcauchy(m * p, ...), control_rcauchy) RM <- matrix(vals, nrow = m, ncol = p) return(RM) } rp_cauchy <- constructor_randomprojection("rp_cauchy", generate_fun = generate_cauchy) example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, rp = rp_cauchy(scale = 1/400)) spar_res
'screencoef' ObjectsThe created function will return an object of class 'screencoef' which
consists of a list. The attributes of the generating object will include by
default type, which can take one of two values "prob" (indicating
probabilistic screening should be employed),
"fixed" (indicating that the top nscreen variables should be employed).
constructor_screencoef(name, generate_fun)constructor_screencoef(name, generate_fun)
name |
character |
generate_fun |
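function for generating the screening coefficient. This
function should have arguments y (vector of (standardized for Gaussian) responses), x (the matrix of standardized predictors) and a 'screencoef' object. |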
Creates an object of class 'screencoef' using arguments passed by user.
a function which in turn creates an object of class 'screencoef'
generate_scr_sirs <- function(y, x, object) { res_screen <- do.call(function(...) VariableScreening::screenIID(x, y, ...), object$control) coefs <- res_screen$measurement coefs } screen_sirs <- constructor_screencoef("screen_sirs", generate_fun = generate_scr_sirs) example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_example <- spar(example_data$x, example_data$y, screencoef = screen_sirs(control = list(method = "SIRS")), rp = rp_sparse()) spar_examplegenerate_scr_sirs <- function(y, x, object) { res_screen <- do.call(function(...) VariableScreening::screenIID(x, y, ...), object$control) coefs <- res_screen$measurement coefs } screen_sirs <- constructor_screencoef("screen_sirs", generate_fun = generate_scr_sirs) example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_example <- spar(example_data$x, example_data$y, screencoef = screen_sirs(control = list(method = "SIRS")), rp = rp_sparse()) spar_example
'sparmodel' ObjectThe created function will return an object of class 'sparmodel' which
consists of a list.
constructor_sparmodel(name, model_fun, update_fun = NULL)constructor_sparmodel(name, model_fun, update_fun = NULL)
name |
character |
model_fun |
function for estimating the marginal models which returns the
function should have arguments and |
update_fun |
optional function for updating the |
Creates an object of class 'sparmodel' using arguments passed by user.
a function which in turn creates an
object of class 'sparmodel'.
model_glmrob <- function(y, z, object) { requireNamespace("robustbase") fam <- object$control$family glmrob_res <- do.call(function(...) robustbase::glmrob(y ~ as.matrix(z), ...), object$control) intercept <- coef(glmrob_res)[1] gammas <- coef(glmrob_res)[-1] list(gammas = gammas, intercept = intercept) } spar_glmrob <- constructor_sparmodel(name = "glmrob", model_fun = model_glmrob) example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, model = spar_glmrob()) spar_resmodel_glmrob <- function(y, z, object) { requireNamespace("robustbase") fam <- object$control$family glmrob_res <- do.call(function(...) robustbase::glmrob(y ~ as.matrix(z), ...), object$control) intercept <- coef(glmrob_res)[1] gammas <- coef(glmrob_res)[-1] list(gammas = gammas, intercept = intercept) } spar_glmrob <- constructor_sparmodel(name = "glmrob", model_fun = model_glmrob) example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, model = spar_glmrob()) spar_res
'coefspar' ObjectExtractor for Model Coefficients from 'coefspar' Object
get_coef(x)get_coef(x)
x |
A ' |
A numeric vector or matrix of coefficients.
coef.spar, coef.spar.cv, print.coefspar, summary.coefspar
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coefs <- coef(spar_res) get_coef(coefs)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coefs <- coef(spar_res) get_coef(coefs)
'coefspar' ObjectExtractor for Model Intercept from 'coefspar' Object
get_intercept(x)get_intercept(x)
x |
A ' |
Intercept (numeric or vector).
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coefs <- coef(spar_res) get_coef(coefs)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coefs <- coef(spar_res) get_coef(coefs)
'spar' or 'spar.cv' ObjectExtractor for (Cross-)Validation Measure from 'spar' or 'spar.cv' Object
get_measure(object)get_measure(object)
object |
A fitted ' |
data.frame containing the (cross-)validation measure for the considered threshold and number of model combinations.
For 'spar' objects it contains information about the measure calculated on the validation set (or on the training sample if
xval and yval are missing) and the number of active variables. For 'spar.cv' objects it contains information
on the average measure obtained across folds together with the standard deviation across the folds and the average number of active variables.
the nfolds of the training set.
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) get_measure(spar_res)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) get_measure(spar_res)
'spar' and 'spar.cv' ObjectExtractor of Specific Model from 'spar' and 'spar.cv' Object
get_model(object, opt_par = c("best", "1se"))get_model(object, opt_par = c("best", "1se"))
object |
A fitted ' |
opt_par |
One of "best", "1se" |
A 'spar' or 'spar.cv' object where the beta and intercept elements are
the ones which correspond to the best or the 1se model.
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, screencoef = screen_cor(), rp = rp_gaussian(), nummods=c(5, 10)) best_model <- get_model(spar_res, opt_par = "best") spar_cv <- spar.cv(example_data$x, example_data$y, screencoef = screen_cor(), rp = rp_gaussian(), nummods = c(5, 10), nfolds = 4L) best_model_cv <- get_model(spar_cv, opt_par = "best") onese_model_cv <- get_model(spar_cv, opt_par = "1se")example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, screencoef = screen_cor(), rp = rp_gaussian(), nummods=c(5, 10)) best_model <- get_model(spar_res, opt_par = "best") spar_cv <- spar.cv(example_data$x, example_data$y, screencoef = screen_cor(), rp = rp_gaussian(), nummods = c(5, 10), nfolds = 4L) best_model_cv <- get_model(spar_cv, opt_par = "best") onese_model_cv <- get_model(spar_cv, opt_par = "1se")
'spar' ObjectPlot values of validation measure or number of active variables over different thresholds or number of models for 'spar' object, or residuals vs fitted
## S3 method for class 'spar' plot( x, plot_type = c("val_measure", "val_numactive", "res_vs_fitted", "coefs"), plot_along = c("nu", "nummod"), nummod = NULL, nu = NULL, xfit = NULL, yfit = NULL, prange = NULL, coef_order = NULL, digits = 2L, ... )## S3 method for class 'spar' plot( x, plot_type = c("val_measure", "val_numactive", "res_vs_fitted", "coefs"), plot_along = c("nu", "nummod"), nummod = NULL, nu = NULL, xfit = NULL, yfit = NULL, prange = NULL, coef_order = NULL, digits = 2L, ... )
x |
result of spar function of class |
plot_type |
one of |
plot_along |
one of |
nummod |
fixed value for number of models when |
nu |
fixed value for |
xfit |
data used for predictions in |
yfit |
data used for predictions in |
prange |
optional vector of length 2 for |
coef_order |
optional index vector of length p for |
digits |
number of significant digits to be displayed in the axis; defaults to 2L. |
... |
further arguments passed to or from other methods |
'ggplot2::ggplot' object
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest) plot(spar_res, plot_type = "coefs", prange = c(1,400))example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest) plot(spar_res, plot_type = "coefs", prange = c(1,400))
'spar.cv' ObjectPlot cross-validation measure or number of active variables over different thresholds or number
of models of 'spar.cv' object, produce a residuals vs fitted plot,
or a plot of the estimated coefficients in each marginal model, sorted by their absolute value.
## S3 method for class 'spar.cv' plot( x, plot_type = c("val_measure", "val_numactive", "res_vs_fitted", "coefs"), plot_along = c("nu", "nummod"), nummod = NULL, nu = NULL, xfit = NULL, yfit = NULL, opt_par = c("best", "1se"), prange = NULL, coef_order = NULL, digits = 2, ... )## S3 method for class 'spar.cv' plot( x, plot_type = c("val_measure", "val_numactive", "res_vs_fitted", "coefs"), plot_along = c("nu", "nummod"), nummod = NULL, nu = NULL, xfit = NULL, yfit = NULL, opt_par = c("best", "1se"), prange = NULL, coef_order = NULL, digits = 2, ... )
x |
result of spar.cv function of class |
plot_type |
one of |
plot_along |
one of |
nummod |
fixed value for |
nu |
fixed value for |
xfit |
data used for predictions in |
yfit |
data used for predictions in |
opt_par |
one of |
prange |
optional vector of length 2 for |
coef_order |
optional index vector of length p for |
digits |
number of significant digits to be displayed in the axis; defaults to 2L. |
... |
further arguments passed to or from other methods |
'ggplot2::ggplot' object
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, screencoef = screen_cor(), rp = rp_gaussian(), nummods = c(5, 10)) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest, opt_par = "1se") plot(spar_res, "coefs", prange = c(1, 400))example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, screencoef = screen_cor(), rp = rp_gaussian(), nummods = c(5, 10)) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest, opt_par = "1se") plot(spar_res, "coefs", prange = c(1, 400))
'spar' ObjectPredict responses for new predictors from 'spar' object
## S3 method for class 'spar' predict( object, xnew = NULL, type = c("response", "link"), avg_type = c("link", "response"), nummod = NULL, nu = NULL, aggregate = c("mean", "median"), ... )## S3 method for class 'spar' predict( object, xnew = NULL, type = c("response", "link"), avg_type = c("link", "response"), nummod = NULL, nu = NULL, aggregate = c("mean", "median"), ... )
object |
result of spar function of class |
xnew |
matrix of new predictor variables; must have same number of columns as |
type |
the type of required predictions; either on response level (default) or on link level |
avg_type |
type of averaging the marginal models; either on link (default) or on response level |
nummod |
number of models used to form coefficients; value with minimal validation measure is used if not provided. |
nu |
threshold level used to form coefficients; value with minimal validation measure is used if not provided. |
aggregate |
character one of c("mean", "median"); the aggregation over the ensembles is done using the specified method (mean or median). Defaults to mean aggregation. |
... |
further arguments passed to or from other methods |
Vector of predictions
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) pred <- predict(spar_res, xnew = example_data$xtest)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) pred <- predict(spar_res, xnew = example_data$xtest)
'spar.cv' ObjectPredict responses for new predictors from 'spar.cv' object
## S3 method for class 'spar.cv' predict( object, xnew = NULL, type = c("response", "link"), avg_type = c("link", "response"), opt_par = c("best", "1se"), nummod = NULL, nu = NULL, aggregate = c("mean", "median"), ... )## S3 method for class 'spar.cv' predict( object, xnew = NULL, type = c("response", "link"), avg_type = c("link", "response"), opt_par = c("best", "1se"), nummod = NULL, nu = NULL, aggregate = c("mean", "median"), ... )
object |
result of spar.cv function of class |
xnew |
matrix of new predictor variables; must have same number of columns as |
type |
the type of required predictions; either on response level (default) or on link level |
avg_type |
type of averaging the marginal models; either on link (default) or on response level |
opt_par |
one of |
nummod |
number of models used to form coefficients; value with
minimal validation measure is used if not provided. |
nu |
threshold level used to form coefficients; value with minimal
validation measure is used if not provided. |
aggregate |
character one of c("mean", "median"); the aggregation over the ensembles is done using the specified method (mean or median). Defaults to mean aggregation. |
... |
further arguments passed to or from other methods |
Vector of predictions
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, rp = rp_gaussian(), nummods = c(5, 10)) pred <- predict(spar_res, example_data$x)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, rp = rp_gaussian(), nummods = c(5, 10)) pred <- predict(spar_res, example_data$x)
'coefspar' ObjectPrint method showing the basic components of a 'coefspar' object.
## S3 method for class 'coefspar' print(x, digits = 4L, show = 6L, ...)## S3 method for class 'coefspar' print(x, digits = 4L, show = 6L, ...)
x |
An object of class |
digits |
integer digits to be printed, defaults to 4L. |
show |
integer number of coefficients to be shown, defaults to 6L. |
... |
Additional arguments passed to or from other methods (ignored here). |
Invisibly returns the input object x.
example_data <- simulate_spareg_data(n = 100, p = 2000, ntest = 100) spar_res <- spareg(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coef(spar_res) coef(spar_res, aggregate = "median") coef(spar_res, aggregate = "none") print(coef(spar_res), show = 10L, digits = 6L)example_data <- simulate_spareg_data(n = 100, p = 2000, ntest = 100) spar_res <- spareg(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) coef(spar_res) coef(spar_res, aggregate = "median") coef(spar_res, aggregate = "none") print(coef(spar_res), show = 10L, digits = 6L)
'randomprojection' ObjectPrint method for a 'randomprojection' object
## S3 method for class 'randomprojection' print(x, ...)## S3 method for class 'randomprojection' print(x, ...)
x |
object of class |
... |
further arguments passed to or from other methods |
text summary
'screencoef' ObjectPrint method for a 'screencoef' object
## S3 method for class 'screencoef' print(x, ...)## S3 method for class 'screencoef' print(x, ...)
x |
object of class 'screencoef' |
... |
further arguments passed to or from other methods |
text summary
'spar' ObjectPrint summary of 'spar' object
## S3 method for class 'spar' print(x, ...)## S3 method for class 'spar' print(x, ...)
x |
result of spar function of class |
... |
further arguments passed to or from other methods |
text summary
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) print(spar_res)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) print(spar_res)
'spar.cv' ObjectPrint summary of 'spar.cv' object
## S3 method for class 'spar.cv' print(x, ...)## S3 method for class 'spar.cv' print(x, ...)
x |
result of spar.cv function of class |
... |
further arguments passed to or from other methods |
text summary
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spareg.cv(example_data$x, example_data$y, nfolds = 3L, screencoef = screen_cor(), rp = rp_gaussian(), nummods = c(5, 10)) print(spar_res)example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spareg.cv(example_data$x, example_data$y, nfolds = 3L, screencoef = screen_cor(), rp = rp_gaussian(), nummods = c(5, 10)) print(spar_res)
Creates an object of class 'randomprojection' using arguments passed by
user which in turn can be employed to generate a sparse embedding matrix as
in (Clarkson and Woodruff 2013).
rp_cw(..., control = list())rp_cw(..., control = list())
... |
includes arguments which can be passed as attributes to the random projection matrix |
control |
list of arguments to be used in functions
|
The entries of the matrix are generated based on (Clarkson and Woodruff 2013).
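This matrix is constructed as $\Phi = BD \in \mathbb{R}^{m \times p}$, where
$B$ is a $(m \times p)$ binary matrix, where for each column $j$
an index is uniformly sampled from $\{1, \ldots, m\}$ and the corresponding
entry is set to one, and $D$ is a $(p \times p)$ diagonal matrix,
with entries $d_j \sim \text{Unif}(\{-1, 1\})$.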
If specified as rp_cw(data = TRUE), the random elements on the diagonal
are replaced by the ridge coefficients with a small penalty, as introduced in
(Parzer et al. 2024).
object of class 'randomprojection' which is a list with
elements name,
generate_fun, update_fun, control
Clarkson KL, Woodruff DP (2013). “Low Rank Approximation and Regression in Input Sparsity Time.” In Proceedings of the Forty-Fifth Annual ACM Symposium on Theory of Computing, STOC '13, 81–90. ISBN 9781450320290. doi:10.1145/2488608.2488620.
Parzer R, Filzmoser P, Vana-Gür L (2024). “Data-Driven Random Projection and Screening for High-Dimensional Generalized Linear Models.” Technical Report 2410.00971, arXiv.org E-Print Archive. doi:10.48550/arXiv.2410.00971.
example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), rp = rp_cw(data = TRUE))example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), rp = rp_cw(data = TRUE))
Creates an object of class 'randomprojection' using arguments passed by
user which in turn can be employed to generate a random matrix with normally
distributed entries (mean 0 and standard deviation 1 by default).
rp_gaussian(..., control = list())rp_gaussian(..., control = list())
... |
includes arguments which can be passed as attributes to the random projection matrix |
control |
list of arguments to be used in functions
|
Arguments related to the random projection procedure can
be passed to the rp_gaussian() function through ..., and
will be saved as attributes of the 'randomprojection' object.
The following attributes are relevant for spar and spar.cv:
mslow: integer giving the minimum dimension to which the predictors
should be projected; defaults to $\log(p)$.
msup: integer giving the maximum dimension to which the predictors
should be projected; defaults to $n/2$.
object of class 'randomprojection' which is a list with
elements name,
generate_fun, update_fun, control
example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), rp = rp_gaussian(control = list(sd = 1/sqrt(ncol(example_data$x)))))example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), rp = rp_gaussian(control = list(sd = 1/sqrt(ncol(example_data$x)))))
Creates an object of class 'randomprojection' using arguments passed by
user which in turn can be employed to generate a sparse embedding matrix as
in (Achlioptas 2003).
rp_sparse(..., control = list())rp_sparse(..., control = list())
... |
includes arguments which can be passed as attributes to the random projection matrix. |
control |
list of arguments to be used in functions
|
The sparse matrix used in (Achlioptas 2003) with entries equal to
$\Psi_{ij} = \pm 1/\sqrt{\psi}$ with probability $\psi/2$ each and zero otherwise
for $\psi \in (0, 1]$. Default is psi = 1.
Arguments related to the random projection procedure can
be passed to the rp_sparse() function through ..., and
will be saved as attributes of the 'randomprojection' object.
The following attributes are relevant for spar and spar.cv:
mslow: integer giving the minimum dimension to which the predictors
should be projected; defaults to $\log(p)$.
msup: integer giving the maximum dimension to which the predictors
should be projected; defaults to $n/2$.
object of class 'randomprojection' which is a list with
elements name,
generate_fun, update_fun, control
Achlioptas D (2003). “Database-Friendly Random Projections: Johnson-Lindenstrauss with Binary Coins.” Journal of Computer and System Sciences, 66(4), 671-687. ISSN 0022-0000. doi:10.1016/S0022-0000(03)00025-4. Special Issue on PODS 2001.
example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), rp = rp_sparse(control = list(psi = 1/3)))example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), rp = rp_sparse(control = list(psi = 1/3)))
Creates an object of class 'screencoef' using arguments passed by user,
where the screening coefficient should be computed based on the correlation
coefficient of response and each predictor separately.
screen_cor(..., control = list())screen_cor(..., control = list())
... |
includes arguments which can be passed as attributes to the
|
control |
list of controls to be passed to the screening function |
Creates an object of class 'screencoef' using arguments passed by user.
The function generate_fun relies on cor.
Arguments related to the screening procedure can
be passed to the screen_cor() function through ..., and
will be saved as attributes of the 'screencoef' object.
The following attributes are relevant for spar and spar.cv:
nscreen integer giving the number of variables to be retained
after screening; if not specified, defaults to $2n$.
split_data_prop, double between 0 and 1 which indicates the
proportion of the data that should be used for computing the screening
coefficient. The remaining data will be used for estimating the marginal
models in the SPAR algorithm; if not specified, the whole data will be used
for estimating the screening coefficient and the marginal models.
type character - either "prob" (indicating that
probabilistic screening should be employed) or "fixed" (indicating
that a fixed set of nscreen variables should be employed across the
ensemble); defaults to type = "prob".
reuse_in_rp logical - indicates whether the screening
coefficient should be reused at a later stage in the construction of the random
projection. Defaults to FALSE.
object of class 'screencoef' which is a list with elements
name (character)
control (list of controls passed as an argument)
generate_fun for generating the screening coefficient.
This function should have arguments y (vector of (standardized for Gaussian) responses),
x (the matrix of standardized predictors) and a 'screencoef' object.
example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), screencoef = screen_cor(control = list(method = "kendall")))example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), screencoef = screen_cor(control = list(method = "kendall")))
Creates an object of class 'screencoef' using arguments passed by user,
where the screening coefficient should be computed based on penalized coefficients.
screen_glmnet(..., control = list())screen_glmnet(..., control = list())
... |
includes arguments which can be passed as attributes to the
|
control |
list of controls to be passed to the screening function |
Creates an object of class 'screencoef' using arguments passed by user.
The function generate_fun relies on glmnet.
Arguments related to the screening procedure can
be passed to the screen_glmnet() function through ..., and
will be saved as attributes of the 'screencoef' object.
The following attributes are relevant for spar and spar.cv:
nscreen integer giving the number of variables to be retained
after screening; if not specified, defaults to $2n$.
split_data_prop double between 0 and 1 which indicates the
proportion of the data that should be used for computing the screening
coefficient. The remaining data will be used for estimating the marginal
models in the SPAR algorithm; if not specified, the whole data will be used
for estimating the screening coefficient and the marginal models.
type character - either "prob" (indicating that
probabilistic screening should be employed) or "fixed" (indicating
that a fixed set of nscreen variables should be employed across the
ensemble); defaults to type = "prob".
reuse_in_rp logical - indicates whether the screening
coefficient should be reused at a later stage in the construction of the random
projection. Defaults to FALSE.
object of class 'screencoef' which is a list with elements
name (character)
control (list of controls passed as an argument)
generate_fun for generating the screening coefficient.
This function should have arguments y (vector of (standardized for Gaussian) responses),
x (the matrix of standardized predictors) and a 'screencoef' object.
example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), screencoef = screen_glmnet(control = list(alpha = 0.1)))example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), screencoef = screen_glmnet(control = list(alpha = 0.1)))
Creates an object of class 'screencoef' using arguments passed by user,
where the screening coefficient should be computed based on the marginal
likelihood of the univariate GLM where the response is regressed on
each predictor separately.
screen_marglik(..., control = list())screen_marglik(..., control = list())
... |
includes arguments which can be passed as attributes to the
|
control |
list of controls to be passed to the screening function |
The function generate_fun relies on glm.
Arguments related to the screening procedure can
be passed to the screen_marglik() function through ..., and
will be saved as attributes of the 'screencoef' object.
The following attributes are relevant for spar and spar.cv:
nscreen integer giving the number of variables to be retained
after screening; if not specified, defaults to $2n$.
split_data_prop double between 0 and 1 which indicates the
proportion of the data that should be used for computing the screening
coefficient. The remaining data will be used for estimating the marginal
models in the SPAR algorithm; if not specified, the whole data will be used
for estimating the screening coefficient and the marginal models.
type character - either "prob" (indicating that
probabilistic screening should be employed) or "fixed" (indicating
that a fixed set of nscreen variables should be employed across the
ensemble); defaults to type = "prob".
reuse_in_rp logical - indicates whether the screening
coefficient should be reused at a later stage in the construction of the random
projection. Defaults to FALSE.
object of class 'screencoef' which is a list with elements:
name (character)
control (list of controls passed as an argument)
generate_fun for generating the screening coefficient.
This function should have arguments y (vector of (standardized for Gaussian) responses),
x (the matrix of standardized predictors) and a 'screencoef' object.
example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), screencoef = screen_marglik(nscreen = 500))example_data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30), screencoef = screen_marglik(nscreen = 500))
Generates synthetic data for sparse linear regression problems. Returns training and test sets along with model parameters.
simulate_spareg_data( n, p, ntest, a = min(100, p/4), snr = 10, rho = 0.5, mu = 1, beta_vals = NULL, seed = NULL )simulate_spareg_data( n, p, ntest, a = min(100, p/4), snr = 10, rho = 0.5, mu = 1, beta_vals = NULL, seed = NULL )
n |
Integer. Number of training samples. |
p |
Integer. Number of predictors (features). |
ntest |
Integer. Number of test samples. |
a |
Integer. Number of non-zero coefficients in the true beta vector. Default is min(100, p/4). |
snr |
Numeric. Signal-to-noise ratio. Default is 10. |
rho |
Numeric between 0 and 1. Pairwise correlation coefficient among predictors. Default is 0.5. A compound symmetry correlation matrix is used. The variance of the predictors is fixed to 1. |
mu |
Numeric. Intercept term (mean of response). Default is 1. |
beta_vals |
Numeric. Possible values for non-zero coefficients in the true beta vector. Defaults to NULL, in which case the values -3, -2, -1, 1, 2, 3 will be used. |
seed |
Integer. Random seed for reproducibility. Default is NULL. |
A list with the following components:
Training design matrix (n x p).
Training response vector (length n).
Test design matrix (ntest x p).
Test response vector (length ntest).
Intercept used in data generation.
True coefficient vector (length p).
Noise variance used in data generation. Equals beta' Sigma beta / snr.
set.seed(123) data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) str(data)set.seed(123) data <- simulate_spareg_data(n = 200, p = 2000, ntest = 100) str(data)
Apply Sparse Projected Averaged Regression to high-dimensional data by
building an ensemble of generalized linear models, where the high-dimensional
predictors can be screened using a screening coefficient and then projected
using data-agnostic or data-informed random projection matrices.
This function performs the procedure for a given grid of thresholds
and a grid of the number of marginal models to be employed in the ensemble.
This function is also used in the cross-validated procedure spar.cv.
spar( x, y, family = gaussian("identity"), model = NULL, rp = NULL, screencoef = NULL, xval = NULL, yval = NULL, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, inds = NULL, RPMs = NULL, seed = NULL, ... ) spareg( x, y, family = gaussian("identity"), model = NULL, rp = NULL, screencoef = NULL, xval = NULL, yval = NULL, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, inds = NULL, RPMs = NULL, seed = NULL, ... )spar( x, y, family = gaussian("identity"), model = NULL, rp = NULL, screencoef = NULL, xval = NULL, yval = NULL, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, inds = NULL, RPMs = NULL, seed = NULL, ... ) spareg( x, y, family = gaussian("identity"), model = NULL, rp = NULL, screencoef = NULL, xval = NULL, yval = NULL, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, inds = NULL, RPMs = NULL, seed = NULL, ... )
x |
n x p numeric matrix of predictor variables. |
y |
quantitative response vector of length n. |
family |
a family object used for the marginal generalized linear model,
default |
model |
function creating a |
rp |
function creating a |
screencoef |
function creating a |
xval |
optional matrix of predictor variable observations used for
validation of threshold nu and number of models; |
yval |
optional response observations used for validation of
threshold nu and number of models; |
nnu |
number of different threshold values |
nus |
optional vector of |
nummods |
vector of numbers of marginal models to consider for
validation; defaults to |
measure |
loss to use for validation; defaults to |
avg_type |
type of averaging the marginal models; either on link (default) or on response level. This is used in computing the validation measure. |
parallel |
assuming a parallel backend is loaded and available, a logical indicating whether the function should use it in parallelizing the estimation of the marginal models. Defaults to FALSE. |
inds |
optional list of index-vectors corresponding to variables kept
after screening in each marginal model of length |
RPMs |
optional list of projection matrices used in each
marginal model of length |
seed |
integer seed to be set at the beginning of the SPAR algorithm. Defaults to NULL, in which case no seed is set. |
... |
further arguments mainly to ensure back-compatibility |
object of class 'spar' with elements
betas p x max(nummods) sparse matrix of class
'Matrix::dgCMatrix' containing the
standardized coefficients from each marginal model
intercepts used in each marginal model
scr_coef vector of length p with coefficients used for screening the standardized predictors
inds list of index-vectors corresponding to variables kept after screening in each marginal model of length max(nummods)
RPMs list of projection matrices used in each marginal model of length max(nummods)
val_res data.frame with validation results (validation measure
and number of active variables) for each element of nus and nummods
val_set logical flag, whether validation data were provided;
if FALSE, training data were used for validation
family a character corresponding to family object used for the marginal generalized linear model e.g.,
"gaussian(identity)"
nus vector of ν's considered for thresholding
nummods vector of numbers of marginal models considered for validation
ycenter empirical mean of initial response vector
yscale empirical standard deviation of initial response vector
xcenter p-vector of empirical means of initial predictor variables
xscale p-vector of empirical standard deviations of initial predictor variables
avg_type character, averaging type for computing the validation measure
measure character, type of validation measure used
rp an object of class "randomprojection"
screencoef an object of class "screencoef"
x_rows_for_fitting_marginal_models vector of row indicators from
x which were used for fitting the marginal models, if screening was performed
using screencoef with split_data_prop argument. Is NULL otherwise.
If a parallel backend is registered and parallel = TRUE,
the foreach function
is used to estimate the marginal models in parallel.
Parzer R, Filzmoser P, Vana-Gür L (2024). “Sparse Data-Driven Random Projection in Regression for High-Dimensional Data.” Technical Report 2312.00130, arXiv.org E-Print Archive. doi:10.48550/arXiv.2312.00130.
Parzer R, Filzmoser P, Vana-Gür L (2024). “Data-Driven Random Projection and Screening for High-Dimensional Generalized Linear Models.” Technical Report 2410.00971, arXiv.org E-Print Archive. doi:10.48550/arXiv.2410.00971.
Clarkson KL, Woodruff DP (2013). “Low Rank Approximation and Regression in Input Sparsity Time.” In Proceedings of the Forty-Fifth Annual ACM Symposium on Theory of Computing, STOC '13, 81–90. ISBN 9781450320290. doi:10.1145/2488608.2488620.
Achlioptas D (2003). “Database-Friendly Random Projections: Johnson-Lindenstrauss with Binary Coins.” Journal of Computer and System Sciences, 66(4), 671-687. ISSN 0022-0000. doi:10.1016/S0022-0000(03)00025-4. Special Issue on PODS 2001.
spar.cv, coef.spar, predict.spar, plot.spar, print.spar
example_data <- simulate_spareg_data(n = 200, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30)) coefs <- coef(spar_res) pred <- predict(spar_res, xnew = example_data$x) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest) plot(spar_res, plot_type = "coefs", prange = c(1,400)) spar_res <- spareg(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30))example_data <- simulate_spareg_data(n = 200, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30)) coefs <- coef(spar_res) pred <- predict(spar_res, xnew = example_data$x) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest) plot(spar_res, plot_type = "coefs", prange = c(1,400)) spar_res <- spareg(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10, 15, 20, 25, 30))
'sparmodel'
Creates an object of class 'sparmodel' using arguments passed by user.
spar_glm(..., control = list())spar_glm(..., control = list())
... |
includes arguments which can be passed as attributes to the
|
control |
list of controls to be passed to the model function |
Relies on glm.
object of class 'sparmodel' which is a list with elements
name (character)
control (list of controls passed as an argument)
model_fun function for estimating the model coefficients and the intercept.
This function should have arguments y, vector of standardized responses,
z, a matrix of projected predictors in each marginal model, and
object, which is a 'sparmodel' object. Returns a list with
two elements: gammas which is the vector of regression coefficients
for the projected predictors and intercept which is the intercept of the model
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, model = spar_glm())example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, model = spar_glm())
'sparmodel'
Creates an object of class 'sparmodel' using arguments passed by user.
spar_glmnet(..., control = list())spar_glmnet(..., control = list())
... |
includes arguments which can be passed as attributes to the
|
control |
list of controls to be passed to the model function |
Relies on glmnet.
object of class 'sparmodel' which is a list with elements
name (character)
control (list of controls passed as an argument)
model_fun function for estimating the model coefficients and the intercept.
This function should have arguments y, vector of standardized responses,
z, a matrix of projected predictors in each marginal model, and
object, which is a 'sparmodel' object. Returns a list with
two elements: gammas which is the vector of regression coefficients
for the projected predictors and intercept which is the intercept
of the model.
update_fun optional function for updating the 'sparmodel'
object before the start of the algorithm.
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, model = spar_glmnet(alpha = 0.1))example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, model = spar_glmnet(alpha = 0.1))
Apply Sparse Projected Averaged Regression to High-Dimensional Data, where the number of models and the threshold parameter are chosen using a cross-validation procedure.
spar.cv( x, y, family = gaussian("identity"), model = spar_glmnet(), rp = NULL, screencoef = NULL, nfolds = 10, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, seed = NULL, ... ) spareg.cv( x, y, family = gaussian("identity"), model = spar_glmnet(), rp = NULL, screencoef = NULL, nfolds = 10, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, seed = NULL, ... )spar.cv( x, y, family = gaussian("identity"), model = spar_glmnet(), rp = NULL, screencoef = NULL, nfolds = 10, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, seed = NULL, ... ) spareg.cv( x, y, family = gaussian("identity"), model = spar_glmnet(), rp = NULL, screencoef = NULL, nfolds = 10, nnu = 20, nus = NULL, nummods = c(20), measure = c("deviance", "mse", "mae", "class", "1-auc"), avg_type = c("link", "response"), parallel = FALSE, seed = NULL, ... )
x |
n x p numeric matrix of predictor variables. |
y |
quantitative response vector of length n. |
family |
a |
model |
function creating a |
rp |
function creating a |
screencoef |
function creating a |
nfolds |
number of folds to use for cross-validation; should be at least 2, defaults to 10. |
nnu |
number of different threshold values |
nus |
optional vector of |
nummods |
vector of numbers of marginal models to consider for
validation; defaults to |
measure |
loss to use for validation; defaults to |
avg_type |
type of averaging the marginal models; either on link (default) or on response level. This is used in computing the validation measure. |
parallel |
assuming a parallel backend is loaded and available, a logical indicating whether the function should use it in parallelizing the estimation of the marginal models. Defaults to FALSE. |
seed |
integer seed to be set at the beginning of the SPAR algorithm. Defaults to NULL, in which case no seed is set. |
... |
further arguments mainly to ensure back-compatibility |
object of class 'spar.cv' with elements
betas p x max(nummods) sparse matrix of class
'Matrix::dgCMatrix' containing the
standardized coefficients from each marginal model computed with the spar
algorithm on the whole training data.
intercepts used in each marginal model, vector of length max(nummods)
computed with the spar algorithm on the whole training data.
scr_coef p-vector of coefficients used for screening for standardized predictors
inds list of index-vectors corresponding to variables kept after
screening in each marginal model of length max(nummods)
RPMs list of projection matrices used in each marginal model of length max(nummods)
val_res a data.frame with CV results for each fold and for each element of nus and nummods
nus vector of ν's considered for thresholding
nummods vector of numbers of marginal models considered for validation
family a character corresponding to family object used for the marginal generalized linear model e.g.,
"gaussian(identity)"
measure character, type of validation measure used
avg_type character, averaging type for computing the validation measure
rp an object of class 'randomprojection'
screencoef an object of class 'screencoef'
model an object of class 'sparmodel'
ycenter empirical mean of initial response vector
yscale empirical standard deviation of initial response vector
xcenter p-vector of empirical means of initial predictor variables
xscale p-vector of empirical standard deviations of initial predictor variables
spar, coef.spar.cv, predict.spar.cv, plot.spar.cv, print.spar.cv
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, rp = rp_gaussian(), nummods = c(5, 10)) spar_res coefs <- coef(spar_res) pred <- predict(spar_res, example_data$x) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest, opt_par = "1se") plot(spar_res, "coefs", prange = c(1, 400)) spar_res <- spareg.cv(example_data$x, example_data$y, nummods=c(5, 10, 15, 20, 25, 30))example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100) spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L, rp = rp_gaussian(), nummods = c(5, 10)) spar_res coefs <- coef(spar_res) pred <- predict(spar_res, example_data$x) plot(spar_res) plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "val_numactive", plot_along = "nummod", nu = 0) plot(spar_res, plot_type = "val_numactive", plot_along = "nu", nummod = 10) plot(spar_res, plot_type = "res_vs_fitted", xfit = example_data$xtest, yfit = example_data$ytest, opt_par = "1se") plot(spar_res, "coefs", prange = c(1, 400)) spar_res <- spareg.cv(example_data$x, example_data$y, nummods=c(5, 10, 15, 20, 25, 30))
'coefspar' Object
Provides a summary of a coefspar object.
## S3 method for class 'coefspar' summary(object, digits = 4L, ...)## S3 method for class 'coefspar' summary(object, digits = 4L, ...)
object |
An object of class |
digits |
integer digits to be printed, defaults to 4L. |
... |
Additional arguments (ignored). |
Invisibly returns object.
example_data <- simulate_spareg_data(n = 100, p = 2000, ntest = 100) spar_res <- spareg(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) summary(coef(spar_res)) summary(coef(spar_res, aggregate = "none"))example_data <- simulate_spareg_data(n = 100, p = 2000, ntest = 100) spar_res <- spareg(example_data$x, example_data$y, xval = example_data$xtest, yval = example_data$ytest, nummods=c(5, 10)) summary(coef(spar_res)) summary(coef(spar_res, aggregate = "none"))