Extract event history data and design matrix including specials from call
EventHistory.frame(
formula,
data,
unspecialsDesign = TRUE,
specials,
specialsFactor = TRUE,
specialsDesign = FALSE,
stripSpecials = NULL,
stripArguments = NULL,
stripAlias = NULL,
stripUnspecials = NULL,
dropIntercept = TRUE,
check.formula = TRUE,
response = TRUE
)
Formula whose left hand side specifies the event history, i.e., either via Surv() or Hist().
Data frame in which the formula is interpreted
Passed as is to
model.design
.
Character vector of special function names.
Usually the body of the special functions is function(x)x but
e.g., strata
from the survival package does treat
the values
Passed as is to model.design
.
Passed as is to model.design
Passed as specials
to
strip.terms
Passed as arguments
to
strip.terms
Passed as alias.names
to
strip.terms
Passed as unspecials
to
strip.terms
Passed as is to model.design
If TRUE check if formula is a Surv or Hist thing.
If FALSE do not evaluate the left hand side of the formula and to not return the response (event.history).
A list which contains
- the event.history (see Hist
)
- the design matrix (see model.design
)
- one entry for each special (see model.design
)
Obtain a list with the data used for event history regression analysis. This function cannot be used directly on the user level but inside a function to prepare data for survival analysis.
model.frame model.design Hist
## Here are some data with an event time and no competing risks
## and two covariates X1 and X2.
## Suppose we want to declare that variable X1 is treated differently
## than variable X2. For example, X1 could be a cluster variable, or
## X1 should have a proportional effect on the outcome.
dsurv <- data.frame(time=1:7,
status=c(0,1,1,0,0,0,1),
X2=c(2.24,3.22,9.59,4.4,3.54,6.81,5.05),
X3=c(1,1,1,1,0,0,1),
X4=c(44.69,37.41,68.54,38.85,35.9,27.02,41.84),
X1=factor(c("a","b","a","c","c","a","b"),
levels=c("c","a","b")))
## We pass a formula and the data
e <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4,
data=dsurv,
specials=c("prop","cluster"),
stripSpecials=c("prop","cluster"))
names(e)
#> [1] "event.history" "design" "prop" "cluster"
## The first element is the event.history which is result of the left hand
## side of the formula:
e$event.history
#>
#> Right-censored response of a survival model
#>
#> No.Observations: 7
#>
#> Pattern:
#> Freq
#> event 3
#> right.censored 4
## same as
with(dsurv,Hist(time,status))
#>
#> Right-censored response of a survival model
#>
#> No.Observations: 7
#>
#> Pattern:
#> Freq
#> event 3
#> right.censored 4
## to see the structure do
colnames(e$event.history)
#> [1] "time" "status"
unclass(e$event.history)
#> time status
#> 1 1 0
#> 2 2 1
#> 3 3 1
#> 4 4 0
#> 5 5 0
#> 6 6 0
#> 7 7 1
#> attr(,"states")
#> [1] "1"
#> attr(,"cens.type")
#> [1] "rightCensored"
#> attr(,"cens.code")
#> [1] "0"
#> attr(,"model")
#> [1] "survival"
#> attr(,"entry.type")
#> [1] ""
## in case of competing risks there will be an additional column called event,
## see help(Hist) for more details
## The other elements are the design, i.e., model.matrix for the non-special covariates
e$design
#> X2 X4
#> 1 2.24 44.69
#> 2 3.22 37.41
#> 3 9.59 68.54
#> 4 4.40 38.85
#> 5 3.54 35.90
#> 6 6.81 27.02
#> 7 5.05 41.84
#> attr(,"levels")
#> named list()
## and a data.frame for the special covariates
e$prop
#> X1
#> 1 a
#> 2 b
#> 3 a
#> 4 c
#> 5 c
#> 6 a
#> 7 b
## The special covariates can be returned as a model.matrix
e2 <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4,
data=dsurv,
specials=c("prop","cluster"),
stripSpecials=c("prop","cluster"),
specialsDesign=TRUE)
e2$prop
#> X1a X1b
#> 1 1 0
#> 2 0 1
#> 3 1 0
#> 4 0 0
#> 5 0 0
#> 6 1 0
#> 7 0 1
#> attr(,"levels")
#> attr(,"levels")$X1
#> [1] "c" "a" "b"
#>
#> attr(,"arguments")
#> attr(,"arguments")$X1
#> NULL
#>
#> attr(,"arguments.terms")
#> list()
#> attr(,"matrix.terms")
#> attr(,"matrix.terms")$X1
#> [1] "X1c" "X1a" "X1b"
#>
## and the non-special covariates can be returned as a data.frame
e3 <- EventHistory.frame(Hist(time,status)~prop(X1)+X2+cluster(X3)+X4,
data=dsurv,
specials=c("prop","cluster"),
stripSpecials=c("prop","cluster"),
specialsDesign=TRUE,
unspecialsDesign=FALSE)
e3$design
#> X2 X4
#> 1 2.24 44.69
#> 2 3.22 37.41
#> 3 9.59 68.54
#> 4 4.40 38.85
#> 5 3.54 35.90
#> 6 6.81 27.02
#> 7 5.05 41.84
## the general idea is that the function is used to parse the combination of
## formula and data inside another function. Here is an example with
## competing risks
SampleRegression <- function(formula,data=parent.frame()){
thecall <- match.call()
ehf <- EventHistory.frame(formula=formula,
data=data,
stripSpecials=c("prop","cluster","timevar"),
specials=c("prop","timevar","cluster"))
time <- ehf$event.history[,"time"]
status <- ehf$event.history[,"status"]
## event as a factor
if (attr(ehf$event.history,"model")=="competing.risks"){
event <- ehf$event.history[,"event"]
Event <- getEvent(ehf$event.history)
list(response=data.frame(time,status,event,Event),X=ehf[-1])
}
else{ # no competing risks
list(response=data.frame(time,status),X=ehf[-1])
}
}
dsurv$outcome <- c("cause1","0","cause2","cause1","cause2","cause2","0")
SampleRegression(Hist(time,outcome)~prop(X1)+X2+cluster(X3)+X4,dsurv)
#> $response
#> time status event Event
#> 1 1 1 1 cause1
#> 2 2 0 3 unknown
#> 3 3 1 2 cause2
#> 4 4 1 1 cause1
#> 5 5 1 2 cause2
#> 6 6 1 2 cause2
#> 7 7 0 3 unknown
#>
#> $X
#> $X$design
#> X2 X4
#> 1 2.24 44.69
#> 2 3.22 37.41
#> 3 9.59 68.54
#> 4 4.40 38.85
#> 5 3.54 35.90
#> 6 6.81 27.02
#> 7 5.05 41.84
#> attr(,"levels")
#> named list()
#>
#> $X$prop
#> X1
#> 1 a
#> 2 b
#> 3 a
#> 4 c
#> 5 c
#> 6 a
#> 7 b
#>
#> $X$cluster
#> X3
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 0
#> 6 0
#> 7 1
#>
#>
## let's test if the parsing works
form1 <- Hist(time,outcome!="0")~prop(X1)+X2+cluster(X3)+X4
form2 <- Hist(time,outcome)~prop(X1)+cluster(X3)+X4
ff <- list(form1,form2)
lapply(ff,function(f){SampleRegression(f,dsurv)})
#> [[1]]
#> [[1]]$response
#> time status
#> 1 1 1
#> 2 2 0
#> 3 3 1
#> 4 4 1
#> 5 5 1
#> 6 6 1
#> 7 7 0
#>
#> [[1]]$X
#> [[1]]$X$design
#> X2 X4
#> 1 2.24 44.69
#> 2 3.22 37.41
#> 3 9.59 68.54
#> 4 4.40 38.85
#> 5 3.54 35.90
#> 6 6.81 27.02
#> 7 5.05 41.84
#> attr(,"levels")
#> named list()
#>
#> [[1]]$X$prop
#> X1
#> 1 a
#> 2 b
#> 3 a
#> 4 c
#> 5 c
#> 6 a
#> 7 b
#>
#> [[1]]$X$cluster
#> X3
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 0
#> 6 0
#> 7 1
#>
#>
#>
#> [[2]]
#> [[2]]$response
#> time status event Event
#> 1 1 1 1 cause1
#> 2 2 0 3 unknown
#> 3 3 1 2 cause2
#> 4 4 1 1 cause1
#> 5 5 1 2 cause2
#> 6 6 1 2 cause2
#> 7 7 0 3 unknown
#>
#> [[2]]$X
#> [[2]]$X$design
#> X4
#> 1 44.69
#> 2 37.41
#> 3 68.54
#> 4 38.85
#> 5 35.90
#> 6 27.02
#> 7 41.84
#> attr(,"levels")
#> named list()
#>
#> [[2]]$X$prop
#> X1
#> 1 a
#> 2 b
#> 3 a
#> 4 c
#> 5 c
#> 6 a
#> 7 b
#>
#> [[2]]$X$cluster
#> X3
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 0
#> 6 0
#> 7 1
#>
#>
#>
## here is what the riskRegression package uses to
## distinguish between covariates with
## time-proportional effects and covariates with
## time-varying effects:
if (FALSE) { # \dontrun{
library(riskRegression)
data(Melanoma)
f <- Hist(time,status)~prop(thick)+strata(sex)+age+prop(ulcer,power=1)+timevar(invasion,test=1)
## here the unspecial terms, i.e., the term age is treated as prop
## also, strata is an alias for timvar
EHF <- prodlim::EventHistory.frame(formula,
Melanoma[1:10],
specials=c("timevar","strata","prop","const","tp"),
stripSpecials=c("timevar","prop"),
stripArguments=list("prop"=list("power"=0),
"timevar"=list("test"=0)),
stripAlias=list("timevar"=c("strata"),
"prop"=c("tp","const")),
stripUnspecials="prop",
specialsDesign=TRUE,
dropIntercept=TRUE)
EHF$prop
EHF$timevar
} # }