Generalized with Marginal

Vignette Author

2024-02-21

Model fitting

Here we allow for the user to input an array of people x ages x states with the exact position of an individual in every age, and a matrix of potential transitions, to train the model. The marginal computation over all states is then entry in the matrix product of the j,k position at age N, but you can also use the conditional computations.

library(MSGene)
library(data.table)
dfh <- readRDS(system.file("data", "msgene_sampledf.rds", package = "MSGene"))
train=dfh[1:(nrow(dfh)*0.80),]

set transition matrix

nstates=c("Health","Ht","HyperLip","Dm","Cad","death","Ht&HyperLip","HyperLip&Dm","Ht&Dm","Ht&HyperLip&Dm")

transitions=matrix(0,nrow = length(nstates),ncol=length(nstates))
rownames(transitions)=nstates
colnames(transitions)=nstates
transitions["Health","Health"]=1
transitions["Health","Ht"]=1
transitions["Health","HyperLip"]=1
transitions["Health","Dm"]=1
transitions["Health","Cad"]=1
transitions["Health","death"]=1

transitions["Ht","Ht"]=1
transitions["Ht","Ht&HyperLip"]=1
transitions["Ht","Ht&Dm"]=1
transitions["Ht","Cad"]=1
transitions["Ht","death"]=1


transitions["HyperLip","HyperLip"]=1
transitions["HyperLip","Ht&HyperLip"]=1
transitions["HyperLip","HyperLip&Dm"]=1
transitions["HyperLip","Cad"]=1
transitions["HyperLip","death"]=1

transitions["Dm","Dm"]=1
transitions["Dm","Ht&Dm"]=1
transitions["Dm","HyperLip&Dm"]=1
transitions["Dm","Cad"]=1
transitions["Dm","death"]=1

transitions["Ht&HyperLip","Ht&HyperLip"]=1
transitions["Ht&HyperLip","Ht&HyperLip&Dm"]=1
transitions["Ht&HyperLip","Cad"]=1
transitions["Ht&HyperLip","death"]=1

transitions["Ht&Dm","Ht&Dm"]=1
transitions["Ht&Dm","Ht&HyperLip&Dm"]=1
transitions["Ht&Dm","Cad"]=1
transitions["Ht&Dm","death"]=1


transitions["HyperLip&Dm","HyperLip&Dm"]=1
transitions["HyperLip&Dm","Ht&HyperLip&Dm"]=1
transitions["HyperLip&Dm","Cad"]=1
transitions["HyperLip&Dm","death"]=1


transitions["Ht&HyperLip&Dm","Ht&HyperLip&Dm"]=1
transitions["Ht&HyperLip&Dm","Cad"]=1
transitions["Ht&HyperLip&Dm","death"]=1


## absorbing states
transitions["death","death"]=1
transitions["Cad","Cad"]=1

print(transitions)
#>                Health Ht HyperLip Dm Cad death Ht&HyperLip HyperLip&Dm Ht&Dm
#> Health              1  1        1  1   1     1           0           0     0
#> Ht                  0  1        0  0   1     1           1           0     1
#> HyperLip            0  0        1  0   1     1           1           1     0
#> Dm                  0  0        0  1   1     1           0           1     1
#> Cad                 0  0        0  0   1     0           0           0     0
#> death               0  0        0  0   0     1           0           0     0
#> Ht&HyperLip         0  0        0  0   1     1           1           0     0
#> HyperLip&Dm         0  0        0  0   1     1           0           1     0
#> Ht&Dm               0  0        0  0   1     1           0           0     1
#> Ht&HyperLip&Dm      0  0        0  0   1     1           0           0     0
#>                Ht&HyperLip&Dm
#> Health                      0
#> Ht                          0
#> HyperLip                    0
#> Dm                          0
#> Cad                         0
#> death                       0
#> Ht&HyperLip                 1
#> HyperLip&Dm                 1
#> Ht&Dm                       1
#> Ht&HyperLip&Dm              1

Indicate your at risk state for the years of interest

ages=c(40:81)
s=statusarray(df_frame = data.table(train),ages = ages,nstates = nstates)
dim(s)
#> [1]     42 385541     11
head(s[,18,])
#>    Health Ht HyperLip Dm Cad death Ht&HyperLip HyperLip&Dm Ht&Dm Ht&HyperLip&Dm
#> 40      0  1        0  0   0     0           0           0     0              0
#> 41      0  1        0  0   0     0           0           0     0              0
#> 42      0  1        0  0   0     0           0           0     0              0
#> 43      0  1        0  0   0     0           0           0     0              0
#> 44      0  1        0  0   0     0           0           0     0              0
#> 45      0  1        0  0   0     0           0           0     0              0
#>    out
#> 40   0
#> 41   0
#> 42   0
#> 43   0
#> 44   0
#> 45   0

now fit the model which will expand over the potential transitions in the transition matrix according to your covariates list

ages=40:80
covariates="cad.prs+f.31.0.0+smoke+antihtn_now+statin_now"
train$cad.prs=scale(train$cad.prs)
mfit=modelfitfun(ages = ages,transit_mat=transitions,covariates,df = train,statusarray = s)

now we will need to extract coefficients for each age and relevant state to state transition

ages=40:80
coeff_array=coef_array_func(ages = ages,transit_mat = transitions,modelfit = mfit)

now we have an array that is ages x start

dim(coeff_array)
#> [1] 41 10 10  6

then we will need to compute a transition matrix for each age and persno

here is for one person with a particular covariate profile at age 50

age=50
covariates=c(1,1,1,0,0,0)
absorbing_states=c(5,6)


age_specific_matrix <- generate_single_transition_matrix(age = 50, coeff_array, covariates, absorbing_states = c(5, 6))
print(age_specific_matrix)
#>                   Health         Ht    HyperLip          Dm         Cad
#> Health         0.9761349 0.01477726 0.004133046 0.001979573 0.002905684
#> Ht             0.0000000 0.96850829 0.000000000 0.000000000 0.007428364
#> HyperLip       0.0000000 0.00000000 0.942322223 0.000000000 0.015311795
#> Dm             0.0000000 0.00000000 0.000000000 0.923574414 0.008249102
#> Cad            0.0000000 0.00000000 0.000000000 0.000000000 1.000000000
#> death          0.0000000 0.00000000 0.000000000 0.000000000 0.000000000
#> Ht&HyperLip    0.0000000 0.00000000 0.000000000 0.000000000 0.018090491
#> HyperLip&Dm    0.0000000 0.00000000 0.000000000 0.000000000 0.012585436
#> Ht&Dm          0.0000000 0.00000000 0.000000000 0.000000000 0.014208380
#> Ht&HyperLip&Dm 0.0000000 0.00000000 0.000000000 0.000000000 0.019405100
#>                       death Ht&HyperLip HyperLip&Dm       Ht&Dm Ht&HyperLip&Dm
#> Health         6.951385e-05  0.00000000 0.000000000 0.000000000     0.00000000
#> Ht             1.543083e-04  0.01724618 0.000000000 0.006662851     0.00000000
#> HyperLip       3.412091e-04  0.03578112 0.006243657 0.000000000     0.00000000
#> Dm             7.086418e-04  0.00000000 0.020209520 0.047258321     0.00000000
#> Cad            0.000000e+00  0.00000000 0.000000000 0.000000000     0.00000000
#> death          1.000000e+00  0.00000000 0.000000000 0.000000000     0.00000000
#> Ht&HyperLip    1.677610e-04  0.96927482 0.000000000 0.000000000     0.01246693
#> HyperLip&Dm    1.870668e-03  0.00000000 0.910675942 0.000000000     0.07486795
#> Ht&Dm          9.577323e-04  0.00000000 0.000000000 0.938849061     0.04598483
#> Ht&HyperLip&Dm 1.475940e-03  0.00000000 0.000000000 0.000000000     0.97911896

Note that the probs for each row sum to 1 and the for the absorbing states the j,j entry is 1.

rowSums(age_specific_matrix)
#>         Health             Ht       HyperLip             Dm            Cad 
#>              1              1              1              1              1 
#>          death    Ht&HyperLip    HyperLip&Dm          Ht&Dm Ht&HyperLip&Dm 
#>              1              1              1              1              1
age_specific_matrix[absorbing_states[1],absorbing_states[1]]
#> [1] 1
age_specific_matrix[absorbing_states[2],absorbing_states[2]]
#> [1] 1

Now let’s compute over an entire interval:

age_start <- 40
age_end <- 80
indcovariates=c(1,1,1,0,0,0)
# Execute the function.


result_matrix <- calculate_matrix_product_over_interval(coeff_array, indcovariates, age_start, age_end,absorbing_states = c(5,6))

# result_list=list()
# for(i in 1:nrow(atrisk)){
#   result_list[[i]]=calculate_matrix_product_over_interval(coeff_array, atrisk[i,], age_start, age_end,absorbing_states = c(5,6))
# }


print(result_matrix)
#>                   Health        Ht   HyperLip         Dm       Cad      death
#> Health         0.3030552 0.1479926 0.05169571 0.01134324 0.2909590 0.05707457
#> Ht             0.0000000 0.2045137 0.00000000 0.00000000 0.4487799 0.06689984
#> HyperLip       0.0000000 0.0000000 0.09653173 0.00000000 0.5339276 0.08321941
#> Dm             0.0000000 0.0000000 0.00000000 0.03331253 0.4573097 0.21233519
#> Cad            0.0000000 0.0000000 0.00000000 0.00000000 1.0000000 0.00000000
#> death          0.0000000 0.0000000 0.00000000 0.00000000 0.0000000 1.00000000
#> Ht&HyperLip    0.0000000 0.0000000 0.00000000 0.00000000 0.5955979 0.05908701
#> HyperLip&Dm    0.0000000 0.0000000 0.00000000 0.00000000 0.2905922 0.65239948
#> Ht&Dm          0.0000000 0.0000000 0.00000000 0.00000000 0.5991276 0.09646410
#> Ht&HyperLip&Dm 0.0000000 0.0000000 0.00000000 0.00000000 0.6603932 0.08931565
#>                Ht&HyperLip HyperLip&Dm      Ht&Dm Ht&HyperLip&Dm
#> Health          0.08895675 0.004134397 0.02177520     0.02305678
#> Ht              0.18609210 0.000000000 0.03551808     0.05819637
#> HyperLip        0.21949965 0.007965871 0.00000000     0.06098176
#> Dm              0.00000000 0.022133150 0.10569724     0.17503840
#> Cad             0.00000000 0.000000000 0.00000000     0.00000000
#> death           0.00000000 0.000000000 0.00000000     0.00000000
#> Ht&HyperLip     0.24970782 0.000000000 0.00000000     0.09560729
#> HyperLip&Dm     0.00000000 0.000000000 0.00000000     0.10487802
#> Ht&Dm           0.00000000 0.000000000 0.07708799     0.22732032
#> Ht&HyperLip&Dm  0.00000000 0.000000000 0.00000000     0.25029116

Great! But can also compute the lifetime risk as a product of one step and compare differences with our earlier ‘conditional’ method:

cmat=coeff_array[,"Health","Cad",]

intercept=1
sex=c(0,1)
cad.prs=1
smoke=0
antihtn_now=0
statin_now=0
atrisk=expand.grid(intercept,cad.prs,sex,smoke,antihtn_now,statin_now) 

mso = compute_prediction_product_matrix(
coefmat = cmat,
atrisk = atrisk,
agepredinterval = c(40:80)
)

result1=mso$PredictedIntervalrisk[2]


## compare

covariates="cad.prs+f.31.0.0+smoke+antihtn_now+statin_now"
modelfit = fitfunc2(
  df_frame=data.table::data.table(train),
  ages = ages,
  nstates = nstates,
  mode = "binomial",
  covariates = covariates)

Look at all that we’ve produced!

attributes(modelfit)
#> $names
#> [1] "events"       "rates"        "AR"           "model_list"   "yearsinstate"

You can see in this array we have the number of events, the absolute rate per year per transition, the number at risk per year and per transition, and the list of models over years and states.

Extract coefficients for health–>CAD transition

To compute our risk calculations, we need to extract the unsmoothed coefficients, we will feed through a smoother, and then use these coefficients to compute our product. We want to check to make sure these match!

a = coefplotsmooth2(
    ages = ages,
    start = nstates[1],
    stop = "Cad",
    modelfit = modelfit,
    window_width = 20,
    span = 0.75,
    degree = 2
  )
healthy_coefs = a$custom_smooth



mso = compute_prediction_product_matrix(
coefmat = healthy_coefs,
atrisk = atrisk,
agepredinterval = c(40:80)
)

result2=mso$PredictedIntervalrisk[2]
all.equal(result1,result2,tolerance = 1e-5)
#> [1] TRUE

‘result_matrix’ now contains the product of transition matrices over the specified age interval.

Confirm that result matches the product of individual matrices:

covariates=c(1,1,1,0,0,0)
ml=mat.test(coefficients = coeff_array,covariates = covariates,age_start = 40,age_end = 79,absorbing_states = c(5,6))

cumulative_product <- Reduce(function(x, y) x %*% y, ml)

all.equal(cumulative_product,result_matrix)
#> [1] TRUE