Fig1

Author

Sarah Urbut

Published

March 7, 2024

Hazard Ratios for the UK Biobank

** Here we plot hazard ratios by age of enrollment

Code
# Set working directory
setwd("~/dynamicHRpaper/")

# Load data
df <- readRDS("output/amit_df_with_pc_correction.rds")
dat <- df %>%
  group_by(round(phenos.enrollment, 0)) %>%
  summarise(count = n()) %>%
  slice(3:32)

hazards <- readRDS("output/hazards_rate_UKB_amit_EQ_withPCcorrection.rds")
upperbound <- readRDS("output/hazards_rate_upperbound_amit_EQ_withPCcorrection.rds")
lowerbound <- readRDS("output/hazards_rate_lowerbound_amit_EQ_withPCcorrection.rds")

# Convert hazards to data frame
hazards_df <- as.data.frame(hazards)

# Prepare matrices
hrmat <- hazards_df %>%
  select(ends_with("HR")) %>%
  mutate(age =  dat$`round(phenos.enrollment, 0)`)

ormat <- hazards_df %>%
  select(ends_with("OR")) %>%
  mutate(age =  dat$`round(phenos.enrollment, 0)`)

r2mat <- hazards_df %>%
  select(ends_with("r2")) %>%
  mutate(age =  dat$`round(phenos.enrollment, 0)`)

colnames(hrmat) <- colnames(ormat) <- colnames(r2mat) <- c(
  "Polygenic risk score", "Pooled cohort equations", "LDL Cholesterol", 
  "Smoking", "Age", "Systolic blood pressure", "Diabetes mellitus", 
  "Body Mass Index", "Male Sex", "HDL cholesterol", "Total cholesterol", "age"
)

# Melt upper and lower bounds
upperbound <- melt((as.data.frame(upperbound) %>% mutate(age = dat$`round(phenos.enrollment, 0)`)),id.vars="age")
lowerbound <- melt((as.data.frame(lowerbound) %>% mutate(age = dat$`round(phenos.enrollment, 0)`)),id.vars="age")

Convert to Long Form and Format Data

Code
haz <- melt(hrmat, id.vars = "age")
ors <- melt(ormat, id.vars = "age")
r2s <- melt(r2mat, id.vars = "age")

haz <- haz %>%
  mutate(
    low.se = lowerbound$value,
    upper.se = upperbound$value,
    across(c(value, low.se, upper.se), round, 3)
  )

r2s <- r2s %>%
  mutate(value = round(value, 3))

# Set factor levels for plotting
factor_levels <- c(
  "Diabetes mellitus", "Male Sex", "Smoking", "Polygenic risk score",
  "LDL Cholesterol", "Systolic blood pressure", "Body Mass Index",
  "Pooled cohort equations", "Age", "HDL cholesterol", "Total cholesterol"
)
haz$variable <- factor(haz$variable, levels = factor_levels)
ors$variable <- factor(ors$variable, levels = factor_levels)
r2s$variable <- factor(r2s$variable, levels = factor_levels)

# Save reshaped data
# write.csv(reshape(haz, direction = "wide", idvar = "age", timevar = "variable"), "output/hrindex_ukb_eq.csv", row.names = FALSE)
# write.csv(reshape(r2s, direction = "wide", idvar = "age", timevar = "variable"), "output/r2index_ukb_eq.csv", row.names = FALSE)

Hazard Ratio and proportion of variation explained:

Code
# Define color palette
col_pce <- rev(brewer.pal(10, "Paired"))
col_p <- c(
  col_pce[1:3], col_pce[5], col_pce[4], col_pce[c(6, 10)], "black",
  col_pce[9], "turquoise", col_pce[7]
)

# Plot HR
hr_ukb_all <- haz %>%
  filter(variable %in% c("Diabetes mellitus", "Smoking", "Polygenic risk score", "HDL cholesterol", "Systolic blood pressure", "Total cholesterol")) %>%
  ggplot(aes(x = age, y = value, colour = variable, fill = variable)) +
  stat_smooth(method = "loess", span = 1) +
  scale_color_manual(values = col_p, drop = TRUE, breaks = factor_levels) +
  scale_fill_manual(values = col_p, drop = TRUE, breaks = factor_levels, guide = "none") +
  labs(y = "Hazard Ratio of Coronary Artery Disease", x = "Age of Risk Assessment", colour = "Risk Factor") +
  theme_classic(base_size = 20) +
  ylim(0.35, 5)

# Plot R2
r2_ukb_all <- r2s %>%
  filter(variable %in% c("Diabetes mellitus", "Smoking", "Polygenic risk score", "HDL cholesterol", "Systolic blood pressure", "Total cholesterol")) %>%
  ggplot(aes(x = age, y = round(value, 2), colour = variable, fill = variable)) +
  stat_smooth(method = "loess", span = 1) +
  scale_color_manual(values = col_p, drop = TRUE, breaks = factor_levels) +
  scale_fill_manual(values = col_p, drop = TRUE, breaks = factor_levels, guide = "none") +
  labs(y = "Proportion of Variation Explained", x = "Age of Risk Assessment", colour = "Risk Factor") +
  theme_classic(base_size = 20) +
  ylim(0, 0.18)

# Save plots
# saveRDS(hr_ukb_all, "output/hall_withPC.rds")
# saveRDS(r2_ukb_all, "output/r2_withPC.rds")

Interactive Plots

Save Figures:

Code
fig_path <- "./Figs/Fig1/"
ggsave(hr_ukb_all, filename = paste0(fig_path, "hr_ukb_leg.tiff"), dpi = 300, width = 10, height = 6)
ggsave(hr_ukb_all + theme(legend.position = "none"), filename = paste0(fig_path, "hr_ukb_all_noleg.tiff"), dpi = 300, width = 6, height = 6)
ggsave(r2_ukb_all, filename = paste0(fig_path, "r2_ukb_all.tiff"), dpi = 300, width = 6, height = 6)

Pooled Cohort Equations:

Code
# Plot HR for PCE
hr_ukb_pce <- haz %>%
  filter(variable == "Pooled cohort equations") %>%
  ggplot(aes(x = age, y = value, colour = variable)) +
  stat_smooth(method = "loess", span = 1) +
  scale_color_manual(values = col_p, drop = TRUE, breaks = factor_levels) +
  labs(y = "Hazard Ratio of Coronary Artery Disease", x = "Age of Risk Assessment", colour = "Risk Factor") +
  theme_classic(base_size = 15) +
  ylim(0.8, 2) +
  theme(legend.position = "none")

# Plot R2 for PCE
r2_ukb_pce <- r2s %>%
  filter(variable == "Pooled cohort equations") %>%
  ggplot(aes(x = age, y = value, colour = variable)) +
  stat_smooth(method = "loess", span = 1) +
  scale_color_manual(values = col_p, drop = TRUE, breaks = factor_levels) +
  labs(y = "Proportion of Variation Explained", x = "Age of Risk Assessment", colour = "Risk Factor") +
  theme_classic(base_size = 15) +
  ylim(0, 0.15) +
  theme(legend.position = "none")

# Save PCE plots
# ggsave(hr_ukb_pce, filename = paste0(fig_path, "hr_pce.tiff"), dpi = 300, width = 6, height = 6)
# ggsave(r2_ukb_pce, filename = paste0(fig_path, "r2_pce.tiff"), dpi = 300, width = 6, height = 6)
# saveRDS(hr_ukb_pce, "output/hallpce.rds")
# saveRDS(r2_ukb_pce, "output/r2pce.rds")

# Combined plot
g <- ggarrange(hr_ukb_pce, r2_ukb_pce, ncol = 2, labels = c("A.", "B."))
#ggsave(g, filename = "Figs/PCE_nom.tiff", dpi = 300, width = 10, height = 5)

Interactive PCE Plots

Code
# Interactive HR PCE plot
ggplotly(hr_ukb_pce)
Code
# Interactive R2 PCE plot
ggplotly(r2_ukb_pce)

UKB Hazard Rate Lookup Table

Code
m <- cbind(haz[, 1:2], round(haz[, 3:5], 2))
colnames(m) <- colnames(haz)
DT::datatable(m)

UKB Proportion Variation Explained Lookup Table

Code
m <- cbind(r2s[, 1:2], round(r2s[, 3], 2))
colnames(m) <- colnames(r2s)
DT::datatable(m)