## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----load-packages, message=FALSE---------------------------------------------
library(LimROTS, quietly = TRUE)
library(mia, quietly = TRUE)
library(miaTime, quietly = TRUE)
library(TreeSummarizedExperiment, quietly = TRUE)
library(BiocParallel, quietly = TRUE)
library(SummarizedExperiment, quietly = TRUE)
library(ggplot2, quietly = TRUE)

## ----load-dataset-------------------------------------------------------------
data("crohn_survival")
print(crohn_survival)

## ----clr-transform------------------------------------------------------------
crohn_survival <- transformAssay(
    crohn_survival,
    assay.type = "counts",
    method     = "clr",
    pseudocount = TRUE,
    name       = "clr"
)
assayNames(crohn_survival)

## ----convert-to-se------------------------------------------------------------
taxa_names <- rownames(crohn_survival)
crohn_survival <- as(crohn_survival, "SummarizedExperiment")
# as() coercion can drop rownames; restore them
if (is.null(rownames(crohn_survival))) {
    rownames(crohn_survival) <- taxa_names
}
print(crohn_survival)

## ----fix-sample-names---------------------------------------------------------
clean_names <- gsub("\\.", "_", colnames(crohn_survival))
colnames(crohn_survival) <- clean_names
rownames(colData(crohn_survival)) <- clean_names
# Rename event_time -> time as required by LimROTS_survival()
colnames(colData(crohn_survival))[
    colnames(colData(crohn_survival)) == "event_time"] <- "time"

## ----inspect-coldata----------------------------------------------------------
table(colData(crohn_survival)$diagnosis,
      colData(crohn_survival)$event)

## ----set-seed-cox-------------------------------------------------------------
set.seed(1234, kind = "default", sample.kind = "default")

## ----run-cox------------------------------------------------------------------
crohn_cox <- LimROTS_survival(
    x           = crohn_survival,
    assay.type  = "clr",
    niter       = 30,    # use >= 1000 for real analyses
    K           = 10,     # number of top taxa
    meta.info   = c("time", "event"),
    formula.str = "~ Surv(time, event)",
    competing_risks   = FALSE,
    verbose           = FALSE
)

## ----cox-results--------------------------------------------------------------
cox_res <- data.frame(
    rowData(crohn_cox),
    row.names = rownames(crohn_cox)
)
head(cox_res[order(cox_res$pvalue), ])

## ----volcano-cox--------------------------------------------------------------
cox_res$significant <- cox_res$FDR < 0.05

ggplot(cox_res, aes(
    x     = log(exp_coef),
    y     = -log10(pvalue),
    color = significant
)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_hline(
        yintercept = -log10(0.05),
        linetype   = "dashed",
        color      = "steelblue"
    ) +
    scale_color_manual(
        values = c("FALSE" = "grey60", "TRUE" = "firebrick"),
        labels = c("FALSE" = "FDR \u2265 0.05", "TRUE" = "FDR < 0.05")
    ) +
    labs(
        title = "LimROTS Survival \u2014 Cox Model",
        x     = "Log Hazard Ratio",
        y     = expression(-log[10](pvalue)),
        color = NULL
    ) +
    theme_bw(base_size = 12) +
    theme(legend.position = "top")

## ----prepare-competing--------------------------------------------------------
crohn_cr <- crohn_survival

event_vec <- colData(crohn_cr)$event
censored_idx <- which(event_vec == 0)

set.seed(42)
competing_idx <- sample(censored_idx, floor(length(censored_idx) / 2))
event_vec[competing_idx] <- 2L

colData(crohn_cr)$event <- event_vec
table(colData(crohn_cr)$event)

## ----set-seed-cr--------------------------------------------------------------
set.seed(1234, kind = "default", sample.kind = "default")

## ----run-competing------------------------------------------------------------
crohn_cr <- LimROTS_survival(
    x           = crohn_cr,
    assay.type  = "clr",
    niter       = 30,
    K           = 10,
    meta.info   = c("time", "event"),
    formula.str = "~ Surv(time, event)",
    competing_risks   = TRUE,
    verbose           = FALSE
)

## ----cr-results---------------------------------------------------------------
cr_res <- data.frame(
    rowData(crohn_cr),
    row.names = rownames(crohn_cr)
)
head(cr_res[order(cr_res$pvalue), ])

## ----volcano-cr---------------------------------------------------------------
cr_res$significant <- cr_res$FDR < 0.05

ggplot(cr_res, aes(
    x     = log(exp_coef),
    y     = -log10(pvalue),
    color = significant
)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_hline(
        yintercept = -log10(0.05),
        linetype   = "dashed",
        color      = "steelblue"
    ) +
    scale_color_manual(
        values = c("FALSE" = "grey60", "TRUE" = "firebrick"),
        labels = c("FALSE" = "FDR \u2265 0.05", "TRUE" = "FDR < 0.05")
    ) +
    labs(
        title = "LimROTS Survival \u2014 Competing Risks Model",
        x     = "Log Subdistribution Hazard Ratio",
        y     = expression(-log[10](pvalue)),
        color = NULL
    ) +
    theme_bw(base_size = 12) +
    theme(legend.position = "top")

## ----qc-cox, results="hide", message=FALSE, warning=FALSE---------------------
plot(metadata(crohn_cr)[["q_values"]])
hist(metadata(crohn_cr)[["q_values"]])
print(summary(metadata(crohn_cr)[["q_values"]]))

## ----session-info-------------------------------------------------------------
sessionInfo()

