## ----echo = FALSE, message = FALSE, warning = FALSE---------------------------
library(BiocStyle)

## ----load-libs, message = FALSE,  warning = FALSE-----------------------------
library(dplyr)
library(muscat)
library(SingleCellExperiment)

## ----echo = FALSE, fig.cap = "Bulk-based weighing of single-cell hypotheses. A-C: Example p-values of differential expression upon stress, from both a single-cell dataset with modest sample size (A and C) and a much larger bulk dataset (B) (data from Waag et al., biorxiv 2025). C: Splitting the single-cell p-values by significance of the respective gene in the bulk data leads to higher local excess of p-values. D: Example precision-recall curves in a different dataset with ground truth (based on downsampled data) using standard (local) FDR and the default bbhw method (see our paper for more detail)."----
knitr::include_graphics(system.file('extdata', 'bbhw.png', package = 'muscat'))

## -----------------------------------------------------------------------------
data("example_sce")
# since the dataset is a bit too small for the procedure, we'll double it:
sce <- rbind(example_sce, example_sce)
row.names(sce) <- make.unique(row.names(sce))
# pseudo-bulk aggregation
pb <- aggregateData(sce)
res <- pbDS(pb, verbose = FALSE)
# the signal is too strong in this data, so we'll reduce it
# (don't do this with real data!):
res$table$stim <- lapply(res$table$stim, \(x) {
    x$p_val <- sqrt(x$p_val)
    x$p_adj.loc <- p.adjust(x$p_val, method="fdr")
    x
})
# we assemble the cell types in a single table for the comparison of interest:
res2 <- bind_rows(res$table$stim, .id="cluster_id")
head(res2 <- res2[order(res2$p_adj.loc),])

## -----------------------------------------------------------------------------
set.seed(123)
bulkDEA <- data.frame(
    row.names=row.names(pb), 
    logFC=rnorm(nrow(pb)),
    PValue=runif(nrow(pb)))
# we'll spike some signal in:
gs_by_k <- split(res2$gene, res2$cluster_id)
sel <- unique(unlist(lapply(gs_by_k, \(x) x[3:150])))
bulkDEA[sel,"PValue"] <- abs(rnorm(length(sel), sd=0.01))
head(bulkDEA)

## -----------------------------------------------------------------------------
# here, we manually set 'nbins'
# because the dataset is too small, 
# you shouldn't normally need to do this:
res2 <- bbhw(pbDEA=res2, bulkDEA=bulkDEA, pb=pb, nbins=4, verbose=FALSE)

## -----------------------------------------------------------------------------
head(res2[order(res2$padj),c("gene","cluster_id","p_adj.loc","padj")])
table(standard=res2$p_adj.loc < 0.05, bbhw=res2$padj < 0.05)

## -----------------------------------------------------------------------------
c(standard=sum(res2$p_adj.loc < 0.05), bbhw=sum(res2$padj < 0.05))

## ----session-info-------------------------------------------------------------
sessionInfo()

