## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(tidy = FALSE,
                      cache = FALSE,
                      dev = "png",
                      message = FALSE, error = FALSE, warning = TRUE)

## ----introduction_workflow----------------------------------------------------
library(GenomicRanges)
library(epigraHMM)
library(SummarizedExperiment)

# Creating input for epigraHMM
bamFiles <- system.file(package="genomationData","extdata",
                        c("wgEncodeBroadHistoneH1hescCtcfStdAlnRep1.chr21.bam"))

colData <- data.frame(condition = "CTCF",replicate = 1)

# Creating epigraHMM object
object <- epigraHMMDataSetFromBam(bamFiles = bamFiles,
                                  colData = colData,
                                  genome = 'hg19',
                                  windowSize = 250,
                                  gapTrack = TRUE)

# Make control object
control <- controlEM(fileName = 'control-workflow')

# Creating normalizing offsets
object <- normalizeCounts(object = object,
                          control = control)

# Initializing epigraHMM
object <- initializer(object = object,
                      control = control)

# Running epigraHMM for consensus peak detection 
object <- epigraHMM(object = object,
                    control = control,
                    type = 'consensus')

# Calling peaks with FDR control of 0.05
peaks <- callPeaks(object = object,
                   control = control,
                   method = 0.05)

## ----data_input_countmatrixinput----------------------------------------------
countData <- list('counts' = matrix(rpois(4e5,10),ncol = 4),
                  'controls' = matrix(rpois(4e5,5),ncol = 4))

colData <- data.frame(condition = c('A','A','B','B'),
                      replicate = c(1,2,1,2))

rowRanges <- GRanges('chrA',IRanges(start = seq(1,by = 500, length.out = 1e5),
                                    width = 500))

object_matrix <- epigraHMMDataSetFromMatrix(countData = countData,
                                            colData = colData,
                                            rowRanges = rowRanges)

object_matrix

## ----data_input_baminput------------------------------------------------------
# Creating input for epigraHMM
bamFiles <- system.file(package="genomationData","extdata",
                        "wgEncodeBroadHistoneH1hescCtcfStdAlnRep1.chr21.bam")

colData <- data.frame(condition = "CTCF",
                      replicate = 1)

# Creating epigraHMM object
object_bam <- epigraHMMDataSetFromBam(bamFiles = bamFiles,
                                      colData = colData,
                                      genome = 'hg19',
                                      windowSize = 250,
                                      gapTrack = TRUE)

object_bam

## ----data_norm_offset---------------------------------------------------------
# Normalizing counts
object_normExample <- object_bam
object_normExample <- normalizeCounts(object_normExample,control = control)

normCts <- as.matrix(assay(object_normExample)/
                       exp(assay(object_normExample,'offsets')))

# Summary of raw counts
summary(assay(object_normExample))
colSums(assay(object_normExample))/1e5

# Summary of normalized counts
summary(normCts)
colSums(normCts)/1e5

## ----gcnorm,fig.show='hide',results='hide'------------------------------------
library(gcapc)
library(BSgenome.Hsapiens.UCSC.hg19)

# Toy example of utilization of gcapc for GC-content normalization with model offsets
# See ?gcapc::refineSites for best practices

# Below, subset object_bam simply to illustrate the use of `gcapc::refineSites`
# with epigraHMM
object_gcExample <- object_bam[2e4:5e4,]

gcnorm <- gcapc::refineSites(counts = assay(object_gcExample),
                             sites = rowRanges(object_gcExample),
                             gcrange = c(0,1),
                             genome = 'hg19')

# gcapc::refineSites outputs counts/2^gce
gcnorm_offsets <- log2((assay(object_gcExample) + 1) / (gcnorm + 1))

# Adding offsets to epigraHMM object
object_gcExample <- addOffsets(object = object_gcExample,
                               offsets = gcnorm_offsets)

## ----data_input_controls,eval=FALSE-------------------------------------------
# # Creating input for epigraHMM
# bamFiles <- list('counts' = system.file(package="genomationData",
#                                         "extdata",
#                                         "wgEncodeBroadHistoneH1hescCtcfStdAlnRep1.chr21.bam"),
#                  'controls' = "/path/to/your/inputcontrol.bam")
# 
# colData <- data.frame(condition = 'CTCF',replicate = 1)
# 
# # Creating epigraHMM object
# object_bam <- epigraHMMDataSetFromBam(bamFiles = bamFiles,
#                                       colData = colData,
#                                       genome = 'hg19',
#                                       windowSize = 250,
#                                       gapTrack = TRUE)
# 
# object_bam

## ----data_peak_consensus,message = FALSE--------------------------------------
# Creating input for epigraHMM
bamFiles <- system.file(package="genomationData","extdata",
                        "wgEncodeBroadHistoneH1hescCtcfStdAlnRep1.chr21.bam")

colData <- data.frame(condition = "CTCF",
                      replicate = 1)

# Creating epigraHMM object
object_consensus <- epigraHMMDataSetFromBam(bamFiles = bamFiles,
                                            colData = colData,
                                            genome = 'hg19',
                                            windowSize = 500,
                                            gapTrack = TRUE,
                                            blackList = TRUE)

# Create control object for EM algorithm
control <- controlEM(fileName = 'control-consensus')

# Normalizing counts
object_consensus <- normalizeCounts(object_consensus,
                                    control = control)

# Initializing epigraHMM
object_consensus <- initializer(object_consensus,control)

# Differential peak calling
object_consensus <- epigraHMM(object = object_consensus,
                              control = control,
                              type = 'consensus')

## ----data_peak_differential,message = FALSE-----------------------------------
# Creating input for epigraHMM
bamFiles <- system.file(package="genomationData","extdata",
                        c("wgEncodeBroadHistoneH1hescCtcfStdAlnRep1.chr21.bam",
                          "wgEncodeBroadHistoneH1hescSuz12051317AlnRep1.chr21.bam"))

colData <- data.frame(condition = c("CTCF","SUZ12"),
                      replicate = c(1,1))

# Creating epigraHMM object
object_differential <- epigraHMMDataSetFromBam(bamFiles = bamFiles,
                                               colData = colData,
                                               genome = 'hg19',
                                               windowSize = 500,
                                               gapTrack = TRUE,
                                               blackList = TRUE)

# Create control object for EM algorithm
control <- controlEM(fileName = 'control-differential',criterion = 'MACPE')

# Normalizing counts
object_differential <- normalizeCounts(object_differential,control)

# Initializing epigraHMM
object_differential <- initializer(object_differential,control)

# Differential peak calling
object_differential <- epigraHMM(object = object_differential,
                                 control = control,
                                 type = 'differential')

## ----peaks_consensus----------------------------------------------------------
peaks_consensus <- callPeaks(object = object_consensus)
peaks_consensus

## ----peaks_differential-------------------------------------------------------
peaks_differential <- callPeaks(object = object_differential)
peaks_differential

## ----viz_anno-----------------------------------------------------------------
library(GenomicRanges)
library(rtracklayer)
library(Seqinfo)

session <- browserSession()
genome(session) <- 'hg19'
refSeq <- getTable(ucscTableQuery(session, table = "refGene"))

annotation <- makeGRangesFromDataFrame(refSeq,
                                       starts.in.df.are.0based = TRUE,
                                       seqnames.field = 'chrom',
                                       start.field = 'txStart',
                                       end.field = 'txEnd',
                                       strand.field = 'strand',
                                       keep.extra.columns = TRUE)

## ----viz_consensus------------------------------------------------------------
gr_consensus <- GRanges('chr21',IRanges(41108537,41185351))

plotCounts(object = object_consensus,
           ranges = gr_consensus,
           peaks = peaks_consensus,
           annotation = annotation)

## ----viz_differential---------------------------------------------------------
gr_differential <- GRanges('chr21',IRanges(41198328,41352762))

plotCounts(object = object_differential,
           ranges = gr_differential,
           peaks = peaks_differential,
           annotation = annotation)

## ----viz_heatmap1-------------------------------------------------------------
# Selecting target differential peak
pattern_peak <- peaks_differential[overlapsAny(peaks_differential,gr_differential)]

pattern_peak[4]

plotPatterns(object = object_differential,
             ranges = pattern_peak[4],
             peaks = peaks_differential)

## ----misc_patterns------------------------------------------------------------
# Getting object information
info(object_differential)

## ----misc_probs---------------------------------------------------------------
# Getting posterior probabilties
callPatterns(object = object_differential,peaks = peaks_differential,type = 'all')

## ----misc_max-----------------------------------------------------------------
# Getting most likely differential enrichment
callPatterns(object = object_differential,peaks = peaks_differential,type = 'max')

## ----misc_pruning,message = TRUE----------------------------------------------
data('helas3')

control <- controlEM(pruningThreshold = 0.05,quietPruning = FALSE)

object <- normalizeCounts(object = helas3,control)

object <- initializer(object = object,control)

object <- epigraHMM(object = object,control = control,type = 'differential')

## ----sessioninfo--------------------------------------------------------------
sessionInfo()

