## ----include=FALSE------------------------------------------------------------
# Global knitr chunk defaults for this document: show each chunk's source
# (echo = TRUE) but do not execute it (eval = FALSE).
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)

## -----------------------------------------------------------------------------
# library(Biostrings)
# 
# # Lowercase an amplicon, then upper-case the guide site plus a flanking buffer.
# #
# # amplicon      Amplicon sequence (any case).
# # guide         Guide sequence; searched as given first, then as its reverse
# #               complement.
# # buffer        Number of bases on each side of the match to upper-case too.
# # max_mismatch  Mismatches tolerated by matchPattern().
# #
# # Returns list(amplicon, direction): direction is 0 when the guide matched as
# # given, 1 when only its reverse complement matched, and NA (with a warning
# # and an all-lowercase amplicon) when neither matched.
# uppercase_by_guide <- function(amplicon, guide, buffer = 10,
#                                 max_mismatch = 3) {
#   lower_amp <- tolower(amplicon)
#   query <- tolower(guide)
#   subject <- DNAString(lower_amp)
# 
#   # The guide as given wins whenever it produces any hit.
#   hits <- matchPattern(query, subject, max.mismatch = max_mismatch)
#   strand <- 0
#   if (length(hits) == 0) {
#     # Fall back to the reverse complement of the guide.
#     query_rc <- tolower(as.character(reverseComplement(DNAString(query))))
#     hits <- matchPattern(query_rc, subject, max.mismatch = max_mismatch)
#     strand <- 1
#   }
# 
#   if (length(hits) == 0) {
#     warning("Guide not found in amplicon. Returning all lowercase.")
#     return(list(amplicon = lower_amp, direction = NA))
#   }
# 
#   # Only the first hit is used; the window is clipped to the amplicon bounds.
#   from <- max(1, start(hits)[1] - buffer)
#   to <- min(nchar(lower_amp), end(hits)[1] + buffer)
#   substr(lower_amp, from, to) <- toupper(substr(lower_amp, from, to))
# 
#   list(amplicon = lower_amp, direction = strand)
# }

## -----------------------------------------------------------------------------
# # Example: mark the guide site (plus 10 flanking bases) in an amplicon.
# result <- uppercase_by_guide(
#   amplicon = "aagctgacggctaaatgaaaaatgtcaaacatctgttccaggtgctgcgtatgccagggcagaggAGGTGGTCAGGGAACTGGtggaggtcactgggataccctttcttcccacaccaatggggaaaggagtcctgccagatgaccatcccaactgtgttgctgcagccagatccaggtgtgtttgcgcttgtgtaatt",
#   guide = "AGGTGGTCAGGGAACTGG",
#   buffer = 10
# )
# # result$amplicon is the lower-cased amplicon with the guide match and up to
# # `buffer` bases on each side in UPPER CASE
# # result$direction is 0 (guide matched as given), 1 (its reverse complement
# # matched), or NA when the guide was not found

## -----------------------------------------------------------------------------
# library(Biostrings)
# 
# # Upper-case the amplicon interior, leaving both primer regions lower-case.
# #
# # amplicon        Amplicon sequence (any case).
# # forward_primer  Forward primer; only its length is used -- the first
# #                 nchar(forward_primer) bases are lower-cased without
# #                 checking that they actually match the primer.
# # reverse_primer  Reverse primer; its reverse complement is searched for
# #                 literally (case-insensitive, no mismatches allowed).
# #
# # Returns the amplicon as a single string. Warns, and leaves the 3' end
# # upper-case, when the reverse primer's reverse complement is absent.
# uppercase_whole_interior <- function(amplicon, forward_primer, reverse_primer) {
#   amp_seq <- toupper(amplicon)
#   amp_lc  <- tolower(amplicon)
# 
#   fwd_len <- nchar(forward_primer)
#   rev_rc  <- reverseComplement(DNAString(reverse_primer))
#   rev_rc  <- tolower(as.character(rev_rc))
# 
#   # Lowercase forward primer region at the start
#   substr(amp_seq, 1, fwd_len) <- tolower(substr(amp_seq, 1, fwd_len))
# 
#   # Locate the reverse primer RC at the END of the amplicon. Taking the first
#   # occurrence would lowercase the wrong site whenever the same sequence also
#   # occurs upstream, so prefer an exact suffix match and otherwise fall back
#   # to the last hit.
#   if (endsWith(amp_lc, rev_rc)) {
#     rev_pos <- nchar(amp_lc) - nchar(rev_rc) + 1
#   } else {
#     rev_hits <- gregexpr(rev_rc, amp_lc, fixed = TRUE)[[1]]
#     rev_pos <- rev_hits[length(rev_hits)]  # -1 when there is no match
#   }
# 
#   if (rev_pos > 0) {
#     rev_end <- rev_pos + nchar(rev_rc) - 1
#     substr(amp_seq, rev_pos, rev_end) <-
#       tolower(substr(amp_seq, rev_pos, rev_end))
#   } else {
#     warning("Reverse primer RC not found in amplicon.")
#   }
# 
#   amp_seq
# }

## -----------------------------------------------------------------------------
# library(Biostrings)
# 
# # Report which orientation of `guide` is found in `amplicon`.
# #
# # Returns 0 when the guide itself matches, 1 when only its reverse complement
# # matches, and NA when neither does. Up to `max_mismatch` mismatches are
# # tolerated; the guide as given is always tried first.
# detect_direction <- function(amplicon, guide, max_mismatch = 3) {
#   amp_text <- tolower(amplicon)
#   query <- tolower(guide)
#   subject <- DNAString(amp_text)
# 
#   # TRUE when `pattern` has at least one hit in the amplicon.
#   found <- function(pattern) {
#     hits <- matchPattern(pattern, subject, max.mismatch = max_mismatch)
#     length(hits) > 0
#   }
# 
#   if (found(query)) {
#     return(0)
#   }
# 
#   # The reverse complement is only built once the forward search has failed.
#   query_rc <- tolower(as.character(reverseComplement(DNAString(query))))
#   if (found(query_rc)) 1 else NA
# }

## -----------------------------------------------------------------------------
# library(Biostrings)
# 
# # Sanity-check a configuration table and report every problem as a warning.
# #
# # config        data.frame-like table expected to hold the columns ID,
# #               Barcode, Forward_Primer, Reverse_Primer, guideRNA, Direction
# #               and Amplicon.
# # max_mismatch  Mismatches tolerated when locating the guide (check 5).
# #
# # Emits one warning() per failed check, then a completion message(). Missing
# # (NA) or empty cells and absent columns are reported up front and skipped by
# # the sequence checks, so a single bad cell cannot abort the validation.
# validate_config <- function(config, max_mismatch = 3) {
#   required <- c("ID", "Barcode", "Forward_Primer", "Reverse_Primer",
#                 "guideRNA", "Direction", "Amplicon")
#   rows <- seq_len(nrow(config))
# 
#   # Column as a character vector; an absent column reads as all NA so the
#   # row-wise checks below skip it instead of failing.
#   column <- function(name) {
#     values <- config[[name]]
#     if (is.null(values)) {
#       rep(NA_character_, length(rows))
#     } else {
#       as.character(values)
#     }
#   }
# 
#   # TRUE for values that cannot be checked: NA or the empty string.
#   is_blank <- function(x) is.na(x) | x == ""
# 
#   # TRUE when `pattern` occurs in `subject` within the mismatch budget.
#   has_hit <- function(pattern, subject) {
#     hits <- matchPattern(pattern, subject, max.mismatch = max_mismatch)
#     length(hits) > 0
#   }
# 
#   # 0. Required columns present
#   absent <- setdiff(required, names(config))
#   if (length(absent) > 0) {
#     warning("Missing required columns: ", toString(absent))
#   }
# 
#   # 1. Unique IDs
#   dup_ids <- duplicated(config$ID)
#   if (any(dup_ids)) {
#     warning("Duplicate IDs: ", toString(unique(config$ID[dup_ids])))
#   }
# 
#   # 2. No NA / empty strings in required columns
#   for (col in intersect(required, names(config))) {
#     if (any(is_blank(config[[col]]))) {
#       warning("Empty values in column: ", col)
#     }
#   }
# 
#   amplicon <- column("Amplicon")
#   amp_uc   <- toupper(amplicon)
#   fwd      <- column("Forward_Primer")
#   rev      <- column("Reverse_Primer")
#   guide    <- column("guideRNA")
# 
#   # 3. Forward primer found in amplicon
#   for (i in rows) {
#     if (is_blank(fwd[i]) || is.na(amplicon[i])) next
#     if (!grepl(toupper(fwd[i]), amp_uc[i], fixed = TRUE)) {
#       warning("Forward primer not in amplicon for ID: ", config$ID[i])
#     }
#   }
# 
#   # 4. Reverse primer RC found in amplicon
#   for (i in rows) {
#     if (is_blank(rev[i]) || is.na(amplicon[i])) next
#     rp_rc <- toupper(as.character(reverseComplement(DNAString(rev[i]))))
#     if (!grepl(rp_rc, amp_uc[i], fixed = TRUE)) {
#       warning("Reverse primer RC not in amplicon for ID: ", config$ID[i])
#     }
#   }
# 
#   # 5. Guide found in amplicon on either strand (allowing mismatches)
#   for (i in rows) {
#     if (is_blank(guide[i]) || is.na(amplicon[i])) next
#     subject <- DNAString(tolower(amplicon[i]))
#     query <- tolower(guide[i])
#     found <- has_hit(query, subject)
#     if (!found) {
#       query_rc <- tolower(as.character(reverseComplement(DNAString(query))))
#       found <- has_hit(query_rc, subject)
#     }
#     if (!found) {
#       warning("Guide not found in amplicon for ID: ", config$ID[i])
#     }
#   }
# 
#   # 6. Amplicon has at least some UPPER CASE. The POSIX class is used because
#   #    the meaning of a range such as [A-Z] depends on the locale.
#   for (i in rows) {
#     if (is.na(amplicon[i])) next
#     if (!grepl("[[:upper:]]", amplicon[i])) {
#       warning("No UPPER CASE in amplicon for ID: ", config$ID[i],
#               " -- whole amplicon will be treated as cut site")
#     }
#   }
# 
#   # 7. Unique barcode-primer combinations
#   combo <- paste(config$Barcode, config$Forward_Primer, config$Reverse_Primer)
#   dup_combo <- duplicated(combo)
#   if (any(dup_combo)) {
#     warning("Non-unique barcode/primer combos for rows: ",
#             toString(which(dup_combo)))
#   }
# 
#   message("Validation complete. Check warnings above for any issues.")
# }

## -----------------------------------------------------------------------------
# library(Biostrings)
# library(pwalign)
# 
# # Embed a donor template into the full amplicon so both span the same region.
# #
# # donor           Donor sequence; NULL, NA or "" all mean "no donor".
# # amplicon        Amplicon sequence (any case; compared in upper case).
# # scoring_matrix  Substitution matrix handed to pairwiseAlignment().
# # gap_opening     Passed to pairwiseAlignment() as gapOpening.
# # gap_extension   Passed to pairwiseAlignment() as gapExtension.
# #
# # Returns "" when there is no donor; otherwise the upper-cased amplicon with
# # the stretch the donor aligned to replaced by the whole upper-cased donor.
# # Warns when re-aligning the amplicon against that result scores below the
# # original donor alignment.
# extend_donor <- function(donor, amplicon,
#                          scoring_matrix = nucleotideSubstitutionMatrix(
#                            match = 5, mismatch = -4, baseOnly = FALSE,
#                            type = "DNA"),
#                          gap_opening = 25, gap_extension = 0) {
#   # A missing donor counts as an empty one. nchar() cannot be used for this:
#   # it is NA for a missing string and 2 for a logical NA, so neither compares
#   # equal to 0.
#   if (length(donor) == 0 || is.na(donor) || !nzchar(donor)) return("")
# 
#   amp_upper <- toupper(amplicon)
#   donor_seq <- toupper(donor)
# 
#   # Align donor to amplicon
#   aln <- pairwiseAlignment(
#     DNAStringSet(donor_seq), DNAStringSet(amp_upper),
#     substitutionMatrix = scoring_matrix, type = "overlap",
#     gapOpening = gap_opening, gapExtension = gap_extension)
# 
#   # Embed donor into a copy of the full amplicon: the aligned stretch of the
#   # amplicon is swapped for the complete donor sequence.
#   aligned_from <- start(subject(aln))
#   aligned_to <- end(subject(aln))
#   extended <- amp_upper
#   stringr::str_sub(extended, aligned_from, aligned_to) <- donor_seq
# 
#   # Validate: re-align amplicon to extended donor, score should not drop
#   aln_check <- pairwiseAlignment(
#     DNAStringSet(amp_upper), DNAStringSet(extended),
#     substitutionMatrix = scoring_matrix, type = "overlap",
#     gapOpening = gap_opening, gapExtension = gap_extension)
# 
#   if (score(aln_check) < score(aln)) {
#     warning("Donor validation score dropped: ",
#             round(score(aln_check)), " < ", round(score(aln)))
#   }
# 
#   extended
# }

