TileDBArray 1.18.0
TileDB implements a framework for local and remote storage of dense and sparse arrays.
We can use this as a DelayedArray backend to provide an array-level abstraction,
thus allowing the data to be used in many places where an ordinary array or matrix might be used.
The TileDBArray package implements the necessary wrappers around TileDB-R
to support read/write operations on TileDB arrays within the DelayedArray framework.
TileDBArray
Creating a TileDBArray is as easy as:
X <- matrix(rnorm(1000), ncol=10)
library(TileDBArray)
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.5740700 -0.3779134 2.5171491 . -0.002977881 0.717038372
## [2,] -1.2648912 0.1744073 -0.5408002 . -1.173651838 -0.891582594
## [3,] -2.6605357 0.7359588 -0.6406294 . -0.175968887 -0.759702585
## [4,] 0.3392694 0.8817105 1.1790853 . -1.041138940 -2.208197727
## [5,] 0.3411987 -0.6917039 2.2170432 . -1.155547847 2.643346118
## ... . . . . . .
## [96,] 0.19551469 0.92577558 -1.11762156 . 1.5105821 -0.1726622
## [97,] 0.48422404 3.04351542 2.31339360 . -0.3949009 0.5872583
## [98,] 0.27343774 1.27566428 0.54915896 . 0.2630336 -0.9735094
## [99,] 1.04993355 -1.18571447 0.66596965 . 0.6942563 -0.2260761
## [100,] 0.63850923 0.03966891 2.78373580 . 0.4561188 2.0441852
Alternatively, we can use coercion methods:
as(X, "TileDBArray")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.5740700 -0.3779134 2.5171491 . -0.002977881 0.717038372
## [2,] -1.2648912 0.1744073 -0.5408002 . -1.173651838 -0.891582594
## [3,] -2.6605357 0.7359588 -0.6406294 . -0.175968887 -0.759702585
## [4,] 0.3392694 0.8817105 1.1790853 . -1.041138940 -2.208197727
## [5,] 0.3411987 -0.6917039 2.2170432 . -1.155547847 2.643346118
## ... . . . . . .
## [96,] 0.19551469 0.92577558 -1.11762156 . 1.5105821 -0.1726622
## [97,] 0.48422404 3.04351542 2.31339360 . -0.3949009 0.5872583
## [98,] 0.27343774 1.27566428 0.54915896 . 0.2630336 -0.9735094
## [99,] 1.04993355 -1.18571447 0.66596965 . 0.6942563 -0.2260761
## [100,] 0.63850923 0.03966891 2.78373580 . 0.4561188 2.0441852
This process also works for sparse matrices:
Y <- Matrix::rsparsematrix(1000, 1000, density=0.01)
writeTileDBArray(Y)
## <1000 x 1000> sparse TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] 0 0 0 . 0 0
## [2,] 0 0 0 . 0 0
## [3,] 0 0 0 . 0 0
## [4,] 0 0 0 . 0 0
## [5,] 0 0 0 . 0 0
## ... . . . . . .
## [996,] 0 0 0 . 0 0
## [997,] 0 0 0 . 0 0
## [998,] 0 0 0 . 0 0
## [999,] 0 0 0 . 0 0
## [1000,] 0 0 0 . 0 0
Logical and integer matrices are supported:
writeTileDBArray(Y > 0)
## <1000 x 1000> sparse TileDBMatrix object of type "logical":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] FALSE FALSE FALSE . FALSE FALSE
## [2,] FALSE FALSE FALSE . FALSE FALSE
## [3,] FALSE FALSE FALSE . FALSE FALSE
## [4,] FALSE FALSE FALSE . FALSE FALSE
## [5,] FALSE FALSE FALSE . FALSE FALSE
## ... . . . . . .
## [996,] FALSE FALSE FALSE . FALSE FALSE
## [997,] FALSE FALSE FALSE . FALSE FALSE
## [998,] FALSE FALSE FALSE . FALSE FALSE
## [999,] FALSE FALSE FALSE . FALSE FALSE
## [1000,] FALSE FALSE FALSE . FALSE FALSE
As are matrices with dimension names:
rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X)))
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 0.5740700 -0.3779134 2.5171491 . -0.002977881 0.717038372
## GENE_2 -1.2648912 0.1744073 -0.5408002 . -1.173651838 -0.891582594
## GENE_3 -2.6605357 0.7359588 -0.6406294 . -0.175968887 -0.759702585
## GENE_4 0.3392694 0.8817105 1.1790853 . -1.041138940 -2.208197727
## GENE_5 0.3411987 -0.6917039 2.2170432 . -1.155547847 2.643346118
## ... . . . . . .
## GENE_96 0.19551469 0.92577558 -1.11762156 . 1.5105821 -0.1726622
## GENE_97 0.48422404 3.04351542 2.31339360 . -0.3949009 0.5872583
## GENE_98 0.27343774 1.27566428 0.54915896 . 0.2630336 -0.9735094
## GENE_99 1.04993355 -1.18571447 0.66596965 . 0.6942563 -0.2260761
## GENE_100 0.63850923 0.03966891 2.78373580 . 0.4561188 2.0441852
TileDBArrays
TileDBArrays are simply DelayedArray objects and can be manipulated as such.
The usual conventions for extracting data from matrix-like objects work as expected:
out <- as(X, "TileDBArray")
dim(out)
## [1] 100 10
head(rownames(out))
## [1] "GENE_1" "GENE_2" "GENE_3" "GENE_4" "GENE_5" "GENE_6"
head(out[,1])
## GENE_1 GENE_2 GENE_3 GENE_4 GENE_5 GENE_6
## 0.5740700 -1.2648912 -2.6605357 0.3392694 0.3411987 -0.1648271
We can also perform manipulations like subsetting and arithmetic.
Note that these operations do not affect the data in the TileDB backend;
rather, they are delayed until the values are explicitly required,
hence the creation of the DelayedMatrix object.
out[1:5,1:5]
## <5 x 5> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5
## GENE_1 0.57407004 -0.37791343 2.51714907 0.61526698 0.05849709
## GENE_2 -1.26489120 0.17440732 -0.54080023 1.01692975 0.81145584
## GENE_3 -2.66053572 0.73595876 -0.64062942 0.05350216 1.83477618
## GENE_4 0.33926937 0.88171048 1.17908530 0.62931897 0.63437864
## GENE_5 0.34119873 -0.69170386 2.21704316 1.23148273 0.86128207
out * 2
## <100 x 10> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 1.1481401 -0.7558269 5.0342981 . -0.005955762 1.434076743
## GENE_2 -2.5297824 0.3488146 -1.0816005 . -2.347303675 -1.783165188
## GENE_3 -5.3210714 1.4719175 -1.2812588 . -0.351937775 -1.519405170
## GENE_4 0.6785387 1.7634210 2.3581706 . -2.082277879 -4.416395453
## GENE_5 0.6823975 -1.3834077 4.4340863 . -2.311095694 5.286692236
## ... . . . . . .
## GENE_96 0.39102938 1.85155116 -2.23524312 . 3.0211642 -0.3453244
## GENE_97 0.96844808 6.08703084 4.62678720 . -0.7898019 1.1745165
## GENE_98 0.54687548 2.55132855 1.09831791 . 0.5260672 -1.9470188
## GENE_99 2.09986710 -2.37142893 1.33193931 . 1.3885126 -0.4521522
## GENE_100 1.27701847 0.07933783 5.56747159 . 0.9122375 4.0883704
We can also do more complex matrix operations that are supported by DelayedArray:
colSums(out)
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5 SAMP_6 SAMP_7
## 7.6401636 -5.2499671 7.2541976 3.6375345 3.0375003 -7.2549749 -8.1860736
## SAMP_8 SAMP_9 SAMP_10
## 15.8984909 6.8521490 -0.7531755
out %*% runif(ncol(out))
## [,1]
## GENE_1 2.12021422
## GENE_2 -4.24385645
## GENE_3 0.23011016
## GENE_4 1.09979602
## GENE_5 2.43291025
## GENE_6 -2.26076267
## GENE_7 -3.28096863
## GENE_8 -0.41043323
## GENE_9 -1.19240849
## GENE_10 -2.23939526
## GENE_11 -0.54116440
## GENE_12 -2.46173251
## GENE_13 3.27382761
## GENE_14 -0.73103229
## GENE_15 1.47329896
## GENE_16 -2.23922449
## GENE_17 -0.27251600
## GENE_18 1.46326529
## GENE_19 0.24493040
## GENE_20 -2.14719997
## GENE_21 1.65252620
## GENE_22 0.62044068
## GENE_23 -0.39340738
## GENE_24 -2.69889427
## GENE_25 0.57631199
## GENE_26 0.38101424
## GENE_27 3.10322817
## GENE_28 0.67727719
## GENE_29 -2.77699849
## GENE_30 -1.02160124
## GENE_31 -0.48657508
## GENE_32 4.49204661
## GENE_33 3.04431719
## GENE_34 2.40785423
## GENE_35 1.10461280
## GENE_36 1.33175679
## GENE_37 -0.66242250
## GENE_38 4.62062347
## GENE_39 1.17136161
## GENE_40 0.77585434
## GENE_41 -3.08283792
## GENE_42 0.47144803
## GENE_43 2.14328816
## GENE_44 0.33015880
## GENE_45 -0.69850787
## GENE_46 -1.77340095
## GENE_47 -0.70796068
## GENE_48 -0.94303749
## GENE_49 -0.80958868
## GENE_50 -0.30164511
## GENE_51 -2.96113981
## GENE_52 -3.57675201
## GENE_53 -0.53322585
## GENE_54 0.71368513
## GENE_55 -2.32655472
## GENE_56 -2.63629527
## GENE_57 -0.03448166
## GENE_58 -0.07286711
## GENE_59 3.77070894
## GENE_60 0.46375691
## GENE_61 2.06159147
## GENE_62 -0.95449470
## GENE_63 -2.20335841
## GENE_64 0.75913700
## GENE_65 -2.73573236
## GENE_66 1.96234421
## GENE_67 0.56920967
## GENE_68 0.57451782
## GENE_69 4.71603386
## GENE_70 -0.94742236
## GENE_71 0.82562117
## GENE_72 3.29840877
## GENE_73 0.01087376
## GENE_74 1.59665611
## GENE_75 0.72744401
## GENE_76 1.70657760
## GENE_77 -0.78747285
## GENE_78 -1.78678864
## GENE_79 -1.27104152
## GENE_80 -0.93681516
## GENE_81 -2.35322420
## GENE_82 -0.67476244
## GENE_83 1.43161811
## GENE_84 -0.51631954
## GENE_85 -5.87711823
## GENE_86 1.58300423
## GENE_87 1.11481411
## GENE_88 -1.41595692
## GENE_89 -1.92005393
## GENE_90 0.84215134
## GENE_91 1.50268483
## GENE_92 0.27307973
## GENE_93 0.12798081
## GENE_94 3.24468570
## GENE_95 1.92374818
## GENE_96 1.19594368
## GENE_97 2.35037384
## GENE_98 1.52257540
## GENE_99 2.34460923
## GENE_100 4.17952435
We can adjust some parameters for creating the backend with appropriate arguments to writeTileDBArray().
For example, the code below allows us to control the path to the backend
as well as the name of the attribute containing the data.
X <- matrix(rnorm(1000), ncol=10)
path <- tempfile()
writeTileDBArray(X, path=path, attr="WHEE")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.8144336 0.1095231 -0.3230079 . -0.6548845 0.8671223
## [2,] 0.9068184 1.4670451 -0.4473258 . -0.6489838 -2.3043772
## [3,] -1.0308771 1.5593989 2.1689987 . 0.4566765 -0.3524657
## [4,] 1.0948986 -0.3622410 0.4206669 . 1.2915759 -0.6969460
## [5,] -1.3395880 0.2998638 1.7158243 . 0.9057102 -0.8169803
## ... . . . . . .
## [96,] 0.52574389 1.77585177 1.16962779 . -0.82543716 0.75317596
## [97,] 2.32208752 -0.03647555 0.13658541 . 1.29425662 0.13841471
## [98,] 2.08721845 -1.82792033 -0.66518492 . 0.53434760 0.38378129
## [99,] 0.01016025 0.04566073 0.62669798 . -0.09818803 -0.18677437
## [100,] -0.01300947 0.89667623 0.78121684 . 0.28520512 -0.59656417
As these arguments cannot be passed during coercion, we instead provide global variables that can be set or unset to affect the outcome.
path2 <- tempfile()
setTileDBPath(path2)
as(X, "TileDBArray") # uses path2 to store the backend.
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.8144336 0.1095231 -0.3230079 . -0.6548845 0.8671223
## [2,] 0.9068184 1.4670451 -0.4473258 . -0.6489838 -2.3043772
## [3,] -1.0308771 1.5593989 2.1689987 . 0.4566765 -0.3524657
## [4,] 1.0948986 -0.3622410 0.4206669 . 1.2915759 -0.6969460
## [5,] -1.3395880 0.2998638 1.7158243 . 0.9057102 -0.8169803
## ... . . . . . .
## [96,] 0.52574389 1.77585177 1.16962779 . -0.82543716 0.75317596
## [97,] 2.32208752 -0.03647555 0.13658541 . 1.29425662 0.13841471
## [98,] 2.08721845 -1.82792033 -0.66518492 . 0.53434760 0.38378129
## [99,] 0.01016025 0.04566073 0.62669798 . -0.09818803 -0.18677437
## [100,] -0.01300947 0.89667623 0.78121684 . 0.28520512 -0.59656417
sessionInfo()
## R version 4.5.0 RC (2025-04-04 r88126)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.7.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RcppSpdlog_0.0.21 TileDBArray_1.18.0 DelayedArray_0.34.0
## [4] SparseArray_1.8.0 S4Arrays_1.8.0 IRanges_2.42.0
## [7] abind_1.4-8 S4Vectors_0.46.0 MatrixGenerics_1.20.0
## [10] matrixStats_1.5.0 BiocGenerics_0.54.0 generics_0.1.3
## [13] Matrix_1.7-3 BiocStyle_2.36.0
##
## loaded via a namespace (and not attached):
## [1] bit_4.6.0 jsonlite_2.0.0 compiler_4.5.0
## [4] BiocManager_1.30.25 crayon_1.5.3 Rcpp_1.0.14
## [7] nanoarrow_0.6.0 jquerylib_0.1.4 yaml_2.3.10
## [10] fastmap_1.2.0 lattice_0.22-7 R6_2.6.1
## [13] RcppCCTZ_0.2.13 XVector_0.48.0 tiledb_0.30.2
## [16] knitr_1.50 bookdown_0.43 bslib_0.9.0
## [19] rlang_1.1.6 cachem_1.1.0 xfun_0.52
## [22] sass_0.4.10 bit64_4.6.0-1 cli_3.6.4
## [25] spdl_0.0.5 digest_0.6.37 grid_4.5.0
## [28] lifecycle_1.0.4 data.table_1.17.0 evaluate_1.0.3
## [31] nanotime_0.3.12 zoo_1.8-14 rmarkdown_2.29
## [34] tools_4.5.0 htmltools_0.5.8.1