This document serves as an overview for measuring the performance of
RcppAlgos against other tools for generating combinations,
permutations, and partitions. This stackoverflow post: How to generate
permutations or combinations of object in R? has some benchmarks.
You will note that the examples in that post are relatively small. The
benchmarks below will focus on larger examples where performance really
matters and for this reason we only consider the packages arrangements,
partitions,
and RcppAlgos.
For the benchmarks below, we used a
2022 Macbook Air Apple M2 24 GB machine.
library(RcppAlgos)
library(partitions)
library(arrangements)
#> 
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#> 
#>     compositions
library(microbenchmark)
options(digits = 4)
options(width = 90)
pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.4.2 (2024-10-31)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS Sonoma 14.5
pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.9.2'
#> 
#> $arrangements
#> [1] '1.1.9'
#> 
#> $partitions
#> [1] '1.10.7'
#> 
#> $microbenchmark
#> [1] '1.4.10'
numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21
t1 <- comboGeneral(v1, m, Parallel = T)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150       21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v1, m),
               cbArrangements = combinations(v1, m),
               times = 15, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 2.666 2.427 2.432  2.423 2.402 2.401    15
#>  cbArrangements 2.517 2.290 2.292  2.282 2.263 2.252    15v2 <- v1[1:10]
m <- 20
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
               cbArrangements = combinations(v2, m, replace = TRUE),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 3.014 2.937 2.910  2.921 2.850 2.844    15
#>  cbArrangements 2.771 2.782 2.766  2.762 2.689 2.944    15myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
               cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.105 3.069 3.058  3.040 2.996 3.170    10
#>  cbArrangements 5.664 5.726 5.662  5.672 5.598 5.604    10v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720       6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v4, 6),
               cbArrangements = permutations(v4, 6),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 1.167 1.174 1.090  1.160 1.132 1.056    15
#>  cbArrangements 2.042 2.052 1.953  2.025 2.167 1.656    15
## Indexing permutation example with the partitions package
t1 <- permuteGeneral(11, nThreads = 4)
t2 <- permutations(11)
t3 <- perms(11)
dim(t1)
#> [1] 39916800       11
stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
               cbRcppAlgosSer = permuteGeneral(11),
               cbArrangements = permutations(11),
               cbPartitions   = perms(11),
               times = 5, unit = "relative")
#> Unit: relative
#>            expr    min     lq  mean median     uq   max neval
#>  cbRcppAlgosPar  1.000  1.000 1.000  1.000  1.000 1.000     5
#>  cbRcppAlgosSer  2.540  2.477 2.434  2.841  2.825 1.867     5
#>  cbArrangements  3.915  4.136 3.692  4.158  4.246 2.684     5
#>    cbPartitions 10.436 10.282 9.375 10.392 10.990 6.637     5v5 <- v3[1:5]
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
               cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.002 2.803 2.334  2.778 2.780 0.937    10
#>  cbArrangements 3.289 3.055 2.739  3.077 3.077 1.668    10v6 <- sort(runif(12))
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760        7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
               cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.531 3.460 3.217  3.437 2.758 2.815    10
#>  cbArrangements 3.901 3.824 3.591  3.816 3.128 3.084    10t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
                    all(rowSums(t1) == 140), all(rowSums(t2) == 140),
                    all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150      16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
               cbArrangements = partitions(140, distinct = TRUE),
               cbPartitions   = diffparts(140),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median     uq    max neval
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.000  1.000    10
#>  cbRcppAlgosSer  3.172  3.178  2.887  3.098  2.696  2.539    10
#>  cbArrangements  2.510  2.528  2.316  2.488  2.205  2.173    10
#>    cbPartitions 16.918 17.160 15.042 17.001 13.310 12.298    10t1 <- comboGeneral(160, 10,
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(160, 10),
               cbArrangements = partitions(160, 10, distinct = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.428 3.401 3.013  3.220 3.200 2.316    10
#>  cbArrangements 4.657 4.587 3.951  4.307 4.274 2.936    10t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 65), all(rowSums(t2) == 65),
          all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558      65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
               cbArrangements = partitions(65),
               cbPartitions   = parts(65),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 2.849 2.767 2.311  2.461 2.313 1.751    20
#>  cbArrangements 2.122 2.047 1.712  1.834 1.794 1.130    20
#>    cbPartitions 8.855 8.639 6.886  7.736 6.792 4.217    20t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212      15
rm(t1, t2)
# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
               cbArrangements = partitions(100, 15),
               cbPartitions   = restrictedparts(100, 15,
                                                include.zero = FALSE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median    uq    max neval
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.00  1.000    10
#>  cbRcppAlgosSer  3.402  3.281  3.055  3.150  2.85  2.827    10
#>  cbArrangements  4.194  4.045  3.884  4.087  3.51  3.985    10
#>    cbPartitions 14.250 14.076 12.877 13.534 11.57 11.824    10Currenlty, RcppAlgos is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos and use this as a reference for future
improvements.
t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 120)
dim(t1)
#> [1] 7340225      10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
               times = 10)
#> Unit: milliseconds
#>         expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgos 246.8 250.6 255.3  252.6 256.4 269.3    10t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 15), all(rowSums(t2) == 15),
          all(colSums(t3) == 15))
dim(t1)
#> [1] 16384    15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
               cbArrangements = arrangements::compositions(15),
               cbPartitions   = partitions::compositions(15),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr     min      lq   mean  median      uq     max neval
#>  cbRcppAlgosSer   1.000   1.000   1.00   1.000   1.000   1.000    20
#>  cbArrangements   1.182   1.205   1.18   1.194   1.179   1.054    20
#>    cbPartitions 129.267 145.992 186.50 192.884 219.196 230.413    20For the next two examples, we will exclude the
partitions package for efficiency reasons.
t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
          all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304      23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
               cbArrangements = arrangements::compositions(23),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 3.410 3.329 3.331  3.327 3.324 3.397    20
#>  cbArrangements 3.797 3.699 3.691  3.698 3.689 3.611    20t1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)
stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005       10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
               cbArrangements = arrangements::compositions(30, 10),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 2.988 3.077 2.952  3.020 3.082 1.924    20
#>  cbArrangements 3.199 3.170 3.036  3.113 3.074 2.213    20We will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos. The results are
similar for the rest of the cases not shown.
pkg_arrangements <- function(n, total) {
    a <- icombinations(n, as.integer(n / 2))
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- comboIter(n, as.integer(n / 2))
    for (i in 1:total) a@nextIter()
}
total <- comboCount(18, 9)
total
#> [1] 48620
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(18, total),
               cbArrangements = pkg_arrangements(18, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median   uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.0  1.00    15
#>  cbArrangements 19.31 19.12 18.91  18.81 18.6 18.81    15pkg_arrangements <- function(n, total) {
    a <- ipermutations(n)
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- permuteIter(n)
    for (i in 1:total) a@nextIter()
}
total <- permuteCount(8)
total
#> [1] 40320
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(8, total),
               cbArrangements = pkg_arrangements(8, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 19.61 19.41 18.93  19.14 18.66 17.41    15pkg_partitions <- function(n, total) {
    a <- firstpart(n)
    for (i in 1:(total - 1)) a <- nextpart(a)
}
pkg_arrangements <- function(n, total) {
    a <- ipartitions(n)
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- partitionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}
total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(40, total),
               cbArrangements = pkg_arrangements(40, total),
               cbPartitions   = pkg_partitions(40, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 15.40 15.11 14.18  14.50 13.21 13.74    15
#>    cbPartitions 24.79 24.45 23.08  23.48 21.81 21.54    15pkg_partitions <- function(n, total) {
    a <- firstcomposition(n)
    for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}
pkg_arrangements <- function(n, total) {
    a <- icompositions(n)
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- compositionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}
total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(15, total),
               cbArrangements = pkg_arrangements(15, total),
               cbPartitions   = pkg_partitions(15, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 14.23 14.15 13.45  14.01 12.98 12.10    15
#>    cbPartitions 43.88 43.91 41.42  43.40 40.17 34.25    15