Challenge
- Write R code to add 2 BigInts (arbitrarily large integers)
- Integer input is as a string containing just the digits 0-9 e.g.
"28349823749234234238077632"
- Integer output should be the same.
- Contributions welcome!
- No calling out to C libraries or using
Rcpp
:)
Prep work: Create random large integers
- Guarantee that there are no leading zeros (as
gmp
will interpret as octal)
rbigint <- function(size) {
paste(c(
sample(1:9, size = 1),
sample(0:9, size = size-1, replace = TRUE)
), collapse='')
}
rbigint(50)
[1] "60736670197966208346760834361818514898029183539092"
Naive: for loop
- Split each number into individual digits
- Pad out each list of digits with zeros so that they’re the same length
- Loop over the digits, adding them 1 pair at a time, and including any carry
from the previous pair.
- Paste it all back together
add_naive <- function(a, b) {
# Split the numbers into individiual integer digits
A <- as.integer(rev(strsplit(a, '')[[1]]))
B <- as.integer(rev(strsplit(b, '')[[1]]))
# Pad shorter number with zeros
zeros <- integer(abs(length(A) - length(B)))
if (length(A) < length(B)) {
A <- c(A, zeros)
} else {
B <- c(B, zeros)
}
# Allocate a place to store the sum of each pair (plus an extra in case there's a carry)
digits <- integer(length(A) + 1L)
# Initialise carry so that it's valid for the first pair
carry <- 0L
# loop: add with carry
for (i in seq_len(length(A))) {
digit <- A[i] + B[i] + carry
carry <- digit %/% 10L
digits[i] <- digit %% 10L
}
digits[i+1] <- carry
# Turn integer vector back into a string
result <- paste(rev(digits), collapse = '')
sub("^0+", '', result) # Remove leading zeros for neatness
}
Example with Naive for loop method.
set.seed(1)
a <- rbigint(20)
b <- rbigint(30)
c(a, b)
[1] "93601612044959684488" "541980325995339865878675962957"
add_naive(a, b)
[1] "541980326088941477923635647445"
# Compare to `gmp` library
as.character(gmp::as.bigz(a) + gmp::as.bigz(b))
[1] "541980326088941477923635647445"
Warning: `data_frame()` is deprecated, use `tibble()`.
This warning is displayed once per session.
Table 1: Summary: add_naive()
a |
93601612044959684488441980325995339865878675962957115502275657603788636504508662 |
b |
619962190997946745702920553840252622081750166387242310689911201222781979348 |
add_naive |
93602232007150682435187683246549180118500757713123502744586347514989859286488010 |
gmp |
93602232007150682435187683246549180118500757713123502744586347514989859286488010 |
identical |
TRUE |
Naive Vectorised
add_vectorised <- function(a, b) {
# Split the numbers into individiual integer digits
A <- as.integer(strsplit(a, '')[[1]])
B <- as.integer(strsplit(b, '')[[1]])
# Pad shorter number with zeros
zeros <- integer(abs(length(A) - length(B)))
if (length(A) < length(B)) {
A <- c(zeros, A)
} else {
B <- c(zeros, B)
}
# Vectorised addition of each pair of digits
digits <- A + B
carry <- digits %/% 10L
digits <- digits %% 10L
digits <- c(0L, digits)
# If there's any carry, then add it in
while (any(carry > 0L)) {
carry <- tail(c(carry, 0L), length(A) + 1)
digits <- digits + carry
carry <- digits %/% 10L
digits <- digits %% 10L
}
# Turn integer vector back into a string
result <- paste(digits, collapse='')
sub("^0+", '', result) # Remove leading zeros for neatness
}
Table 2: Summary: add_vectorised()
a |
93601612044959684488441980325995339865878675962957115502275657603788636504508662 |
b |
619962190997946745702920553840252622081750166387242310689911201222781979348 |
add_vectorised |
93602232007150682435187683246549180118500757713123502744586347514989859286488010 |
gmp |
93602232007150682435187683246549180118500757713123502744586347514989859286488010 |
identical |
TRUE |
Vectorised chunks
- Rather than looking at one digit at a time, look at chunks of 8 digits at a time.
- Any addition of a pair of chunks of this size will still fit in an R integer
add_vectorised_chunked <- function(a, b) {
max_chunk_int <- 100000000L
# Pad shorter number with zeros
zeros <- paste(rep('0', abs(nchar(a) - nchar(b))), collapse='')
if (nchar(a) < nchar(b)) {
a <- paste0(zeros, a)
} else {
b <- paste0(zeros, b)
}
n <- seq(1, nchar(a), 8)
# split into chunks
A <- rev(as.integer(stringr::str_sub(a, -n-8+1, -n)))
B <- rev(as.integer(stringr::str_sub(b, -n-8+1, -n)))
# vectorised addition of chunks and calculation of carry
digits <- A + B
carry <- digits %/% max_chunk_int
digits <- digits %% max_chunk_int
digits <- c(0L, digits)
# If there's any carry, then add it in
while (any(carry > 0L)) {
carry <- tail(c(carry, 0L), length(A) + 1)
digits <- digits + carry
carry <- digits %/% max_chunk_int
digits <- digits %% max_chunk_int
}
# Turn integer vector back into a string
result <- paste(sprintf("%08i", digits), collapse='')
sub("^0+", '', result) # Remove leading zeros for neatness
}
Table 3: Summary: add_vectorised_chunked()
a |
9360161204495968448844 |
b |
298032599533986587867596295711 |
add_vectorised_chunked |
298032608894147792363564744555 |
gmp |
298032608894147792363564744555 |
identical |
TRUE |
Benchmarking
Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
This warning is displayed once per session.
Notes
- Using the
gmp
library was always expected to be the fastest (written in C)
- Not that much difference between the naive and vectorised code (without chunking)
- The extra workin chunking in the vectorised addition only pays off for big integers with more than 100 digits.