Shortest unique prefix - hack
- I want to find the shortest unique prefix for each string in a vector (N < 20).
- This isn’t a computational bottleneck, so no need for heroic/drastic solutions.
- I wasn’t going to write a trie implementation just to solve this problem.
Unfortunately, this got a bit ugly :( .Anyone got a more elegant solution?
library(tidyverse)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Shortest Unique Prefix
#
# - Number of strings is always < 20
# - 'strings' input does not contain duplicates
# - this isn't a computational bottleneck
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
find_shortest_unique_prefix <- function(strings) {
# Determine uniqueness boolean vector of a set of arguments
is_unique <- function(...) {x <- list(...); !(duplicated(x) | duplicated(x, fromLast=TRUE))}
# for each possible prefix length, truncate all the strings
# and see which of them are unique
prefix_uniqueness <- strings %>%
map(str_sub, end=seq(max(nchar(strings)))) %>%
pmap(is_unique) %>%
transpose()
# Find the first occurrence of each string being unique
shortest_prefix_length <- prefix_uniqueness %>%
map_int(
~.x %>% flatten_lgl() %>% which.max()
)
# Trim all the strings to the shortest prefix length
shortest_prefix <- stringr::str_sub(strings, end=shortest_prefix_length)
shortest_prefix
}
find_shortest_unique_prefix(c("ab", "apple", "apart", 'b', 'ag'))
[1] "ab" "app" "apa" "b" "ag"
find_shortest_unique_prefix(c('blue', 'black', 'bold'))
[1] "blu" "bla" "bo"