Which of the functions from the base
R package would you let a malicious user run?
There are 1301 functions in the base
package version 3.6.1.
What functions would you want to prevent a malicious user from running?
This post does a (very rough) first pass over what’s in the base
package and a (lazy, half-arsed!) classification of some
functions into safe(?) and unsafe(?). Note the presence of the (?) to indicate that I’m just roughing out some classifications here
and I’m not actually assuming any function is perfectly safe or unsafe!
Reasons for not letting someone access a function
I would like to restrict access to functions because I want to protect:
- My resources
- CPU
- RAM
- Filesystem
- Network
- My information
- Files
- Other objects in the R environment
Unsafe(?) functions: Those which access the system
There are lots of functions which access the underlying system that R is running on. Most seem to have sys in their name.
The key culprit is system
which is the holy grail of unsafe functions.
sys_funcs <- grep('(sys)', base_funcs, ignore.case = TRUE, value = TRUE)
sys_funcs
[1] ".First.sys" "R_system_version" "sys.call" "sys.calls"
[5] "Sys.chmod" "Sys.Date" "sys.frame" "sys.frames"
[9] "sys.function" "Sys.getenv" "Sys.getlocale" "Sys.getpid"
[13] "Sys.glob" "Sys.info" "sys.load.image" "Sys.localeconv"
[17] "sys.nframe" "sys.on.exit" "sys.parent" "sys.parents"
[21] "Sys.readlink" "sys.save.image" "Sys.setenv" "Sys.setFileTime"
[25] "Sys.setlocale" "Sys.sleep" "sys.source" "sys.status"
[29] "Sys.time" "Sys.timezone" "Sys.umask" "Sys.unsetenv"
[33] "Sys.which" "system" "system.file" "system.time"
[37] "system2"
Unsafe(?) functions: Those which appear to access the filesystem based upon their name
If the function name contains file
, read
, write
(or save, load or conn, which the pattern below also matches) then you can be pretty sure it’s going to hit the filesystem.
file_funcs <- grep('(file|read|write|save|load|conn)', base_funcs, ignore.case = TRUE, value = TRUE)
file_funcs
[1] ".readRDS" ".saveRDS" "autoload"
[4] "autoloader" "bzfile" "close.connection"
[7] "close.srcfile" "close.srcfilealias" "closeAllConnections"
[10] "dyn.load" "dyn.unload" "env.profile"
[13] "file" "file.access" "file.append"
[16] "file.choose" "file.copy" "file.create"
[19] "file.exists" "file.info" "file.link"
[22] "file.mode" "file.mtime" "file.path"
[25] "file.remove" "file.rename" "file.show"
[28] "file.size" "file.symlink" "flush.connection"
[31] "getAllConnections" "getConnection" "getLoadedDLLs"
[34] "gzfile" "is.loaded" "isNamespaceLoaded"
[37] "lazyLoad" "lazyLoadDBexec" "lazyLoadDBfetch"
[40] "library.dynam.unload" "list.files" "load"
[43] "loadedNamespaces" "loadingNamespaceInfo" "loadNamespace"
[46] "memory.profile" "nullfile" "open.connection"
[49] "open.srcfile" "open.srcfilealias" "open.srcfilecopy"
[52] "parseNamespaceFile" "print.connection" "print.srcfile"
[55] "rawConnection" "rawConnectionValue" "read.dcf"
[58] "readBin" "readChar" "readline"
[61] "readLines" "readRDS" "readRenviron"
[64] "save" "save.image" "saveRDS"
[67] "seek.connection" "showConnections" "socketConnection"
[70] "srcfile" "srcfilealias" "srcfilecopy"
[73] "summary.connection" "summary.srcfile" "sys.load.image"
[76] "Sys.readlink" "sys.save.image" "Sys.setFileTime"
[79] "system.file" "tempfile" "textConnection"
[82] "textConnectionValue" "truncate.connection" "unloadNamespace"
[85] "write" "write.dcf" "writeBin"
[88] "writeChar" "writeLines" "xzfile"
Unsafe(?) functions: Those which appear to access the filesystem based upon their arguments
There are also functions which take a filename or a connection, which means they also access the filesystem.
file2_funcs <- formal_arg_names %>%
keep(~any(c('file', 'filename', 'open', 'con') %in% .x)) %>%
names()
file2_funcs
[1] ".getRequiredPackages" "bzfile" "cat"
[4] "close" "close.connection" "close.srcfile"
[7] "close.srcfilealias" "dget" "dput"
[10] "dump" "fifo" "file"
[13] "flush" "flush.connection" "gzcon"
[16] "gzfile" "isatty" "isIncomplete"
[19] "isOpen" "isSeekable" "load"
[22] "open" "open.connection" "open.srcfile"
[25] "open.srcfilealias" "open.srcfilecopy" "parse"
[28] "pipe" "rawConnection" "rawConnectionValue"
[31] "read.dcf" "readBin" "readChar"
[34] "readLines" "readRDS" "save"
[37] "save.image" "saveRDS" "scan"
[40] "seek" "seek.connection" "sink"
[43] "socketConnection" "source" "srcfile"
[46] "srcfilealias" "srcfilecopy" "sys.source"
[49] "textConnection" "textConnectionValue" "truncate"
[52] "truncate.connection" "unz" "url"
[55] "write" "write.dcf" "writeBin"
[58] "writeChar" "writeLines" "xzfile"
Unsafe(?) functions: Those which appear to access package internals based upon their arguments
internal_funcs <- formal_arg_names %>%
keep(~any(c('package', 'lib.loc', 'useImports', 'handlers') %in% .x)) %>%
names()
internal_funcs
[1] ".Defunct" ".Deprecated" ".getRequiredPackages"
[4] ".getRequiredPackages2" ".packages" "autoload"
[7] "autoloader" "find.package" "library"
[10] "library.dynam" "loadNamespace" "packageHasNamespace"
[13] "packageNotFoundError" "parseNamespaceFile" "path.package"
[16] "registerS3methods" "require" "requireNamespace"
[19] "system.file" "taskCallbackManager"
Unsafe(?) functions: Those which appear to access R internals based upon their name
internal2_funcs <- grep('(gc)', base_funcs, ignore.case = TRUE, value = TRUE)
internal2_funcs
[1] "gc" "gc.time" "gcinfo" "gctorture"
[5] "gctorture2" "warningCondition"
Unsafe(?) functions: Those which change the global state.
An example of a function which can change global state even when evaluated within a restricted environment is graphics::par
.
I haven’t gone looking in the base
package for similar.
Unsafe(?) functions: Those which start with a .
I have no idea what most of these are for!
dot_funcs <- grep('^\\.', base_funcs, ignore.case = TRUE, value = TRUE)
dot_funcs
[1] ".__H__.cbind" ".__H__.rbind"
[3] "...elt" "...length"
[5] "..getNamespace" ".amatch_bounds"
[7] ".amatch_costs" ".bincode"
[9] ".C" ".cache_class"
[11] ".Call" ".Call.graphics"
[13] ".col" ".colMeans"
[15] ".colSums" ".Date"
[17] ".decode_numeric_version" ".Defunct"
[19] ".deparseOpts" ".Deprecated"
[21] ".detach" ".difftime"
[23] ".doSortWrap" ".doTrace"
[25] ".doWrap" ".dynLibs"
[27] ".encode_numeric_version" ".expand_R_libs_env_var"
[29] ".External" ".External.graphics"
[31] ".External2" ".find.package"
[33] ".First.sys" ".format.zeros"
[35] ".Fortran" ".getNamespace"
[37] ".getNamespaceInfo" ".getRequiredPackages"
[39] ".getRequiredPackages2" ".gt"
[41] ".gtn" ".handleSimpleError"
[43] ".Internal" ".isMethodsDispatchOn"
[45] ".isOpen" ".kappa_tri"
[47] ".kronecker" ".libPaths"
[49] ".make_numeric_version" ".makeMessage"
[51] ".mapply" ".maskedMsg"
[53] ".mergeExportMethods" ".mergeImportMethods"
[55] ".NotYetImplemented" ".NotYetUsed"
[57] ".OptRequireMethods" ".packages"
[59] ".packageStartupMessage" ".path.package"
[61] ".POSIXct" ".POSIXlt"
[63] ".Primitive" ".primTrace"
[65] ".primUntrace" ".readRDS"
[67] ".rmpkg" ".row"
[69] ".row_names_info" ".rowMeans"
[71] ".rowNamesDF<-" ".rowSums"
[73] ".saveRDS" ".Script"
[75] ".set_row_names" ".signalSimpleWarning"
[77] ".standard_regexps" ".subset"
[79] ".subset2" ".TAOCP1997init"
[81] ".traceback" ".tryResumeInterrupt"
[83] ".valid.factor"
What’s left?
After this triage of ‘unsafe’ funcs, there are still over 1061 functions to go!
Safe(?) functions: as.*
functions
as_funcs <- grep('^as\\.', leftover_funcs, ignore.case = TRUE, value = TRUE)
as_funcs
[1] "as.array" "as.array.default"
[3] "as.call" "as.character"
[5] "as.character.condition" "as.character.Date"
[7] "as.character.default" "as.character.error"
[9] "as.character.factor" "as.character.hexmode"
[11] "as.character.numeric_version" "as.character.octmode"
[13] "as.character.POSIXt" "as.character.srcref"
[15] "as.complex" "as.data.frame"
[17] "as.data.frame.array" "as.data.frame.AsIs"
[19] "as.data.frame.character" "as.data.frame.complex"
[21] "as.data.frame.data.frame" "as.data.frame.Date"
[23] "as.data.frame.default" "as.data.frame.difftime"
[25] "as.data.frame.factor" "as.data.frame.integer"
[27] "as.data.frame.list" "as.data.frame.logical"
[29] "as.data.frame.matrix" "as.data.frame.model.matrix"
[31] "as.data.frame.noquote" "as.data.frame.numeric"
[33] "as.data.frame.numeric_version" "as.data.frame.ordered"
[35] "as.data.frame.POSIXct" "as.data.frame.POSIXlt"
[37] "as.data.frame.raw" "as.data.frame.table"
[39] "as.data.frame.ts" "as.data.frame.vector"
[41] "as.Date" "as.Date.character"
[43] "as.Date.default" "as.Date.factor"
[45] "as.Date.numeric" "as.Date.POSIXct"
[47] "as.Date.POSIXlt" "as.difftime"
[49] "as.double" "as.double.difftime"
[51] "as.double.POSIXlt" "as.environment"
[53] "as.expression" "as.expression.default"
[55] "as.factor" "as.function"
[57] "as.function.default" "as.hexmode"
[59] "as.integer" "as.list"
[61] "as.list.data.frame" "as.list.Date"
[63] "as.list.default" "as.list.environment"
[65] "as.list.factor" "as.list.function"
[67] "as.list.numeric_version" "as.list.POSIXct"
[69] "as.list.POSIXlt" "as.logical"
[71] "as.logical.factor" "as.matrix"
[73] "as.matrix.data.frame" "as.matrix.default"
[75] "as.matrix.noquote" "as.matrix.POSIXlt"
[77] "as.name" "as.null"
[79] "as.null.default" "as.numeric"
[81] "as.numeric_version" "as.octmode"
[83] "as.ordered" "as.package_version"
[85] "as.pairlist" "as.POSIXct"
[87] "as.POSIXct.Date" "as.POSIXct.default"
[89] "as.POSIXct.numeric" "as.POSIXct.POSIXlt"
[91] "as.POSIXlt" "as.POSIXlt.character"
[93] "as.POSIXlt.Date" "as.POSIXlt.default"
[95] "as.POSIXlt.factor" "as.POSIXlt.numeric"
[97] "as.POSIXlt.POSIXct" "as.qr"
[99] "as.raw" "as.single"
[101] "as.single.default" "as.symbol"
[103] "as.table" "as.table.default"
[105] "as.vector" "as.vector.factor"
Safe(?) functions: *apply
functions
apply_funcs <- grep('apply', leftover_funcs, ignore.case = TRUE, value = TRUE)
apply_funcs
[1] "apply" "eapply" "lapply" "mapply" "rapply" "sapply" "tapply" "vapply"
Safe(?) functions: print.*
functions
print_funcs <- grep('^print', leftover_funcs, ignore.case = TRUE, value = TRUE)
print_funcs
[1] "print" "print.AsIs"
[3] "print.by" "print.condition"
[5] "print.data.frame" "print.Date"
[7] "print.default" "print.difftime"
[9] "print.Dlist" "print.DLLInfo"
[11] "print.DLLInfoList" "print.DLLRegisteredRoutines"
[13] "print.eigen" "print.factor"
[15] "print.function" "print.hexmode"
[17] "print.libraryIQR" "print.listof"
[19] "print.NativeRoutineList" "print.noquote"
[21] "print.numeric_version" "print.octmode"
[23] "print.packageInfo" "print.POSIXct"
[25] "print.POSIXlt" "print.proc_time"
[27] "print.restart" "print.rle"
[29] "print.simple.list" "print.srcref"
[31] "print.summary.table" "print.summary.warnings"
[33] "print.summaryDefault" "print.table"
[35] "print.warnings"
assign funcs
[1] "[[<-" "[[<-.data.frame"
[3] "[[<-.factor" "[[<-.numeric_version"
[5] "[[<-.POSIXlt" "[<-"
[7] "[<-.data.frame" "[<-.Date"
[9] "[<-.factor" "[<-.numeric_version"
[11] "[<-.POSIXct" "[<-.POSIXlt"
[13] "@<-" "<-"
[15] "<<-" "$<-"
[17] "$<-.data.frame" "attr<-"
[19] "attributes<-" "body<-"
[21] "class<-" "colnames<-"
[23] "comment<-" "diag<-"
[25] "dim<-" "dimnames<-"
[27] "dimnames<-.data.frame" "Encoding<-"
[29] "environment<-" "formals<-"
[31] "is.na<-" "is.na<-.default"
[33] "is.na<-.factor" "is.na<-.numeric_version"
[35] "length<-" "length<-.Date"
[37] "length<-.difftime" "length<-.factor"
[39] "length<-.POSIXct" "length<-.POSIXlt"
[41] "levels<-" "levels<-.factor"
[43] "mode<-" "mostattributes<-"
[45] "names<-" "names<-.POSIXlt"
[47] "oldClass<-" "parent.env<-"
[49] "regmatches<-" "row.names<-"
[51] "row.names<-.data.frame" "row.names<-.default"
[53] "rownames<-" "split<-"
[55] "split<-.data.frame" "split<-.default"
[57] "storage.mode<-" "substr<-"
[59] "substring<-" "units<-"
[61] "units<-.difftime"
Non-alpha funcs
[1] "-" "-.Date" "-.POSIXt"
[4] ":" "::" ":::"
[7] "!" "!.hexmode" "!.octmode"
[10] "!=" "(" "["
[13] "[.AsIs" "[.data.frame" "[.Date"
[16] "[.difftime" "[.Dlist" "[.DLLInfoList"
[19] "[.factor" "[.hexmode" "[.listof"
[22] "[.noquote" "[.numeric_version" "[.octmode"
[25] "[.POSIXct" "[.POSIXlt" "[.simple.list"
[28] "[.table" "[.warnings" "[["
[31] "[[.data.frame" "[[.Date" "[[.factor"
[34] "[[.numeric_version" "[[.POSIXct" "[[.POSIXlt"
[37] "{" "@" "*"
[40] "*.difftime" "/" "/.difftime"
[43] "&" "&.hexmode" "&.octmode"
[46] "&&" "%*%" "%/%"
[49] "%%" "%in%" "%o%"
[52] "%x%" "^" "+"
[55] "+.Date" "+.POSIXt" "<"
[58] "<=" "=" "=="
[61] ">" ">=" "|"
[64] "|.hexmode" "|.octmode" "||"
[67] "~" "$" "$.DLLInfo"
[70] "$.package_version"
Functions which seem safe(?)
math_funcs <- c('sin', 'cos', 'tan',
'acos', 'asin', 'atan', 'atan2',
'sinpi', 'cospi', 'tanpi',
'exp', 'expm1',
'log', 'logb', 'log10', 'log2')
object_funcs <- c('logical', 'as.logical', 'is.logical',
'integer', 'as.integer', 'is.integer',
'numeric', 'as.numeric', 'is.numeric',
'double' , 'as.double' , 'is.double' ,
'single' , 'as.single' , 'is.single',
'complex', 'as.complex', 'is.complex',
'structure',
'c', 'list', 'data.frame', 'complex')
complex_funcs <- c('Re', 'Im', 'Mod', 'Arg', 'Conj')
seq_funcs <- c(':', 'seq', 'seq.int', 'seq_along', 'seq_len')
plot_funcs <- c('plot', 'points', 'lines', 'title', 'legend', 'points.formula')
flow_control_funcs <- c('if', 'for', 'while', 'repeat', 'break', 'next')
Conclusion
So many functions. So many ways for a malicious user to abuse the system.
And this was only the base
package!
Appendix: sandboxR
’s list of blacklisted functions from base
For comparison, the sandboxR
package contains some blacklists for various core R packages.
Here is the list of functions it blacklists from the base package