Evaluating R code from potentially malicious sources Part 4

Which of the functions from the base R package would you let a malicious user run?

There are 1301 functions in the base package version 3.6.1.

What functions would you want to prevent a malicious user from running?

This post does a (very rough) first pass over what’s in the base package and a (lazy, half-arsed!) classification of some functions into safe(?) and unsafe(?). Note the presence of the (?) to indicate that I’m just roughing out some classifications here and I’m not actually assuming any function is perfectly safe or unsafe!

Reasons for not letting someone access a function

I would like to restrict access to functions because I want to restrict access to:

  • My resources
    • CPU
    • RAM
    • Filesystem
    • Network
  • My information
    • Files
    • Other objects in the R environment

Unsafe(?) functions: Those which access the system

There are lots of functions which access the underlying system that R is running on. Most seem to have sys in their name. The key culprit is system, which is the holy grail of unsafe functions.

sys_funcs <- grep('(sys)', base_funcs, ignore.case = TRUE, value = TRUE)
sys_funcs
 [1] ".First.sys"       "R_system_version" "sys.call"         "sys.calls"       
 [5] "Sys.chmod"        "Sys.Date"         "sys.frame"        "sys.frames"      
 [9] "sys.function"     "Sys.getenv"       "Sys.getlocale"    "Sys.getpid"      
[13] "Sys.glob"         "Sys.info"         "sys.load.image"   "Sys.localeconv"  
[17] "sys.nframe"       "sys.on.exit"      "sys.parent"       "sys.parents"     
[21] "Sys.readlink"     "sys.save.image"   "Sys.setenv"       "Sys.setFileTime" 
[25] "Sys.setlocale"    "Sys.sleep"        "sys.source"       "sys.status"      
[29] "Sys.time"         "Sys.timezone"     "Sys.umask"        "Sys.unsetenv"    
[33] "Sys.which"        "system"           "system.file"      "system.time"     
[37] "system2"         

Unsafe(?) functions: Those which appear to access the filesystem based upon their name

If the function name contains file, read, write, save, load or conn, then you can be pretty sure it’s going to hit the filesystem.

file_funcs <- grep('(file|read|write|save|load|conn)', base_funcs, ignore.case = TRUE, value = TRUE)
file_funcs
 [1] ".readRDS"             ".saveRDS"             "autoload"            
 [4] "autoloader"           "bzfile"               "close.connection"    
 [7] "close.srcfile"        "close.srcfilealias"   "closeAllConnections" 
[10] "dyn.load"             "dyn.unload"           "env.profile"         
[13] "file"                 "file.access"          "file.append"         
[16] "file.choose"          "file.copy"            "file.create"         
[19] "file.exists"          "file.info"            "file.link"           
[22] "file.mode"            "file.mtime"           "file.path"           
[25] "file.remove"          "file.rename"          "file.show"           
[28] "file.size"            "file.symlink"         "flush.connection"    
[31] "getAllConnections"    "getConnection"        "getLoadedDLLs"       
[34] "gzfile"               "is.loaded"            "isNamespaceLoaded"   
[37] "lazyLoad"             "lazyLoadDBexec"       "lazyLoadDBfetch"     
[40] "library.dynam.unload" "list.files"           "load"                
[43] "loadedNamespaces"     "loadingNamespaceInfo" "loadNamespace"       
[46] "memory.profile"       "nullfile"             "open.connection"     
[49] "open.srcfile"         "open.srcfilealias"    "open.srcfilecopy"    
[52] "parseNamespaceFile"   "print.connection"     "print.srcfile"       
[55] "rawConnection"        "rawConnectionValue"   "read.dcf"            
[58] "readBin"              "readChar"             "readline"            
[61] "readLines"            "readRDS"              "readRenviron"        
[64] "save"                 "save.image"           "saveRDS"             
[67] "seek.connection"      "showConnections"      "socketConnection"    
[70] "srcfile"              "srcfilealias"         "srcfilecopy"         
[73] "summary.connection"   "summary.srcfile"      "sys.load.image"      
[76] "Sys.readlink"         "sys.save.image"       "Sys.setFileTime"     
[79] "system.file"          "tempfile"             "textConnection"      
[82] "textConnectionValue"  "truncate.connection"  "unloadNamespace"     
[85] "write"                "write.dcf"            "writeBin"            
[88] "writeChar"            "writeLines"           "xzfile"              

Unsafe(?) functions: Those which appear to access the filesystem based upon their arguments

There are also functions which take a filename or a connection, which means they also access the filesystem.

file2_funcs <- formal_arg_names %>% 
  keep(~any(c('file', 'filename', 'open', 'con') %in% .x)) %>%
  names()

file2_funcs
 [1] ".getRequiredPackages" "bzfile"               "cat"                 
 [4] "close"                "close.connection"     "close.srcfile"       
 [7] "close.srcfilealias"   "dget"                 "dput"                
[10] "dump"                 "fifo"                 "file"                
[13] "flush"                "flush.connection"     "gzcon"               
[16] "gzfile"               "isatty"               "isIncomplete"        
[19] "isOpen"               "isSeekable"           "load"                
[22] "open"                 "open.connection"      "open.srcfile"        
[25] "open.srcfilealias"    "open.srcfilecopy"     "parse"               
[28] "pipe"                 "rawConnection"        "rawConnectionValue"  
[31] "read.dcf"             "readBin"              "readChar"            
[34] "readLines"            "readRDS"              "save"                
[37] "save.image"           "saveRDS"              "scan"                
[40] "seek"                 "seek.connection"      "sink"                
[43] "socketConnection"     "source"               "srcfile"             
[46] "srcfilealias"         "srcfilecopy"          "sys.source"          
[49] "textConnection"       "textConnectionValue"  "truncate"            
[52] "truncate.connection"  "unz"                  "url"                 
[55] "write"                "write.dcf"            "writeBin"            
[58] "writeChar"            "writeLines"           "xzfile"              

Unsafe(?) functions: Those which appear to access package internals based upon their arguments

internal_funcs <- formal_arg_names %>% 
  keep(~any(c('package', 'lib.loc', 'useImports', 'handlers') %in% .x)) %>%
  names()
internal_funcs
 [1] ".Defunct"              ".Deprecated"           ".getRequiredPackages" 
 [4] ".getRequiredPackages2" ".packages"             "autoload"             
 [7] "autoloader"            "find.package"          "library"              
[10] "library.dynam"         "loadNamespace"         "packageHasNamespace"  
[13] "packageNotFoundError"  "parseNamespaceFile"    "path.package"         
[16] "registerS3methods"     "require"               "requireNamespace"     
[19] "system.file"           "taskCallbackManager"  

Unsafe(?) functions: Those which appear to access R internals based upon their name

internal2_funcs <- grep('(gc)', base_funcs, ignore.case = TRUE, value = TRUE)
internal2_funcs
[1] "gc"               "gc.time"          "gcinfo"           "gctorture"       
[5] "gctorture2"       "warningCondition"

Unsafe(?) functions: Those which change the global state.

An example of a function which can change global state even when evaluated within a restricted environment is graphics::par.

I haven’t gone looking in the base package for similar.

Unsafe(?) functions: Those which start with a .

I have no idea what most of these are for!

dot_funcs <- grep('^\\.', base_funcs, ignore.case = TRUE, value = TRUE)
dot_funcs
 [1] ".__H__.cbind"            ".__H__.rbind"           
 [3] "...elt"                  "...length"              
 [5] "..getNamespace"          ".amatch_bounds"         
 [7] ".amatch_costs"           ".bincode"               
 [9] ".C"                      ".cache_class"           
[11] ".Call"                   ".Call.graphics"         
[13] ".col"                    ".colMeans"              
[15] ".colSums"                ".Date"                  
[17] ".decode_numeric_version" ".Defunct"               
[19] ".deparseOpts"            ".Deprecated"            
[21] ".detach"                 ".difftime"              
[23] ".doSortWrap"             ".doTrace"               
[25] ".doWrap"                 ".dynLibs"               
[27] ".encode_numeric_version" ".expand_R_libs_env_var" 
[29] ".External"               ".External.graphics"     
[31] ".External2"              ".find.package"          
[33] ".First.sys"              ".format.zeros"          
[35] ".Fortran"                ".getNamespace"          
[37] ".getNamespaceInfo"       ".getRequiredPackages"   
[39] ".getRequiredPackages2"   ".gt"                    
[41] ".gtn"                    ".handleSimpleError"     
[43] ".Internal"               ".isMethodsDispatchOn"   
[45] ".isOpen"                 ".kappa_tri"             
[47] ".kronecker"              ".libPaths"              
[49] ".make_numeric_version"   ".makeMessage"           
[51] ".mapply"                 ".maskedMsg"             
[53] ".mergeExportMethods"     ".mergeImportMethods"    
[55] ".NotYetImplemented"      ".NotYetUsed"            
[57] ".OptRequireMethods"      ".packages"              
[59] ".packageStartupMessage"  ".path.package"          
[61] ".POSIXct"                ".POSIXlt"               
[63] ".Primitive"              ".primTrace"             
[65] ".primUntrace"            ".readRDS"               
[67] ".rmpkg"                  ".row"                   
[69] ".row_names_info"         ".rowMeans"              
[71] ".rowNamesDF<-"           ".rowSums"               
[73] ".saveRDS"                ".Script"                
[75] ".set_row_names"          ".signalSimpleWarning"   
[77] ".standard_regexps"       ".subset"                
[79] ".subset2"                ".TAOCP1997init"         
[81] ".traceback"              ".tryResumeInterrupt"    
[83] ".valid.factor"          

What’s left?

After this triage of ‘unsafe’ funcs, there are still more than 1061 functions to go!

Safe(?) functions: as.* functions

as_funcs <- grep('^as\\.', leftover_funcs, ignore.case = TRUE, value = TRUE)
as_funcs
  [1] "as.array"                      "as.array.default"             
  [3] "as.call"                       "as.character"                 
  [5] "as.character.condition"        "as.character.Date"            
  [7] "as.character.default"          "as.character.error"           
  [9] "as.character.factor"           "as.character.hexmode"         
 [11] "as.character.numeric_version"  "as.character.octmode"         
 [13] "as.character.POSIXt"           "as.character.srcref"          
 [15] "as.complex"                    "as.data.frame"                
 [17] "as.data.frame.array"           "as.data.frame.AsIs"           
 [19] "as.data.frame.character"       "as.data.frame.complex"        
 [21] "as.data.frame.data.frame"      "as.data.frame.Date"           
 [23] "as.data.frame.default"         "as.data.frame.difftime"       
 [25] "as.data.frame.factor"          "as.data.frame.integer"        
 [27] "as.data.frame.list"            "as.data.frame.logical"        
 [29] "as.data.frame.matrix"          "as.data.frame.model.matrix"   
 [31] "as.data.frame.noquote"         "as.data.frame.numeric"        
 [33] "as.data.frame.numeric_version" "as.data.frame.ordered"        
 [35] "as.data.frame.POSIXct"         "as.data.frame.POSIXlt"        
 [37] "as.data.frame.raw"             "as.data.frame.table"          
 [39] "as.data.frame.ts"              "as.data.frame.vector"         
 [41] "as.Date"                       "as.Date.character"            
 [43] "as.Date.default"               "as.Date.factor"               
 [45] "as.Date.numeric"               "as.Date.POSIXct"              
 [47] "as.Date.POSIXlt"               "as.difftime"                  
 [49] "as.double"                     "as.double.difftime"           
 [51] "as.double.POSIXlt"             "as.environment"               
 [53] "as.expression"                 "as.expression.default"        
 [55] "as.factor"                     "as.function"                  
 [57] "as.function.default"           "as.hexmode"                   
 [59] "as.integer"                    "as.list"                      
 [61] "as.list.data.frame"            "as.list.Date"                 
 [63] "as.list.default"               "as.list.environment"          
 [65] "as.list.factor"                "as.list.function"             
 [67] "as.list.numeric_version"       "as.list.POSIXct"              
 [69] "as.list.POSIXlt"               "as.logical"                   
 [71] "as.logical.factor"             "as.matrix"                    
 [73] "as.matrix.data.frame"          "as.matrix.default"            
 [75] "as.matrix.noquote"             "as.matrix.POSIXlt"            
 [77] "as.name"                       "as.null"                      
 [79] "as.null.default"               "as.numeric"                   
 [81] "as.numeric_version"            "as.octmode"                   
 [83] "as.ordered"                    "as.package_version"           
 [85] "as.pairlist"                   "as.POSIXct"                   
 [87] "as.POSIXct.Date"               "as.POSIXct.default"           
 [89] "as.POSIXct.numeric"            "as.POSIXct.POSIXlt"           
 [91] "as.POSIXlt"                    "as.POSIXlt.character"         
 [93] "as.POSIXlt.Date"               "as.POSIXlt.default"           
 [95] "as.POSIXlt.factor"             "as.POSIXlt.numeric"           
 [97] "as.POSIXlt.POSIXct"            "as.qr"                        
 [99] "as.raw"                        "as.single"                    
[101] "as.single.default"             "as.symbol"                    
[103] "as.table"                      "as.table.default"             
[105] "as.vector"                     "as.vector.factor"             

Safe(?) functions: *apply functions

apply_funcs <- grep('apply', leftover_funcs, ignore.case = TRUE, value = TRUE)
apply_funcs
[1] "apply"  "eapply" "lapply" "mapply" "rapply" "sapply" "tapply" "vapply"

Safe(?) functions: print.* functions

print_funcs <- grep('^print', leftover_funcs, ignore.case = TRUE, value = TRUE)
print_funcs
 [1] "print"                       "print.AsIs"                 
 [3] "print.by"                    "print.condition"            
 [5] "print.data.frame"            "print.Date"                 
 [7] "print.default"               "print.difftime"             
 [9] "print.Dlist"                 "print.DLLInfo"              
[11] "print.DLLInfoList"           "print.DLLRegisteredRoutines"
[13] "print.eigen"                 "print.factor"               
[15] "print.function"              "print.hexmode"              
[17] "print.libraryIQR"            "print.listof"               
[19] "print.NativeRoutineList"     "print.noquote"              
[21] "print.numeric_version"       "print.octmode"              
[23] "print.packageInfo"           "print.POSIXct"              
[25] "print.POSIXlt"               "print.proc_time"            
[27] "print.restart"               "print.rle"                  
[29] "print.simple.list"           "print.srcref"               
[31] "print.summary.table"         "print.summary.warnings"     
[33] "print.summaryDefault"        "print.table"                
[35] "print.warnings"             

assign funcs

 [1] "[[<-"                    "[[<-.data.frame"        
 [3] "[[<-.factor"             "[[<-.numeric_version"   
 [5] "[[<-.POSIXlt"            "[<-"                    
 [7] "[<-.data.frame"          "[<-.Date"               
 [9] "[<-.factor"              "[<-.numeric_version"    
[11] "[<-.POSIXct"             "[<-.POSIXlt"            
[13] "@<-"                     "<-"                     
[15] "<<-"                     "$<-"                    
[17] "$<-.data.frame"          "attr<-"                 
[19] "attributes<-"            "body<-"                 
[21] "class<-"                 "colnames<-"             
[23] "comment<-"               "diag<-"                 
[25] "dim<-"                   "dimnames<-"             
[27] "dimnames<-.data.frame"   "Encoding<-"             
[29] "environment<-"           "formals<-"              
[31] "is.na<-"                 "is.na<-.default"        
[33] "is.na<-.factor"          "is.na<-.numeric_version"
[35] "length<-"                "length<-.Date"          
[37] "length<-.difftime"       "length<-.factor"        
[39] "length<-.POSIXct"        "length<-.POSIXlt"       
[41] "levels<-"                "levels<-.factor"        
[43] "mode<-"                  "mostattributes<-"       
[45] "names<-"                 "names<-.POSIXlt"        
[47] "oldClass<-"              "parent.env<-"           
[49] "regmatches<-"            "row.names<-"            
[51] "row.names<-.data.frame"  "row.names<-.default"    
[53] "rownames<-"              "split<-"                
[55] "split<-.data.frame"      "split<-.default"        
[57] "storage.mode<-"          "substr<-"               
[59] "substring<-"             "units<-"                
[61] "units<-.difftime"       

Non-alpha funcs

 [1] "-"                  "-.Date"             "-.POSIXt"          
 [4] ":"                  "::"                 ":::"               
 [7] "!"                  "!.hexmode"          "!.octmode"         
[10] "!="                 "("                  "["                 
[13] "[.AsIs"             "[.data.frame"       "[.Date"            
[16] "[.difftime"         "[.Dlist"            "[.DLLInfoList"     
[19] "[.factor"           "[.hexmode"          "[.listof"          
[22] "[.noquote"          "[.numeric_version"  "[.octmode"         
[25] "[.POSIXct"          "[.POSIXlt"          "[.simple.list"     
[28] "[.table"            "[.warnings"         "[["                
[31] "[[.data.frame"      "[[.Date"            "[[.factor"         
[34] "[[.numeric_version" "[[.POSIXct"         "[[.POSIXlt"        
[37] "{"                  "@"                  "*"                 
[40] "*.difftime"         "/"                  "/.difftime"        
[43] "&"                  "&.hexmode"          "&.octmode"         
[46] "&&"                 "%*%"                "%/%"               
[49] "%%"                 "%in%"               "%o%"               
[52] "%x%"                "^"                  "+"                 
[55] "+.Date"             "+.POSIXt"           "<"                 
[58] "<="                 "="                  "=="                
[61] ">"                  ">="                 "|"                 
[64] "|.hexmode"          "|.octmode"          "||"                
[67] "~"                  "$"                  "$.DLLInfo"         
[70] "$.package_version" 

Functions which seem safe(?)

math_funcs <- c('sin', 'cos', 'tan', 
                'acos', 'asin', 'atan', 'atan2',
                'sinpi', 'cospi', 'tanpi',
                'exp', 'expm1', 
                'log', 'logb', 'log10', 'log2')

object_funcs <- c('logical', 'as.logical', 'is.logical', 
                  'integer', 'as.integer', 'is.integer', 
                  'numeric', 'as.numeric', 'is.numeric', 
                  'double' , 'as.double' , 'is.double' ,
                  'single' , 'as.single' , 'is.single', 
                  'complex', 'as.complex', 'is.complex',
                  'structure', 
                  'c', 'list', 'data.frame', 'complex')

complex_funcs <- c('Re', 'Im', 'Mod', 'Arg', 'Conj')

seq_funcs <- c(':', 'seq', 'seq.int', 'seq_along', 'seq_len')


plot_funcs <- c('plot', 'points', 'lines', 'title', 'legend', 'points.formula')

flow_control_funcs <- c('if', 'for', 'while', 'repeat', 'break', 'next')

Conclusion

So many functions. So many ways for a malicious user to abuse the system.

And this was only the base package!

Appendix: sandboxR’s list of blacklisted functions from base

For comparison, the sandboxR package contains some blacklists for various core R packages.

Here is the list of functions it blacklists from the base package: