Parsing PBRT rendering scene format

PBRT format

The PBRT Scene Format is a scene description format associated with the book “Physically Based Rendering” by Pharr et al.

This vignette outlines the beginning of the parsing process, and starts to define parser functions for the main elements in the format.

Example Scene

pbrt_text <- '
LookAt 3 4 1.5  # eye
       .5 .5 0  # look at point
       0 0 1    # up vector
Camera "perspective" "float fov" 45

Sampler "halton" "integer pixelsamples" 128
Integrator "path"
Film "image" "string filename" "simple.png"
     "integer xresolution" [400] "integer yresolution" [400]

WorldBegin

# uniform blue-ish illumination from all directions
LightSource "infinite" "rgb L" [.4 .45 .5]

# approximate the sun
LightSource "distant"  "point from" [ -30 40  100 ]
   "blackbody L" [3000 1.5]

AttributeBegin
  Material "glass"
  Shape "sphere" "float radius" 1
AttributeEnd

AttributeBegin
  Texture "checks" "spectrum" "checkerboard"
          "float uscale" [8] "float vscale" [8]
          "rgb tex1" [.1 .1 .1] "rgb tex2" [.8 .8 .8]
  Material "matte" "texture Kd" "checks"
  Translate 0 0 -1
  Shape "trianglemesh"
      "integer indices" [0 1 2 0 2 3]
      "point P" [ -20 -20 0   20 -20 0   20 20 0   -20 20 0 ]
      "float st" [ 0 0   1 0    1 1   0 1 ]
AttributeEnd

WorldEnd
'

When using a renderer which supports the PBRT scene format, the above example should render to the following image:

Lex the text into tokens

pbrt_regexes <- c(
  comment       = '(#.*?)\n',       # Assume # only appears to denote comment to end of line
  number        = '[+\\-]?\\.?(?:0|[1-9]\\d*)(?:\\.\\d*)?(?:[eE][+\\-]?\\d+)?',
  string        = '"(.*?)"',
  list_start    = "\\[",
  list_end      = "\\]",
  identifier    = "\\w+",
  newline       = '\n',
  whitespace    = '\\s+'
)

tokens <- flexo::lex(pbrt_text, pbrt_regexes)
tokens <- tokens[!(names(tokens) %in% c('whitespace', 'newline', 'comment'))]

tokens[1:20]

#>             identifier                 number                 number 
#>               "LookAt"                    "3"                    "4" 
#>                 number                 number                 number 
#>                  "1.5"                   ".5"                   ".5" 
#>                 number                 number                 number 
#>                    "0"                    "0"                    "0" 
#>                 number             identifier                 string 
#>                    "1"               "Camera"          "perspective" 
#>                 string                 number             identifier 
#>            "float fov"                   "45"              "Sampler" 
#>                 string                 string                 number 
#>               "halton" "integer pixelsamples"                  "128" 
#>             identifier                 string 
#>           "Integrator"                 "path"

Define some parsers for particular sequences in the stream

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parse an identifier 
#
# Parse the stream for the current location until just before the next
# identifier
#
# @param stream stream positioned such the the first token is an 'identifier'
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
parse_identifier <- function(stream) {
  stream$assert_name('identifier')
  identifier <- stream$consume(1)
  body       <- stream$consume_until(name = 'identifier', inclusive = FALSE)
  setNames(list(list(body)), identifier)
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parse an Attribute 
#
# Parse the stream for the current location until just after the next
# 'AttributeEnd'
#
# @param stream stream positioned such the the first token has the value
#        'AttributeBein'
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
parse_attribute <- function(stream) {
  stream$assert_value('AttributeBegin')
  stream$consume(1)
  attr <- list()
  while (!stream$end_of_stream() && stream$read(1) != 'AttributeEnd') {
    identifier <- parse_identifier(stream)
    attr       <- append(attr, identifier)
  }
  stream$consume(1) # AttributeEnd
  attr
}

Parse the tokens in the ‘setup’ block

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create a stream from the tokens in order to manipulate them.
# Allocate a place for the setup and world informatino 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stream <- flexo::TokenStream$new(tokens)
setup  <- list()
world  <- list()

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The first token should always be an identifier
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stream$assert_name('identifier')


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parse all the setup identifiers up to (but not including) WorldBegin
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
while (!stream$end_of_stream() && stream$read(1) != 'WorldBegin') {
  identifier <- parse_identifier(stream)
  setup      <- append(setup, identifier)
}

setup

#> $LookAt
#> $LookAt[[1]]
#> number number number number number number number number number 
#>    "3"    "4"  "1.5"   ".5"   ".5"    "0"    "0"    "0"    "1" 
#> 
#> 
#> $Camera
#> $Camera[[1]]
#>        string        string        number 
#> "perspective"   "float fov"          "45" 
#> 
#> 
#> $Sampler
#> $Sampler[[1]]
#>                 string                 string                 number 
#>               "halton" "integer pixelsamples"                  "128" 
#> 
#> 
#> $Integrator
#> $Integrator[[1]]
#> string 
#> "path" 
#> 
#> 
#> $Film
#> $Film[[1]]
#>                string                string                string 
#>               "image"     "string filename"          "simple.png" 
#>                string            list_start                number 
#> "integer xresolution"                   "["                 "400" 
#>              list_end                string            list_start 
#>                   "]" "integer yresolution"                   "[" 
#>                number              list_end 
#>                 "400"                   "]"

Parse the tokens in the ‘World’ block

jnk <- stream$consume(1) # WorldBegin

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parse all the world entries
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
while (!stream$end_of_stream() && stream$read(1) != 'WorldEnd') {
  if (stream$read(1) == 'AttributeBegin') {
    attribute <- parse_attribute(stream)
    world     <- append(world, attribute)
  } else {
    identifier <- parse_identifier(stream)
    world      <- append(world, identifier)
  }
}

world

#> $LightSource
#> $LightSource[[1]]
#>     string     string list_start     number     number     number   list_end 
#> "infinite"    "rgb L"        "["       ".4"      ".45"       ".5"        "]" 
#> 
#> 
#> $LightSource
#> $LightSource[[1]]
#>        string        string    list_start        number        number 
#>     "distant"  "point from"           "["         "-30"          "40" 
#>        number      list_end        string    list_start        number 
#>         "100"           "]" "blackbody L"           "["        "3000" 
#>        number      list_end 
#>         "1.5"           "]" 
#> 
#> 
#> $Material
#> $Material[[1]]
#>  string 
#> "glass" 
#> 
#> 
#> $Shape
#> $Shape[[1]]
#>         string         string         number 
#>       "sphere" "float radius"            "1" 
#> 
#> 
#> $Texture
#> $Texture[[1]]
#>         string         string         string         string     list_start 
#>       "checks"     "spectrum" "checkerboard" "float uscale"            "[" 
#>         number       list_end         string     list_start         number 
#>            "8"            "]" "float vscale"            "["            "8" 
#>       list_end         string     list_start         number         number 
#>            "]"     "rgb tex1"            "["           ".1"           ".1" 
#>         number       list_end         string     list_start         number 
#>           ".1"            "]"     "rgb tex2"            "["           ".8" 
#>         number         number       list_end 
#>           ".8"           ".8"            "]" 
#> 
#> 
#> $Material
#> $Material[[1]]
#>       string       string       string 
#>      "matte" "texture Kd"     "checks" 
#> 
#> 
#> $Translate
#> $Translate[[1]]
#> number number number 
#>    "0"    "0"   "-1" 
#> 
#> 
#> $Shape
#> $Shape[[1]]
#>            string            string        list_start            number 
#>    "trianglemesh" "integer indices"               "["               "0" 
#>            number            number            number            number 
#>               "1"               "2"               "0"               "2" 
#>            number          list_end            string        list_start 
#>               "3"               "]"         "point P"               "[" 
#>            number            number            number            number 
#>             "-20"             "-20"               "0"              "20" 
#>            number            number            number            number 
#>             "-20"               "0"              "20"              "20" 
#>            number            number            number            number 
#>               "0"             "-20"              "20"               "0" 
#>          list_end            string        list_start            number 
#>               "]"        "float st"               "["               "0" 
#>            number            number            number            number 
#>               "0"               "1"               "0"               "1" 
#>            number            number            number          list_end 
#>               "1"               "0"               "1"               "]"

Next steps - more parser functions!

Next step would be to call specialist functions for parsing particular block types.

parse_identifier() should recognise the identifer and then call a specialist parsing function for it. i.e.

parse_shape()
parse_translate()
parse_material()
etc

There could also be some low-level parse functions which parse some common generic structures:

parse_number() converts the token (a character) into a numeric value
parse_list() converts a list defined in PBRT format as [e1 e2 e3 ... en] into a standard R list object.

mikefc