- `*` refers to the current address location, and can be read
  from or written to.
- Use a `*=....` line to set the
  starting address.
- `.byte` values must be hexadecimal only, and a single-byte
  hexadecimal value must always include 2 characters,
  i.e. `$0e`
  will work, but `$e`
  won’t work.
- `#$01`
  will work.
  `#$1`
won’t work.

`c64asm::asm_patterns`
defines the set of patterns for
lexing/tokenising an ASM file.

- `*`
  is interpreted as the program counter
  (i.e. current address).
- `.rbyte`
  directive. Similar to the standard
  `.byte`, but the bytes come from the next token in the stream,
  which must be a variable name which contains integers in the range
  0-255.
- `.rtext`
  directive. Similar to the standard
  `.text`, but the text comes from the next token in the stream,
  which must be a variable name which contains a string.
- `{...}`
  represents text to be evaluated. Used for symbol
  arithmetic. e.g. lda {message},1
#-----------------------------------------------------------------------------
# Token patterns (regular expressions) used to lex 6502 assembly source.
#
# `asm_patterns` is a named character vector: each name is a token type and
# each value is the regex that recognises it. Two entries share the name
# `symbol` (a `{...}` expression form and a plain identifier form).
#
# NOTE(review): element order is preserved exactly as before -- presumably
# the lexer gives earlier patterns priority (e.g. `word` before `byte`);
# confirm against the lexer before reordering anything.
#-----------------------------------------------------------------------------
asm_patterns <- local({
  # Instruction mnemonics (documented and undocumented), upper case.
  # `LAX` is listed twice, exactly as in the hand-written pattern.
  mnemonics <- c(
    "ADC", "AHX", "ALR", "ANC", "AND", "ARR", "ASL", "AXS",
    "BCC", "BCS", "BEQ", "BIT", "BMI", "BNE", "BPL", "BRK", "BVC", "BVS",
    "CLC", "CLD", "CLI", "CLV", "CMP", "CPX", "CPY",
    "DCP", "DEC", "DEX", "DEY",
    "EOR",
    "INC", "INX", "INY", "ISC",
    "JMP", "JSR",
    "LAS", "LAX", "LAX", "LDA", "LDX", "LDY", "LSR",
    "NOP",
    "ORA",
    "PHA", "PHP", "PLA", "PLP",
    "RLA", "ROL", "ROR", "RRA", "RTI", "RTS",
    "SAX", "SBC", "SEC", "SED", "SEI", "SHX", "SHY", "SLO", "SRE",
    "STA", "STX", "STY",
    "TAS", "TAX", "TAY", "TSX", "TXA", "TXS", "TYA",
    "XAA"
  )

  # chartr() rather than tolower(): the mapping must not depend on the
  # session locale (some locales lower-case "I" to a non-ASCII character).
  lower_mnemonics <- chartr(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "abcdefghijklmnopqrstuvwxyz",
    mnemonics
  )

  # All-upper-case alternatives first, then all-lower-case; mixed case
  # (e.g. "Lda") is not matched.
  opcode_regex <- paste0(
    "\\b(",
    paste(c(mnemonics, lower_mnemonics), collapse = "|"),
    ")\\b"
  )

  c(
    newline    = '\n',
    whitespace = '\\s+',
    PC         = '\\*',                     # program counter
    immediate  = '#\\$[0-9a-fA-F]{1,2}',    # '#$' + 1-2 hex digits
    word       = '\\$[0-9a-fA-F]{3,4}',     # '$' + 3-4 hex digits
    byte       = '\\$[0-9a-fA-F]{1,2}',     # '$' + 1-2 hex digits
    opcode     = opcode_regex,
    byte_inst  = '\\.byte',
    text_inst  = '\\.text',
    rtext_inst = '\\.rtext',
    rbyte_inst = '\\.rbyte',
    lbracket   = '\\(',
    rbracket   = '\\)',
    text       = '".*?"',                   # double-quoted, non-greedy
    comma      = ",",
    colon      = ":",
    equals     = '=',
    comment    = '(;[^\n]*)',               # ';' to end of line
    x          = '(x|X)',
    y          = '(y|Y)',
    symbol     = '#?<?>?\\{.*?\\}',         # a symbol with evaluation
    symbol     = '[^\\s:,)]+'               # plain symbol / label
  )
})
`comma`, `whitespace`
or
`comment`
tokens are discarded.
# Lex the assembly source `asm` into a list holding one named character
# vector per source line; each element's name is its token type.
line_tokens <- c64asm::create_line_tokens(asm)
line_tokens
#> [[1]]
#> PC equals word
#> "*" "=" "$0801"
#>
#> [[2]]
#> byte_inst byte byte byte byte byte byte
#> ".byte" "$0c" "$08" "$0a" "$00" "$9e" "$20"
#>
#> [[3]]
#> byte_inst byte byte byte byte byte byte
#> ".byte" "$32" "$30" "$38" "$30" "$00" "$00"
#>
#> [[4]]
#> byte_inst byte
#> ".byte" "$00"
#>
#> [[5]]
#> PC equals word
#> "*" "=" "$0820"
#>
#> [[6]]
#> opcode immediate
#> "ldx" "#$00"
#>
#> [[7]]
#> symbol opcode symbol x
#> "loop" "lda" "message" "x"
#>
#> [[8]]
#> opcode immediate
#> "and" "#$3f"
#>
#> [[9]]
#> opcode word x
#> "sta" "$0400" "x"
#>
#> [[10]]
#> opcode
#> "inx"
#>
#> [[11]]
#> opcode immediate
#> "cpx" "#$0c"
#>
#> [[12]]
#> opcode symbol
#> "bne" "loop"
#>
#> [[13]]
#> opcode
#> "rts"
#>
#> [[14]]
#> symbol
#> "message"
#>
#> [[15]]
#> text_inst text
#> ".text" "\"Hello World!\""
The `prg_df`
data structure is created from
`line_tokens`.
`prg_df` is a
`data.frame`.
# Build the program data.frame (one row per tokenised source line) from the
# list of line tokens.
prg_df <- c64asm::create_prg_df(line_tokens)
init_addr | label | line | opmode | opbyte | ophex | symbol_op | symbol_expr | nbytes |
---|---|---|---|---|---|---|---|---|
2049 | NA | * = $0801 | NA | NA | NA | NA | NA | 0 |
NA | NA | .byte $0c $08 $0a $00 $9e $20 | NA | NA | NA | NA | NA | 6 |
NA | NA | .byte $32 $30 $38 $30 $00 $00 | NA | NA | NA | NA | NA | 6 |
NA | NA | .byte $00 | NA | NA | NA | NA | NA | 1 |
2080 | NA | * = $0820 | NA | NA | NA | NA | NA | 0 |
NA | NA | ldx #$00 | immediate | 162 | a2 | NA | NA | 2 |
NA | loop | loop lda message x | absolute x | 189 | bd | absolute x | message | 3 |
NA | NA | and #$3f | immediate | 41 | 29 | NA | NA | 2 |
NA | NA | sta $0400 x | absolute x | 157 | 9d | NA | NA | 3 |
NA | NA | inx | implied | 232 | e8 | NA | NA | 1 |
NA | NA | cpx #$0c | immediate | 224 | e0 | NA | NA | 2 |
NA | NA | bne loop | relative | 208 | d0 | relative | loop | 2 |
NA | NA | rts | implied | 96 | 60 | NA | NA | 1 |
NA | message | message | NA | NA | NA | NA | NA | 0 |
NA | NA | .text “Hello World!” | NA | NA | NA | NA | NA | 12 |
Symbols (e.g. `message`) may have their values defined
by an explicit assignment (e.g.
`storage = $3000`). Once the program counter has been set (
`* = $0820`
for example), then
byte counting is used to figure out the address of all subsequent
instructions.
# Resolve symbols: in the result shown below every row has an `addr`, and
# `symbol_value`, `symbol_bytes` and `bytes` columns have been added.
prg_df <- c64asm::process_symbols(prg_df)
addr | label | line | opmode | opbyte | ophex | symbol_expr | nbytes | symbol_value | symbol_bytes | bytes |
---|---|---|---|---|---|---|---|---|---|---|
2049 | NA | * = $0801 | NA | NA | NA | NA | 0 | NA | NA, NA | |
2049 | NA | .byte $0c $08 $0a $00 $9e $20 | NA | NA | NA | NA | 6 | NA | NA, NA | 12, 8, 10, 0, 158, 32 |
2055 | NA | .byte $32 $30 $38 $30 $00 $00 | NA | NA | NA | NA | 6 | NA | NA, NA | 50, 48, 56, 48, 0, 0 |
2061 | NA | .byte $00 | NA | NA | NA | NA | 1 | NA | NA, NA | 0 |
2080 | NA | * = $0820 | NA | NA | NA | NA | 0 | NA | NA, NA | |
2080 | NA | ldx #$00 | immediate | 162 | a2 | NA | 2 | NA | NA, NA | 162, 0 |
2082 | loop | loop lda message x | absolute x | 189 | bd | message | 3 | 2096 | 48, 8 | 189, 48, 8 |
2085 | NA | and #$3f | immediate | 41 | 29 | NA | 2 | NA | NA, NA | 41, 63 |
2087 | NA | sta $0400 x | absolute x | 157 | 9d | NA | 3 | NA | NA, NA | 157, 0, 4 |
2090 | NA | inx | implied | 232 | e8 | NA | 1 | NA | NA, NA | 232 |
2091 | NA | cpx #$0c | immediate | 224 | e0 | NA | 2 | NA | NA, NA | 224, 12 |
2093 | NA | bne loop | relative | 208 | d0 | loop | 2 | 2082 | 34, 8 | 208, 243 |
2095 | NA | rts | implied | 96 | 60 | NA | 1 | NA | NA, NA | 96 |
2096 | message | message | NA | NA | NA | NA | 0 | NA | NA, NA | |
2096 | NA | .text “Hello World!” | NA | NA | NA | NA | 12 | NA | NA, NA | 200, 69, 76, 76, 79, 32, 215, 79, 82, 76, 68, 33 |
# Add explicit "(zero padding)" rows where there is a gap between the end of
# one run of bytes and the next program-counter address (here 18 zero bytes
# covering 2062 up to 2080).
prg_df <- c64asm::process_zero_padding(prg_df)
addr | label | line | opmode | opbyte | ophex | symbol_expr | nbytes | symbol_value | symbol_bytes | bytes |
---|---|---|---|---|---|---|---|---|---|---|
2049 | NA | * = $0801 | NA | NA | NA | NA | 0 | NA | NA, NA | |
2049 | NA | .byte $0c $08 $0a $00 $9e $20 | NA | NA | NA | NA | 6 | NA | NA, NA | 12, 8, 10, 0, 158, 32 |
2055 | NA | .byte $32 $30 $38 $30 $00 $00 | NA | NA | NA | NA | 6 | NA | NA, NA | 50, 48, 56, 48, 0, 0 |
2061 | NA | .byte $00 | NA | NA | NA | NA | 1 | NA | NA, NA | 0 |
2062 | NA | (zero padding) | NA | NA | NA | NA | 18 | NA | NULL | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
2080 | NA | * = $0820 | NA | NA | NA | NA | 0 | NA | NA, NA | |
2080 | NA | ldx #$00 | immediate | 162 | a2 | NA | 2 | NA | NA, NA | 162, 0 |
2082 | loop | loop lda message x | absolute x | 189 | bd | message | 3 | 2096 | 48, 8 | 189, 48, 8 |
2085 | NA | and #$3f | immediate | 41 | 29 | NA | 2 | NA | NA, NA | 41, 63 |
2087 | NA | sta $0400 x | absolute x | 157 | 9d | NA | 3 | NA | NA, NA | 157, 0, 4 |
2090 | NA | inx | implied | 232 | e8 | NA | 1 | NA | NA, NA | 232 |
2091 | NA | cpx #$0c | immediate | 224 | e0 | NA | 2 | NA | NA, NA | 224, 12 |
2093 | NA | bne loop | relative | 208 | d0 | loop | 2 | 2082 | 34, 8 | 208, 243 |
2095 | NA | rts | implied | 96 | 60 | NA | 1 | NA | NA, NA | 96 |
2096 | message | message | NA | NA | NA | NA | 0 | NA | NA, NA | |
2096 | NA | .text “Hello World!” | NA | NA | NA | NA | 12 | NA | NA, NA | 200, 69, 76, 76, 79, 32, 215, 79, 82, 76, 68, 33 |
In `prg_df`,
`prg_df$bytes`
is a list column which represents all the
instruction bytes which make up a PRG file. Use `purrr::flatten()`
to convert to an integer vector
(and remove any NULL entries).
# Flatten the list column into a single sequence of values, coerce to
# integer, then to raw so the bytes print as hexadecimal.
prg_df$bytes %>%
purrr::flatten() %>%
as.integer() %>%
as.raw()
#> [1] 0c 08 0a 00 9e 20 32 30 38 30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
#> [26] 00 00 00 00 00 00 a2 00 bd 30 08 29 3f 9d 00 04 e8 e0 0c d0 f3 60 c8 45 4c
#> [51] 4c 4f 20 d7 4f 52 4c 44 21
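Take the first address in `prg_df` (`prg_df$addr[1]`)
and convert it to 2
bytes in low-byte/high-byte format using `c64asm::w2b()`; these 2 bytes are placed in front of the instruction bytes.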
# The following is equivalent to: c64asm::extract_prg_bytes(prg_df)
# w2b() turns the first address into 2 bytes (low byte, then high byte),
# which are placed ahead of the flattened instruction bytes.
as.raw(c(w2b(prg_df$addr[1]), as.integer(purrr::flatten(prg_df$bytes))))
#> [1] 01 08 0c 08 0a 00 9e 20 32 30 38 30 00 00 00 00 00 00 00 00 00 00 00 00 00
#> [26] 00 00 00 00 00 00 00 00 a2 00 bd 30 08 29 3f 9d 00 04 e8 e0 0c d0 f3 60 c8
#> [51] 45 4c 4c 4f 20 d7 4f 52 4c 44 21
Write the bytes to a file with `writeBin()`
- no other processing is needed.