#!/usr/bin/perl # (C) 2003-2004 by Mark Hillebrand <mah at cs.uni-sb.de> # This code is licensed under GNU General Public License (GPL) Version 2, # see http://www.opensource.org/licenses/gpl-license.php for full license text. # # TODO some of the eval's are not needed # TODO data stuff allowed in text segment? # TODO replace fake-1-pass by (buffering) 2-pass assembler # TODO have a strict grammar for expressions # TODO allow to build binary object or different targets # TODO just for doing it once: use logger just error reporting? generate test suite? # TODO range checks might be improved # TODO add options to enable / disable parts of the ISA (e.g. floating point, no system mode instructions) use Pod::Usage; pod2usage(1) if @ARGV==0; use Getopt::Long; my %options; GetOptions(\%options, 'help', 'meyerpad', 'output|o=s', 'pedantic', 'symboltable|st=s', 'verbose|v+', ) or pod2usage(2); pod2usage(1) if $options{help}; $options{pedantic}=1 if $options{verbose}; # implication required currently pod2usage("?Specify (single) assembly source file.") unless @ARGV==1; $SRC = shift; pod2usage("?Input and symbol table files match.") if exists $options{symboltable} and $options{symboltable} eq $SRC; if( exists $options{output} ) { pod2usage("?Input and output files match.") if $options{output} eq $SRC; pod2usage("?Output and symbol table files match.") if exists $options{symboltable} and $options{output} eq $options{symboltable}; # reopen stdout open STDOUT, ">".$options{output} or die "?Can't open `$options{output}' for output: $!"; } my $labelre = '\b[_a-zA-Z]\w*'; sub imm($$$) { # neck-breaking expression parsing my ($s,$a,$l) = @_; $s =~ s/#//g; $s =~ s/\./$a/g; # dot is the current address $s =~ s/($labelre)/exists($label{$1}) or warn "?Undefined label `$1' used in line $l";$label{$1}+0/ge; return int(eval $s); } sub imm16($$$) { # with range check (not a strict one, since we cannot distinguish unsigned and signed constants) my ($s,$a,$l) = @_; my $val = imm($_[0],$_[1],$_[2]); # check whether the topmost 17 bits are equal or in unsigned 16-bit range if( ($val & 0xFFFF8000)!=0xFFFF8000 && ($val & 0xFFFF8000)!=0x0 && !($val>=0&&$val <= 0xFFFF) ) { die "?Immediate constant `$s==$val' out of range in line $l\n"; } return $val; } sub imm5($$$) { # shift amount with range check (unsigned interpretation) my ($s,$a,$l) = @_; my $val = imm($_[0],$_[1],$_[2]); if( $val < 0 || $val >= 32 ) { die "?Immediate constant `$s==$val' out of range (unsigned 5 bit) in line $l\n"; } return $val; } sub imm26($$$) { # with range check my ($s,$a,$l) = @_; my $val = imm($_[0],$_[1],$_[2]); # check whether the topmost 27 bits are equal if( ($val & 0xFE000000)!=0xFE000000 && ($val & 0xFE000000)!=0x0 ) { die "?Immediate constant `$s==$val' out of range in line $l\n"; } return $val; } sub gpr($$$) { my ($s,$a,$l) = @_; $s =~ /[rR](\d+)/ or die "?Cannot parse register in line $l\n"; die "?Invalid register in line $l\n" if $1 > 31; return $1; } sub fprs($$$) { my ($s,$a,$l) = @_; $s =~ /[fF](\d+)/ or die "?Cannot parse register in line $l\n"; die "?Invalid register in line $l\n" if $1 > 31; return $1; }; sub fprd($$$) { my ($s,$a,$l) = @_; my $i = fprs($s,$a,$l); die "?Invalid FP double register in line $l\n" if $i & 1; return $i; }; sub ea($$$) { # returns register, immediate my ($s,$a,$l) = @_; if( $s =~ m/^\s*(.*)\((.*)\)\s*$/ ) { return [gpr($2,$a,$l),imm16($1,$a,$l)]; } else { die "?Invalid effective address specification in line $l\n"; } } sub target16($$$) { # 16-bit relative branch target my ($s,$a,$l) = @_; my $imm = imm($s,$a,$l) - $a - 4; # check whether the topmost 17 bits are equal if( ($imm & 0xFFFF8000)!=0xFFFF8000 && ($imm & 0xFFFF8000)!=0x0 ) { die "?Branch to `$s' out of range for 16 bit immediate (=$imm) in line $l\n"; } return $imm; } sub target26($$$) { # 26-bit relative branch target my ($s,$a,$l) = @_; my $imm = imm($s,$a,$l) - $a - 4; # check whether the topmost 7 bits are equal if( ($imm & 0xFE000000)!=0xFE000000 && ($imm & 0xFE000000)!=0x0 ) { die "?Branch to `$s==$imm' out of range for 26 bit immediate (=$imm) in line $l\n"; } return $imm; } my %sprnames = qw(sr 0 esr 1 eca 2 epc 3 edpc 4 edata 5 rm 6 ieeef 7 fcc 8 pto 9 ptl 10 emode 11 mode 16); sub spr($$$) { my ($s,$a,$l) = @_; my $i; if( exists $sprnames{lc $s} ) { return $sprnames{lc $s}; } elsif( $s =~ /[sS](\d+)/ ) { die "?Invalid register in line $l\n" if $1 > 31; $i = $1; } else { $i = gpr($s,$a,$l); } die "?Invalid SPR register in line $l\n" unless $i < 12 or $i==16; return $i; } %isa = ( 'sll' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x4)] }, 'srl' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x6)] }, 'sra' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x7)] }, 'addo' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x20)] }, 'add' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x21)] }, 'subo' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x22)] }, 'sub' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x23)] }, 'and' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x24)] }, 'or' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x25)] }, 'xor' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x26)] }, 'seq' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2a)] }, 'sne' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2d)] }, 'slt' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2c)] }, 'sls' => 'slt', # symbolic replacement! 'sgt' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x29)] }, 'sgr' => 'sgt', # symbolic replacement! 'sle' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2e)] }, 'sge' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2b)] }, 'movi2s' => { args => [\&spr,\&gpr], format => [qw(6 0x0 5 $2 10 0x0 5 $1 6 0x11)] }, 'movs2i' => { args => [\&gpr,\&spr], format => [qw(16 0x0 5 $1 5 $2 6 0x10)] }, 'mi2s' => 'movi2s', # symbolic replacement! 'ms2i' => 'movs2i', # symbolic replacement! 'fmovs' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x8)] }, 'fnegs' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x4)] }, 'fmovd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x8)] }, 'fnegd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x4)] }, 'mf2i' => { args => [\&gpr,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x9)] }, 'mi2f' => { args => [\&fprs,\&gpr], format => [qw(6 0x11 5 0x 5 $2 5 $1 5 0x0 6 0xa)] }, 'fadds' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x0)] }, 'fsubs' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x1)] }, 'fmuls' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x2)] }, 'fdivs' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x3)] }, 'faddd' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x0)] }, 'fsubd' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x1)] }, 'fmuld' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x2)] }, 'fdivd' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x3)] }, 'cvtds' => { args => [\&fprs,\&fprd], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x21)] }, 'cvtis' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x24)] }, 'cvtsd' => { args => [\&fprd,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x20)] }, 'cvtid' => { args => [\&fprd,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x24)] }, 'cvtsi' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x4 6 0x20)] }, 'cvtdi' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x4 6 0x21)] }, 'fceqs' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x32)] }, 'fcnes' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x3d)] }, 'fclts' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x34)] }, 'fcgts' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x38)] }, 'fcles' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x36)] }, 'fcges' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x3a)] }, 'fceqd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x32)] }, 'fcned' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x3d)] }, 'fcltd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x34)] }, 'fcgtd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x38)] }, 'fcled' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x36)] }, 'fcged' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x3a)] }, 'j' => { args => [\&target26], format => [qw(6 0x2 26 $1)] }, 'jal' => { args => [\&target26], format => [qw(6 0x3 26 $1)] }, 'beqz' => { args => [\&gpr,\&target16], format => [qw(6 0x4 5 $1 5 0x0 16 $2)] }, 'bnez' => { args => [\&gpr,\&target16], format => [qw(6 0x5 5 $1 5 0x0 16 $2)] }, 'fbeqz' => { args => [\&target16], format => [qw(6 0x6 5 0x0 5 0x0 16 $1)] }, 'fbnez' => { args => [\&target16], format => [qw(6 0x7 5 0x0 5 0x0 16 $1)] }, 'addio' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x8 5 $2 5 $1 16 $3)] }, 'addi' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x9 5 $2 5 $1 16 $3)] }, 'subio' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xa 5 $2 5 $1 16 $3)] }, 'subi' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xb 5 $2 5 $1 16 $3)] }, 'andi' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xc 5 $2 5 $1 16 $3)] }, 'ori' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xd 5 $2 5 $1 16 $3)] }, 'xori' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xe 5 $2 5 $1 16 $3)] }, 'seqi' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1a 5 $2 5 $1 16 $3)] }, 'snei' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1d 5 $2 5 $1 16 $3)] }, 'slti' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1c 5 $2 5 $1 16 $3)] }, 'sgti' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x19 5 $2 5 $1 16 $3)] }, 'slei' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1e 5 $2 5 $1 16 $3)] }, 'sgei' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1b 5 $2 5 $1 16 $3)] }, 'lhi' => { args => [\&gpr,\&imm16], format => [qw(6 0xf 5 0x0 5 $1 16 $2)] }, 'rfe' => { args => [], format => [qw(6 0x3f 26 0x0)] }, 'nop' => 'sra r1, r1, r0', # symbolic replacement! 'sync' => 'movs2i r0, ieeef', # symbolic replacement! 'trap' => { args => [\&imm26], format => [qw(6 0x3e 26 $1)] }, 'jr' => { args => [\&gpr], format => [qw(6 0x16 5 $1 21 0x0)] }, 'jalr' => { args => [\&gpr], format => [qw(6 0x17 5 $1 21 0x0)] }, 'slli' => { args => [\&gpr,\&gpr,\&imm5], format => [qw(6 0x0 5 $2 5 0x0 5 $1 5 $3 6 0x0)] }, 'srli' => { args => [\&gpr,\&gpr,\&imm5], format => [qw(6 0x0 5 $2 5 0x0 5 $1 5 $3 6 0x2)] }, 'srai' => { args => [\&gpr,\&gpr,\&imm5], format => [qw(6 0x0 5 $2 5 0x0 5 $1 5 $3 6 0x3)] }, 'lb' => { args => [\&gpr,\&ea], format => [qw(6 0x20 5 $2 5 $1 16 $3)] }, 'lh' => { args => [\&gpr,\&ea], format => [qw(6 0x21 5 $2 5 $1 16 $3)] }, 'lw' => { args => [\&gpr,\&ea], format => [qw(6 0x23 5 $2 5 $1 16 $3)] }, 'lbu' => { args => [\&gpr,\&ea], format => [qw(6 0x24 5 $2 5 $1 16 $3)] }, 'lhu' => { args => [\&gpr,\&ea], format => [qw(6 0x25 5 $2 5 $1 16 $3)] }, 'loads' => { args => [\&fprs,\&ea], format => [qw(6 0x31 5 $2 5 $1 16 $3)] }, 'loadd' => { args => [\&fprd,\&ea], format => [qw(6 0x35 5 $2 5 $1 16 $3)] }, 'sb' => { args => [\&ea,\&gpr], format => [qw(6 0x28 5 $1 5 $3 16 $2)] }, 'sh' => { args => [\&ea,\&gpr], format => [qw(6 0x29 5 $1 5 $3 16 $2)] }, 'sw' => { args => [\&ea,\&gpr], format => [qw(6 0x2b 5 $1 5 $3 16 $2)] }, 'stores' => { args => [\&ea,\&fprs], format => [qw(6 0x39 5 $1 5 $3 16 $2)] }, 'stored' => { args => [\&ea,\&fprd], format => [qw(6 0x3d 5 $1 5 $3 16 $2)] }, ); # TODO brute force, should be changed probably my %BYTES; sub check_seg_overlap_and_advance { return unless $options{pedantic}; my ($start,$len) = @_; for ($start..($start+$len-1)) { die "?Segment overlap / double address use in line $. at address $_\n" if exists $BYTES{$_}; $BYTES{$_} = $cursegment; } } %segments = ( t => { start => undef, end => undef, address => 0 }, d => { start => undef, end => undef, address => 0 }, ); $cursegment = 't'; open SRC or pod2usage("?Can't open `$SRC' for input: $!"); my %reportmultlabel; while(<SRC>) { chomp; s/^\s+//; # remove leading whitespace s/\s*;.*// unless substr($_,0,6) eq '.ascii'; # remove comments next if /^$/; # skip empty lines if( /^([_a-zA-Z]\w*):\s*(.+)?/ ) { # label warn "?Trailing garbage in line $.\n" if defined $2; warn "?Multiply defined label / symbol `$1' in line $. (reported only once)\n" if defined $label{$1} and !$reportmultlabel{$1}++; $label{$1} = $segments{$cursegment}->{address}; } elsif( /^([a-zA-Z]\w*)\b/ ) { # instruction die "Instructions not allowed in data segment (at line $.)\n" if $cursegment eq 'd'; die "Unknown instruction in line $.\n" unless exists $isa{$1}; die "Misaligned instruction in line $.\n" if $segments{$cursegment}->{address} & 0x3 and $options{pedantic}; push @D, [$segments{$cursegment}->{address},$_,$.]; # push for later processing check_seg_overlap_and_advance($segments{$cursegment}->{address},4); $segments{$cursegment}->{address} +=4; } elsif( /^\.set\b\s+([_a-zA-Z]\w*)\s*,\s*(.+)/ ) { die "?multiply defined label / symbol `$1' in line $.\n" if defined $label{$1}; $label{$1} = imm($2,$segments{$cursegment}->{address},$.); } elsif( /^\.(text|data)\b(?:\s+(\S+))?\s*(.+)?/ ) { warn "?Trailing garbage in line $.\n" if defined $3; # save current end address if( $segments{$cursegment}->{address} > $segments{$cursegment}->{end} ) { $segments{$cursegment}->{end} = $segments{$cursegment}->{address}; } #my $arg = eval $2; # TODO correct? my $arg = imm($2,$segments{$cursegment}->{address},$.); $cursegment = substr $1,0,1; if( defined $2 ) { # initialize start if necessary, set address if( $arg >= $segments{$cursegment}->{start} and $segments{$cursegment}->{end} > $arg ) { warn "?Possible segment overlap in line $.\n"; } $segments{$cursegment}->{address} = $arg; $segments{$cursegment}->{start} ||= $arg; } } elsif( /^\.((end)?proc|global)/ ) { # not supported } elsif( /^\.space\b(?:\s+(\S+))?\s*(.+)?/ ) { die ".space not allowed in text segment (at line $.)\n" if $cursegment eq 't'; die ".space without argument (at line $.)\n" unless defined $1; warn "?Trailing garbage in line $.\n" if defined $2; $segments{$cursegment}->{address} += imm($1,$segments{$cursegment}->{address},$.); } elsif( /^\.(?:ascii(z)?)(?:\s+(.*))?/ ) { die ".ascii$1 without argument (at line $.)\n" unless defined $2; $out = pack 'a*' . ($1 && 'x'), eval $2; # pad out to multiple of 4 bytes $out .= "\0" x ((-length($out)) % 4) if $options{meyerpad}; # reverse all groups of 4 bytes (for endianness?) $out =~ s/(.)(.)(.)(.)/$4$3$2$1/gs; # /s is important push @D, [$segments{$cursegment}->{address},$out]; # push for later processing check_seg_overlap_and_advance($segments{$cursegment}->{address},length $out); $segments{$cursegment}->{address} += length $out; } elsif( /^\.(byte|word|half)\s*(.+)?/ ) { die ".$1 without argument (at line $.)\n" unless defined $2; my $s = $2; $s = 1 + $s =~ tr/,/,/; # count arguments if( $1 eq 'half' ) { die "Misaligned half word(s) in line $.\n" if $segments{$cursegment}->{address} & 0x1 and $options{pedantic}; push @D, [$segments{$cursegment}->{address},'n*',$2,666]; check_seg_overlap_and_advance($segments{$cursegment}->{address},2*$s); $segments{$cursegment}->{address} += 2 * $s; } elsif( $1 eq 'byte' ) { push @D, [$segments{$cursegment}->{address},'C*',$2,666]; check_seg_overlap_and_advance($segments{$cursegment}->{address},$s); $segments{$cursegment}->{address} += $s; } else { # word die "Misaligned word(s) in line $.\n" if $segments{$cursegment}->{address} & 0x3 and $options{pedantic}; push @D, [$segments{$cursegment}->{address},'N*',$2,666]; check_seg_overlap_and_advance($segments{$cursegment}->{address},4*$s); $segments{$cursegment}->{address} += 4*$s; } } elsif( /^\.(float|double)\s*(.+)?/ ) { die ".$1 without argument (at line $.)\n" unless defined $2; die "Misaligned float(s) in line $.\n" if $segments{$cursegment}->{address} & 0x3 and $options{pedantic} and $1 eq 'float'; die "Misaligned double(s) in line $.\n" if $segments{$cursegment}->{address} & 0x7 and $options{pedantic} and $1 eq 'double'; my @a = split /\s*,\s*/, $2; @a = map eval $_, @a; # evaluate expressions separately $out = pack substr($1,0,1).'*', @a; push @D, [$segments{$cursegment}->{address},$out]; # push for later processing check_seg_overlap_and_advance($segments{$cursegment}->{address},length $out); $segments{$cursegment}->{address} += length $out; } elsif( /^\.align\b(?:\s+(\S+))?\s*(.+)?/ ) { die ".align without argument (at line $.)\n" unless defined $1; warn "?Trailing garbage in line $.\n" if defined $2; $segments{$cursegment}->{address} += (-($segments{$cursegment}->{address}) % (1<<$1)); } else { warn "?Unknown directive in line $.\n"; } } # update segment endings for $s (keys %segments) { if( $segments{$s}->{address} > $segments{$s}->{end} ) { $segments{$s}->{end} = $segments{$s}->{address}; } } printf "start:%08x %08x %08x %08x %08x %08x\n", $segments{t}->{start}, (($segments{t}->{end} > $segments{d}->{end}) ? $segments{t}->{end} : $segments{d}->{end}), $segments{t}->{start}, $segments{t}->{end} - $segments{t}->{start}, $segments{d}->{start}, $segments{d}->{end} - $segments{d}->{start}; # spill out data $nextaddress = -1; for $d (@D) { my ($a,$o); if( $#$d==2 ) { # must be an instruction my ($a,$instr,$l) = @$d; $_ = $instr; s/^(\w+)\s*// or die; my $mnemonic = $1; # symbolic replacement if( ref( $isa{$mnemonic} ) ne 'HASH' ) { $_ = $_ ne '' ? "$isa{$mnemonic} $_" : $isa{$mnemonic}; # add space only if needed s/^(\w+)\s*// or die; $mnemonic = $1; } my @args = split /\s*,\s*/; my @value; die "?Wrong number of arguments to instruction in line $l\n" if $#args != $#{$isa{$mnemonic}->{args}}; # parse arguments for (@{$isa{$mnemonic}->{args}}) { my $result = $_->(shift(@args),$a,$l); if( ref($result) eq 'ARRAY' ) { push @value, @{$result}; } elsif( ref($result) eq '' ) { push @value, $result; } else { die "Argument without value / error checking... giving up\n"; } } # pack the instruction my $iw = 0x00000000; for $i (0..$#{$isa{$mnemonic}->{format}}) { next if $i & 1; # process this array in pairs my $bw = $isa{$mnemonic}->{format}->[$i]; my $d = $isa{$mnemonic}->{format}->[$i+1]; # get the data if( substr($d,0,1) eq '$' ) { $d = $value[substr($d,1)-1]; } else { $d = eval $d; } if( $bw >= 32 ) { $d &= 0xffffffff; } else { $d &= ((1<<$bw) - 1); }; # or it to the current instruction word $iw <<= $bw if $i; $iw |= $d; } # push the instruction word $d->[1] = pack "N",$iw; } elsif( $#$d==3 ) { # must be half|byte|word # very, very ugly my @a = map imm($_,$d->[0],0), split /\s*,\s*/, $d->[2]; # TODO what about the address ^ here ? $d->[1] = pack $d->[1], @a; } ($a,$o) = @$d; printf "%08x", $a if($a!=$nextaddress); my $data = unpack 'H*', $o; for (my $j = 0; $j < length ($data); $j += 8) { print ':' . substr ($data,$j,8) . "\n"; } $nextaddress = $a + length($o); } # output symbol table if( exists $options{symboltable} ) { open O, ">$options{symboltable}" or die "?Can't open `$options{symboltable}' for symbol table output: $!"; for ( sort { $label{$a} <=> $label{$b} } keys %label ) { printf O "%s 0x%08x\n", $_, $label{$_}; } } if( $options{verbose} ) { # spill out the segment table print STDERR "Sorted segment table:\n"; my $start = 0; my $length; my $type; for ((sort { $a <=> $b } keys %BYTES),-1) { if($type eq $BYTES{$_} and $_==$start+$length) { # continuation, type did not change and address continues $length++; } else { # spill out seg printf STDERR "0x%08x - 0x%08x (0x%08x bytes): %s\n", $start, $start + $length - 1, $length, $type if defined $type; $start = $_; # set new start $length = 1; $type = $BYTES{$_}; # remember type } } } __END__ =head1 NAME mahdlxasm - Assemble DLX (VAMP) to object code =head1 SYNOPSIS B<mahdlxasm> [B<--help>] [B<--output> I<object file>] [B<--pedantic>] [B<--symboltable> I<symbol file>] [B<--verbose>] I<assembler file> =head1 DESCRIPTION mahdlxasm takes a DLX (VAMP) assembler in GNU assembler notation as input and assembles it into a (custom format) object code file. =head1 OPTIONS =over 4 =item B<--help> Print help on options. =item B<--output> I<object file>, B<-o> I<object file> Place output in I<object file> instead of printing it on standard output. =item B<--meyerpad> Zero-pad strings generated by the I<.ascii> and I<.asciiz> directives up multiple of 4 bytes. =item B<--pedantic> Switch on more, possibly time-consuming, sanity checks for the assembler sources. =item B<--symboltable> I<symbol file>, B<--st> I<symbol file> Output all symbols sorted by value into I<symbol file>. =item B<--verbose>, B<-v> Increase verbosity level. Verbosity implies pedantry. =head1 CAVEATS Not all things that work in the GNU assembler work here; features have been added on an as-needed basis rather than being complete. For example, there are no macro defines currently. Expressions are currently almost fully-fledged perl expressions. That is subject to change. Despite its name this program assembles VAMP instructions which are almost, but not quite, entirely unlike DLX assembler. So, it is not advisable to process DLX assembly language with this program. =head1 AUTHOR Mark A. Hillebrand <mah at cs.uni-sb.de> =head1 SEE ALSO as(1), L<http://www-wjp.cs.uni-sb.de/forschung/projekte/VAMP/>