#!/usr/bin/perl
# (C) 2003-2004 by Mark Hillebrand <mah at cs.uni-sb.de>
# This code is licensed under GNU General Public License (GPL) Version 2,
# see http://www.opensource.org/licenses/gpl-license.php for full license text.
#
# TODO some of the eval's are not needed
# TODO data stuff allowed in text segment?
# TODO replace fake-1-pass by (buffering) 2-pass assembler
# TODO have a strict grammar for expressions
# TODO allow to build binary object or different targets
# TODO just for doing it once: use logger just error reporting? generate test suite?
# TODO range checks might be improved
# TODO add options to enable / disable parts of the ISA (e.g. floating point, no system mode instructions)
use Pod::Usage;
pod2usage(1) if @ARGV==0;
use Getopt::Long;
my %options;
GetOptions(\%options,
	'help',
	'meyerpad',
	'output|o=s',
	'pedantic',
	'symboltable|st=s',
	'verbose|v+',
) or pod2usage(2);
pod2usage(1) if $options{help};
$options{pedantic}=1 if $options{verbose};	# implication required currently

pod2usage("?Specify (single) assembly source file.") unless @ARGV==1;
$SRC = shift;

pod2usage("?Input and symbol table files match.") if exists $options{symboltable} and $options{symboltable} eq $SRC;
if( exists $options{output} ) {
	pod2usage("?Input and output files match.") if $options{output} eq $SRC;
	pod2usage("?Output and symbol table files match.") if exists $options{symboltable} and $options{output} eq $options{symboltable};
	# reopen stdout 
	open STDOUT, ">".$options{output} or die "?Can't open `$options{output}' for output: $!";
}

my $labelre = '\b[_a-zA-Z]\w*';

sub imm($$$) {	# neck-breaking expression parsing
	my ($s,$a,$l) = @_;
	$s =~ s/#//g;
	$s =~ s/\./$a/g;	# dot is the current address
	$s =~ s/($labelre)/exists($label{$1}) or warn "?Undefined label `$1' used in line $l";$label{$1}+0/ge;
	return int(eval $s);
}

sub imm16($$$) {	# with range check (not a strict one, since we cannot distinguish unsigned and signed constants)
	my ($s,$a,$l) = @_;
	my $val = imm($_[0],$_[1],$_[2]);
	# check whether the topmost 17 bits are equal or in unsigned 16-bit range
	if( ($val & 0xFFFF8000)!=0xFFFF8000 && ($val & 0xFFFF8000)!=0x0 && !($val>=0&&$val <= 0xFFFF) ) {
		die "?Immediate constant `$s==$val' out of range in line $l\n";
	}
	return $val;
}

sub imm5($$$) {	# shift amount with range check (unsigned interpretation)
	my ($s,$a,$l) = @_;
	my $val = imm($_[0],$_[1],$_[2]);
	if( $val < 0 || $val >= 32 ) {
		die "?Immediate constant `$s==$val' out of range (unsigned 5 bit) in line $l\n";
	}
	return $val;
}

sub imm26($$$) {	# with range check
	my ($s,$a,$l) = @_;
	my $val = imm($_[0],$_[1],$_[2]);
	# check whether the topmost 27 bits are equal
	if( ($val & 0xFE000000)!=0xFE000000 && ($val & 0xFE000000)!=0x0 ) {
		die "?Immediate constant `$s==$val' out of range in line $l\n";
	}
	return $val;
}

sub gpr($$$) {
	my ($s,$a,$l) = @_;
	$s =~ /[rR](\d+)/ or die "?Cannot parse register in line $l\n";
	die "?Invalid register in line $l\n" if $1 > 31;
	return $1;
}

sub fprs($$$) {
	my ($s,$a,$l) = @_;
	$s =~ /[fF](\d+)/ or die "?Cannot parse register in line $l\n";
	die "?Invalid register in line $l\n" if $1 > 31;
	return $1;
};

sub fprd($$$) { 
	my ($s,$a,$l) = @_;
	my $i = fprs($s,$a,$l);
	die "?Invalid FP double register in line $l\n" if $i & 1;
	return $i;
};

sub ea($$$) { # returns register, immediate
	my ($s,$a,$l) = @_;
	if( $s =~ m/^\s*(.*)\((.*)\)\s*$/ ) {
		return [gpr($2,$a,$l),imm16($1,$a,$l)];
	} else {
		die "?Invalid effective address specification in line $l\n";
	}
}

sub target16($$$) { # 16-bit relative branch target
	my ($s,$a,$l) = @_;
	my $imm = imm($s,$a,$l) - $a - 4;
	# check whether the topmost 17 bits are equal
	if( ($imm & 0xFFFF8000)!=0xFFFF8000 && ($imm & 0xFFFF8000)!=0x0 ) {
		die "?Branch to `$s' out of range for 16 bit immediate (=$imm) in line $l\n";
	}
	return $imm;
}

sub target26($$$) { # 26-bit relative branch target
	my ($s,$a,$l) = @_;
	my $imm = imm($s,$a,$l) - $a - 4;
	# check whether the topmost 7 bits are equal
	if( ($imm & 0xFE000000)!=0xFE000000 && ($imm & 0xFE000000)!=0x0 ) {
		die "?Branch to `$s==$imm' out of range for 26 bit immediate (=$imm) in line $l\n";
	}
	return $imm;
}

my %sprnames = qw(sr 0 esr 1 eca 2 epc 3 edpc 4 edata 5 rm 6 ieeef 7 fcc 8 pto 9 ptl 10 emode 11 mode 16);
sub spr($$$) {
	my ($s,$a,$l) = @_;
	my $i;
	if( exists $sprnames{lc $s} ) {
		return $sprnames{lc $s};
	} elsif(  $s =~ /[sS](\d+)/ ) {
		die "?Invalid register in line $l\n" if $1 > 31;
		$i = $1;
	} else {
		$i = gpr($s,$a,$l);
	}
	die "?Invalid SPR register in line $l\n" unless $i < 12 or $i==16;
	return $i;
}

%isa = (
	'sll' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x4)] },
	'srl' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x6)] },
	'sra' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x7)] },
	'addo' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x20)] },
	'add' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x21)] },
	'subo' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x22)] },
	'sub' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x23)] },
	'and' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x24)] },
	'or' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x25)] },
	'xor' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x26)] },
	'seq' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2a)] },
	'sne' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2d)] },
	'slt' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2c)] },
	'sls' => 'slt',	# symbolic replacement!
	'sgt' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x29)] },
	'sgr' => 'sgt',   # symbolic replacement!
	'sle' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2e)] },
	'sge' => { args => [\&gpr,\&gpr,\&gpr], format => [qw(6 0x0 5 $2 5 $3 5 $1 11 0x2b)] },
	'movi2s' => { args => [\&spr,\&gpr], format => [qw(6 0x0 5 $2 10 0x0 5 $1 6 0x11)] },
	'movs2i' => { args => [\&gpr,\&spr], format => [qw(16 0x0 5 $1 5 $2 6 0x10)] },
	'mi2s' => 'movi2s',	# symbolic replacement!
	'ms2i' => 'movs2i',	# symbolic replacement!
	'fmovs' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x8)] },
	'fnegs' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x4)] },
	'fmovd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x8)] },
	'fnegd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x4)] },
	'mf2i' => { args => [\&gpr,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x9)] },
	'mi2f' => { args => [\&fprs,\&gpr], format => [qw(6 0x11 5 0x 5 $2 5 $1 5 0x0 6 0xa)] },
	'fadds' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x0)] },
	'fsubs' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x1)] },
	'fmuls' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x2)] },
	'fdivs' => { args => [\&fprs,\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x0 6 0x3)] },
	'faddd' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x0)] },
	'fsubd' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x1)] },
	'fmuld' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x2)] },
	'fdivd' => { args => [\&fprd,\&fprd,\&fprd], format => [qw(6 0x11 5 $2 5 $3 5 $1 5 0x1 6 0x3)] },
	'cvtds' => { args => [\&fprs,\&fprd], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x21)] },
	'cvtis' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x0 6 0x24)] },
	'cvtsd' => { args => [\&fprd,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x20)] },
	'cvtid' => { args => [\&fprd,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x1 6 0x24)] },
	'cvtsi' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x4 6 0x20)] },
	'cvtdi' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $2 5 0x0 5 $1 5 0x4 6 0x21)] },
	'fceqs' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x32)] },
	'fcnes' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x3d)] },
	'fclts' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x34)] },
	'fcgts' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x38)] },
	'fcles' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x36)] },
	'fcges' => { args => [\&fprs,\&fprs], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x0 6 0x3a)] },
	'fceqd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x32)] },
	'fcned' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x3d)] },
	'fcltd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x34)] },
	'fcgtd' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x38)] },
	'fcled' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x36)] },
	'fcged' => { args => [\&fprd,\&fprd], format => [qw(6 0x11 5 $1 5 $2 5 0x0 5 0x1 6 0x3a)] },
	'j' => { args => [\&target26], format => [qw(6 0x2 26 $1)] },
	'jal' => { args => [\&target26], format => [qw(6 0x3 26 $1)] },
	'beqz' => { args => [\&gpr,\&target16], format => [qw(6 0x4 5 $1 5 0x0 16 $2)] },
	'bnez' => { args => [\&gpr,\&target16], format => [qw(6 0x5 5 $1 5 0x0 16 $2)] },
	'fbeqz' => { args => [\&target16], format => [qw(6 0x6 5 0x0 5 0x0 16 $1)] },
	'fbnez' => { args => [\&target16], format => [qw(6 0x7 5 0x0 5 0x0 16 $1)] },
	'addio' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x8 5 $2 5 $1 16 $3)] },
	'addi'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x9 5 $2 5 $1 16 $3)] },
	'subio' => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xa 5 $2 5 $1 16 $3)] },
	'subi'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xb 5 $2 5 $1 16 $3)] },
	'andi'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xc 5 $2 5 $1 16 $3)] },
	'ori'   => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xd 5 $2 5 $1 16 $3)] },
	'xori'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0xe 5 $2 5 $1 16 $3)] },
	'seqi'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1a 5 $2 5 $1 16 $3)] },
	'snei'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1d 5 $2 5 $1 16 $3)] },
	'slti'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1c 5 $2 5 $1 16 $3)] },
	'sgti'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x19 5 $2 5 $1 16 $3)] },
	'slei'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1e 5 $2 5 $1 16 $3)] },
	'sgei'  => { args => [\&gpr,\&gpr,\&imm16], format => [qw(6 0x1b 5 $2 5 $1 16 $3)] },
	'lhi'  => { args => [\&gpr,\&imm16], format => [qw(6 0xf 5 0x0 5 $1 16 $2)] },
	'rfe' => { args => [], format => [qw(6 0x3f 26 0x0)] },
	'nop' => 'sra r1, r1, r0',	# symbolic replacement!
	'sync' => 'movs2i r0, ieeef',	# symbolic replacement!
	'trap' => { args => [\&imm26], format => [qw(6 0x3e 26 $1)] },
	'jr' => { args => [\&gpr], format => [qw(6 0x16 5 $1 21 0x0)] },
	'jalr' => { args => [\&gpr], format => [qw(6 0x17 5 $1 21 0x0)] },
	'slli' => { args => [\&gpr,\&gpr,\&imm5], format => [qw(6 0x0 5 $2 5 0x0 5 $1 5 $3 6 0x0)] },
	'srli' => { args => [\&gpr,\&gpr,\&imm5], format => [qw(6 0x0 5 $2 5 0x0 5 $1 5 $3 6 0x2)] },
	'srai' => { args => [\&gpr,\&gpr,\&imm5], format => [qw(6 0x0 5 $2 5 0x0 5 $1 5 $3 6 0x3)] },
	'lb' => { args => [\&gpr,\&ea], format => [qw(6 0x20 5 $2 5 $1 16 $3)] },
	'lh' => { args => [\&gpr,\&ea], format => [qw(6 0x21 5 $2 5 $1 16 $3)] },
	'lw' => { args => [\&gpr,\&ea], format => [qw(6 0x23 5 $2 5 $1 16 $3)] },
	'lbu' => { args => [\&gpr,\&ea], format => [qw(6 0x24 5 $2 5 $1 16 $3)] },
	'lhu' => { args => [\&gpr,\&ea], format => [qw(6 0x25 5 $2 5 $1 16 $3)] },
	'loads' => { args => [\&fprs,\&ea], format => [qw(6 0x31 5 $2 5 $1 16 $3)] },
	'loadd' => { args => [\&fprd,\&ea], format => [qw(6 0x35 5 $2 5 $1 16 $3)] },
	'sb' => { args => [\&ea,\&gpr], format => [qw(6 0x28 5 $1 5 $3 16 $2)] },
	'sh' => { args => [\&ea,\&gpr], format => [qw(6 0x29 5 $1 5 $3 16 $2)] },
	'sw' => { args => [\&ea,\&gpr], format => [qw(6 0x2b 5 $1 5 $3 16 $2)] },
	'stores' => { args => [\&ea,\&fprs], format => [qw(6 0x39 5 $1 5 $3 16 $2)] },
	'stored' => { args => [\&ea,\&fprd], format => [qw(6 0x3d 5 $1 5 $3 16 $2)] },
);

# TODO brute force, should be changed probably
my %BYTES;
sub check_seg_overlap_and_advance {
	return unless $options{pedantic};
	my ($start,$len) = @_;
	for ($start..($start+$len-1)) {
		die "?Segment overlap / double address use in line $. at address $_\n" if exists $BYTES{$_};
		$BYTES{$_} = $cursegment;
	}
}

%segments = (
	t => { start => undef, end => undef, address => 0 },
	d => { start => undef, end => undef, address => 0 },
);

$cursegment = 't';

open SRC or pod2usage("?Can't open `$SRC' for input: $!");

my %reportmultlabel;

while(<SRC>) {
	chomp;
	s/^\s+//; # remove leading whitespace
	s/\s*;.*// unless substr($_,0,6) eq '.ascii'; # remove comments
	next if /^$/; # skip empty lines
	if( /^([_a-zA-Z]\w*):\s*(.+)?/ ) {	# label
		warn "?Trailing garbage in line $.\n" if defined $2;
		warn "?Multiply defined label / symbol `$1' in line $. (reported only once)\n" if defined $label{$1} and !$reportmultlabel{$1}++;
		$label{$1} = $segments{$cursegment}->{address};
	} elsif( /^([a-zA-Z]\w*)\b/ ) {	# instruction
		die "Instructions not allowed in data segment (at line $.)\n" if $cursegment eq 'd';
		die "Unknown instruction in line $.\n" unless exists $isa{$1};
		die "Misaligned instruction in line $.\n" if $segments{$cursegment}->{address} & 0x3 and $options{pedantic};
		push @D, [$segments{$cursegment}->{address},$_,$.];	# push for later processing
		check_seg_overlap_and_advance($segments{$cursegment}->{address},4);
		$segments{$cursegment}->{address} +=4;
	} elsif( /^\.set\b\s+([_a-zA-Z]\w*)\s*,\s*(.+)/ ) {	
		die "?multiply defined label / symbol `$1' in line $.\n" if defined $label{$1};
		$label{$1} = imm($2,$segments{$cursegment}->{address},$.);
	} elsif( /^\.(text|data)\b(?:\s+(\S+))?\s*(.+)?/ ) {	
		warn "?Trailing garbage in line $.\n" if defined $3;
		# save current end address
		if( $segments{$cursegment}->{address} > $segments{$cursegment}->{end} ) {
			$segments{$cursegment}->{end} = $segments{$cursegment}->{address};
		}
		#my $arg = eval $2;
		# TODO correct?
		my $arg = imm($2,$segments{$cursegment}->{address},$.);
		$cursegment = substr $1,0,1;
		if( defined $2 ) {
			# initialize start if necessary, set address
			if( $arg >= $segments{$cursegment}->{start} and $segments{$cursegment}->{end} > $arg ) {
				warn "?Possible segment overlap in line $.\n";
			}
			$segments{$cursegment}->{address} = $arg;
			$segments{$cursegment}->{start} ||= $arg;
		}
	} elsif( /^\.((end)?proc|global)/ ) {
		# not supported
	} elsif( /^\.space\b(?:\s+(\S+))?\s*(.+)?/ ) {
		die ".space not allowed in text segment (at line $.)\n" if $cursegment eq 't';
		die ".space without argument (at line $.)\n" unless defined $1;
		warn "?Trailing garbage in line $.\n" if defined $2;
		$segments{$cursegment}->{address} += imm($1,$segments{$cursegment}->{address},$.);
	} elsif( /^\.(?:ascii(z)?)(?:\s+(.*))?/ ) {
		die ".ascii$1 without argument (at line $.)\n" unless defined $2;
		$out = pack 'a*' . ($1 && 'x'), eval $2; 
		# pad out to multiple of 4 bytes
		$out .= "\0" x ((-length($out)) % 4) if $options{meyerpad};
		# reverse all groups of 4 bytes (for endianness?)
		$out =~ s/(.)(.)(.)(.)/$4$3$2$1/gs;	# /s is important
		push @D, [$segments{$cursegment}->{address},$out];	# push for later processing
		check_seg_overlap_and_advance($segments{$cursegment}->{address},length $out);
		$segments{$cursegment}->{address} += length $out;
	} elsif( /^\.(byte|word|half)\s*(.+)?/ ) {
		die ".$1 without argument (at line $.)\n" unless defined $2;
		my $s = $2;
		$s = 1 + $s =~ tr/,/,/;	# count arguments
		if( $1 eq 'half' ) {
			die "Misaligned half word(s) in line $.\n" if $segments{$cursegment}->{address} & 0x1 and $options{pedantic};
			push @D, [$segments{$cursegment}->{address},'n*',$2,666];
			check_seg_overlap_and_advance($segments{$cursegment}->{address},2*$s);
			$segments{$cursegment}->{address} += 2 * $s;
		} elsif( $1 eq 'byte' ) {
			push @D, [$segments{$cursegment}->{address},'C*',$2,666];
			check_seg_overlap_and_advance($segments{$cursegment}->{address},$s);
			$segments{$cursegment}->{address} += $s;
		} else { # word
			die "Misaligned word(s) in line $.\n" if $segments{$cursegment}->{address} & 0x3 and $options{pedantic};
			push @D, [$segments{$cursegment}->{address},'N*',$2,666];
			check_seg_overlap_and_advance($segments{$cursegment}->{address},4*$s);
			$segments{$cursegment}->{address} += 4*$s;
		}
	} elsif( /^\.(float|double)\s*(.+)?/ ) {
		die ".$1 without argument (at line $.)\n" unless defined $2;
		die "Misaligned float(s) in line $.\n"
			if $segments{$cursegment}->{address} & 0x3 and $options{pedantic} and $1 eq 'float';
		die "Misaligned double(s) in line $.\n"
			if $segments{$cursegment}->{address} & 0x7 and $options{pedantic} and $1 eq 'double';
		my @a = split /\s*,\s*/, $2;
		@a = map eval $_, @a;	# evaluate expressions separately
		$out = pack substr($1,0,1).'*', @a;
		push @D, [$segments{$cursegment}->{address},$out];	# push for later processing
		check_seg_overlap_and_advance($segments{$cursegment}->{address},length $out);
		$segments{$cursegment}->{address} += length $out;
	} elsif( /^\.align\b(?:\s+(\S+))?\s*(.+)?/ ) {
		die ".align without argument (at line $.)\n" unless defined $1;
		warn "?Trailing garbage in line $.\n" if defined $2;
		$segments{$cursegment}->{address} += (-($segments{$cursegment}->{address}) % (1<<$1));
	} else {
		warn "?Unknown directive in line $.\n";
	}
}
# update segment endings
for $s (keys %segments) {
	if( $segments{$s}->{address} > $segments{$s}->{end} ) {
		$segments{$s}->{end} = $segments{$s}->{address};
	}
}
printf "start:%08x %08x %08x %08x %08x %08x\n",
	$segments{t}->{start},
	(($segments{t}->{end} > $segments{d}->{end}) ? $segments{t}->{end} : $segments{d}->{end}),
	$segments{t}->{start},
	$segments{t}->{end} - $segments{t}->{start},
	$segments{d}->{start},
	$segments{d}->{end} - $segments{d}->{start};

# spill out data
$nextaddress = -1;
for $d (@D) {
	my ($a,$o);

	if( $#$d==2 ) {	# must be an instruction
		my ($a,$instr,$l) = @$d;
		$_ = $instr;
		s/^(\w+)\s*// or die;
		my $mnemonic = $1;
		# symbolic replacement
		if( ref( $isa{$mnemonic} ) ne 'HASH' ) {
			$_ = $_ ne '' ? "$isa{$mnemonic} $_" : $isa{$mnemonic};	# add space only if needed
			s/^(\w+)\s*// or die;
			$mnemonic = $1;
		}
		my @args = split /\s*,\s*/;
		my @value;
		die "?Wrong number of arguments to instruction in line $l\n" if $#args != $#{$isa{$mnemonic}->{args}};
		# parse arguments
		for (@{$isa{$mnemonic}->{args}}) {
			my $result = $_->(shift(@args),$a,$l);
			if( ref($result) eq 'ARRAY' ) {
				push @value, @{$result};
			} elsif( ref($result) eq '' ) {
				push @value, $result;
			} else {
				die "Argument without value / error checking... giving up\n";
			}
		}
		# pack the instruction
		my $iw = 0x00000000;
		for $i (0..$#{$isa{$mnemonic}->{format}}) {
			next if $i & 1;	# process this array in pairs
			my $bw = $isa{$mnemonic}->{format}->[$i];
			my $d = $isa{$mnemonic}->{format}->[$i+1];
			# get the data
			if( substr($d,0,1) eq '$' ) {
				$d = $value[substr($d,1)-1];
			} else {
				$d = eval $d;
			}
			if( $bw >= 32 ) { $d &= 0xffffffff; } else { $d &= ((1<<$bw) - 1); };
			# or it to the current instruction word
			$iw <<= $bw if $i;
			$iw |= $d;
		}
		# push the instruction word
		$d->[1] = pack "N",$iw;
	} elsif( $#$d==3 ) {	# must be half|byte|word
		# very, very ugly
		my @a = map imm($_,$d->[0],0), split /\s*,\s*/, $d->[2];
		# TODO what about the address ^ here ?
		$d->[1] = pack $d->[1], @a;
	}

	($a,$o) = @$d;
	printf "%08x", $a if($a!=$nextaddress);
	my $data = unpack 'H*', $o;
	for (my $j = 0; $j < length ($data); $j += 8) {
		print ':' . substr ($data,$j,8) . "\n";
	}
	$nextaddress = $a + length($o);
}
# output symbol table
if( exists $options{symboltable} ) {
	open O, ">$options{symboltable}" or die "?Can't open `$options{symboltable}' for symbol table output: $!";
	for ( sort { $label{$a} <=> $label{$b} } keys %label ) {
		printf O "%s 0x%08x\n", $_, $label{$_};
	}
}

if( $options{verbose} ) {
	# spill out the segment table
	print STDERR "Sorted segment table:\n";
	my $start = 0;
	my $length;
	my $type;
	for ((sort { $a <=> $b } keys %BYTES),-1) {
		if($type eq $BYTES{$_} and $_==$start+$length) {
			# continuation, type did not change and address continues
			$length++;
		} else {
			# spill out seg
			printf STDERR "0x%08x - 0x%08x (0x%08x bytes): %s\n",
				$start, $start + $length - 1, $length, $type if defined $type;
			$start = $_;	# set new start
			$length = 1;
			$type = $BYTES{$_}; # remember type
		}
	}
}
__END__
=head1 NAME

mahdlxasm - Assemble DLX (VAMP) to object code

=head1 SYNOPSIS

B<mahdlxasm>
[B<--help>]
[B<--output> I<object file>]
[B<--pedantic>]
[B<--symboltable> I<symbol file>]
[B<--verbose>]
I<assembler file>

=head1 DESCRIPTION

mahdlxasm takes a DLX (VAMP) assembler in GNU assembler notation as input and assembles it into a (custom format) object code file.

=head1 OPTIONS

=over 4

=item B<--help>

Print help on options.

=item B<--output> I<object file>, B<-o> I<object file>

Place output in I<object file> instead of printing it on standard output.

=item B<--meyerpad>

Zero-pad strings generated by the I<.ascii> and I<.asciiz> directives up multiple of 4 bytes.

=item B<--pedantic>

Switch on more, possibly time-consuming, sanity checks for the assembler sources.

=item B<--symboltable> I<symbol file>, B<--st> I<symbol file>

Output all symbols sorted by value into I<symbol file>.

=item B<--verbose>, B<-v>

Increase verbosity level.
Verbosity implies pedantry.

=head1 CAVEATS

Not all things that work in the GNU assembler work here;
features have been added on an as-needed basis rather than being complete.
For example, there are no macro defines currently.

Expressions are currently almost fully-fledged perl expressions.
That is subject to change.

Despite its name this program assembles VAMP instructions
which are almost, but not quite, entirely unlike DLX assembler.
So, it is not advisable to process DLX assembly language with this program.

=head1 AUTHOR

Mark A. Hillebrand <mah at cs.uni-sb.de>

=head1 SEE ALSO

as(1),
L<http://www-wjp.cs.uni-sb.de/forschung/projekte/VAMP/>