@ libgcc1 routines for ARM cpu.
@ Division and remainder, from Appendix E of the Sparc Version 8
@ Architecture Manual, with fixes from Gordon Irlam.
@ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org)

/* Copyright (C) 1995 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

/*
 * Input: dividend and divisor in r0 and r1 respectively.
 *
 * m4 parameters:
 *  NAME	name of function to generate
 *  OP		OP=div => r0 / r1; OP=mod => r0 % r1
 *  S		S=true => signed; S=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top `decade' of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */

/*
define(N, `4')dnl
define(WORDSIZE, `32')dnl
define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
dnl
define(dividend, `r0')dnl
define(divisor, `r1')dnl
define(Q, `r2')dnl
define(R, `r3')dnl
define(ITER, `ip')dnl
define(V, `lr')dnl
dnl
dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
define(T, `r4')dnl
define(SC, `r5')dnl
ifelse(S, `true', `define(SIGN, `r6')')dnl
define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl
define(ret, `ldmia	sp!, REGLIST pc}')dnl
dnl
dnl This is the recursive definition for developing quotient digits.
dnl
dnl Parameters:
dnl  $1	the current depth, 1 <= $1 <= N
dnl  $2	the current accumulation of quotient bits
dnl  N	max depth
dnl
dnl We add a new bit to $2 and either recurse or insert the bits in
dnl the quotient.  R, Q, and V are inputs and outputs as defined above;
dnl the condition codes are expected to reflect the input R, and are
dnl modified to reflect the output R.
dnl
define(DEVELOP_QUOTIENT_BITS,
`	@ depth $1, accumulated bits $2
	mov	V, V, lsr #1
	blt	L.$1.eval(2^N+$2+999)
	@ remainder is positive
	subs	R, R, V
	ifelse($1, N,
	`	ifelse(eval(2*$2+1<0), `0',
		`add	Q, Q, `#'eval($2*2+1)',
		`sub	Q, Q, `#'eval(-($2*2+1))')

		b	9f
	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
L.$1.eval(2^N+$2+999):
	@ remainder is negative
	adds	R, R, V
	ifelse($1, N,
	`	ifelse(eval(2*$2-1<0), `0',
		`add	Q, Q, `#'eval($2*2-1)',
		`sub	Q, Q, `#'eval(-($2*2-1))')
		b	9f

	', `	DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
	ifelse($1, 1, `9:')')dnl

#include "trap.h"

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl NAME
	.align 0
NAME:
	stmdb	sp!, REGLIST lr}
ifelse(S, `true',
`	@ compute sign of result; if neither is negative, no problem
	ifelse(OP, `div', `eor	SIGN, divisor, dividend	@ compute sign',
		`mov	SIGN, dividend')
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0
	beq	Ldiv_zero
	mov	V, divisor
	movs	R, dividend
	rsbmi	R, R, #0	@ make dividend nonnegative
',
`	@ Ready to divide.  Compute size of quotient; scale comparand.
	movs	V, divisor
	mov	R, dividend
	beq	Ldiv_zero
')

	cmp	R, V			@ if divisor exceeds dividend, done
	mov	Q, #0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	T, `#'(1 << (WORDSIZE - TOPBITS - 1))
	cmp	R, T
	mov	ITER, #0
	bcc	Lnot_really_big

	@ `Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.'
		mov	SC, #1
	1:
		cmp	V, T
		bcs	3f
		mov	V, V, lsl `#'N
		add	ITER, ITER, #1
		b	1b

	@ Now compute SC.
	2:	adds	V, V, V
		add	SC, SC, #1
		bcc	Lnot_too_big

		@ We get here if the divisor overflowed while shifting.
		@ This means that R has the high-order bit set.
		@ Restore V and subtract from R.
		mov	T, T, lsl `#'TOPBITS
		mov	V, V, lsr #1
		add	V, T, V
		sub	SC, SC, #1
		b	Ldo_single_div

	Lnot_too_big:
	3:	cmp	V, R
		bcc	2b
@		beq	Ldo_single_div

	/-* NB: these are commented out in the V8-Sparc manual as well *-/
	/-* (I do not understand this) *-/
	@ V > R: went too far: back up 1 step
	@	srl	V, 1, V
	@	dec	SC
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that R >= V, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if R >= 0.  Because both R and V may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
	Ldo_single_div:
		subs	SC, SC, #1
		blt	Lend_regular_divide
		sub	R, R, V
		mov	Q, #1
		b	Lend_single_divloop
	Lsingle_divloop:
		cmp	R, #0
		mov	Q, Q, lsl #1
		mov	V, V, lsr #1
		@ R >= 0
		subpl	R, R, V
		addpl	Q, Q, #1
		@ R < 0
		addmi	R, R, V
		submi	Q, Q, #1
	Lend_single_divloop:
		subs	SC, SC, #1
		bge	Lsingle_divloop
		b	Lend_regular_divide

1:
	add	ITER, ITER, #1
Lnot_really_big:
	mov	V, V, lsl `#'N
	cmp	V, R
	bls	1b
	@
	@	HOW CAN ITER EVER BE -1 HERE ?????
	@
	cmn	ITER, #1
	beq	Lgot_result

Ldivloop:
	cmp	R, #0	@ set up for initial iteration
	mov	Q, Q, lsl `#'N
	DEVELOP_QUOTIENT_BITS(1, 0)
Lend_regular_divide:
	subs	ITER, ITER, #1
	bge	Ldivloop
	cmp	R, #0
	@ non-restoring fixup here (one instruction only!)
ifelse(OP, `div',
`	sublt	Q, Q, #1
', `	addlt	R, divisor, R
')

Lgot_result:
ifelse(S, `true',
`	@ check to see if answer should be < 0
	cmp	SIGN, #0
	ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0')
')
	ifelse(OP, `div', `mov r0, Q', `mov r0, R')
	ret

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0
	mov	r0, #0
	ret
*/

#ifdef L_udivsi3

@ ___udivsi3: unsigned 32-bit division.
@
@ In:   r0 = dividend, r1 = divisor
@ Out:  r0 = r0 / r1 (unsigned quotient).  For a zero divisor ___div0 is
@       called and, if it returns, r0 = 0.
@ r4, r5 and lr are saved on the stack and restored on exit; r2, r3 and
@ ip are overwritten without being saved.
@
@ Register roles (names are those of the m4 template this was expanded from):
@   r2 = Q     partial quotient, initially 0
@   r3 = R     remainder so far, initially the dividend
@   lr = V     current comparand, the divisor scaled by a power of two
@   ip = ITER  number of 4-bit passes of the main loop still to run
@   r4 = T     scratch constant (2^27, later 2^31)
@   r5 = SC    number of single-bit steps needed for a large dividend
@
@ The quotient is developed by non-restoring division: R is allowed to go
@ negative between steps and one conditional fixup at the end corrects it.

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___udivsi3
	.align 0
___udivsi3:
	stmdb	sp!, {r4, r5, lr}	@ lr is reused as V, so keep the return address
	@ Ready to divide.  Compute size of quotient; scale comparand.
	movs	lr, r1			@ V = divisor; Z is set when the divisor is zero
	mov	r3, r0			@ R = dividend (flags are left alone)
	beq	Ldiv_zero


	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0			@ Q = 0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))	@ T = 2^27
	cmp	r3, r4
	mov	ip, #0			@ ITER = 0
	bcc	Lnot_really_big		@ R < 2^27: the 4-bits-per-pass loop is safe

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
		mov	r5, #1		@ SC = 1
	@ Shift V left 4 bits at a time until V >= T, counting the shifts in ITER.
	1:
		cmp	lr, r4
		bcs	3f
		mov	lr, lr, lsl #4
		add	ip, ip, #1
		b	1b

	@ Now compute SC (r5): double V until it is no longer below R.
	2:	adds	lr, lr, lr	@ V = V * 2; carry set if the top bit was lost
		add	r5, r5, #1
		bcc	Lnot_too_big

		@ We get here if the comparand V (lr) overflowed while shifting.
		@ This means that R (r3) has the high-order bit set.
		@ Restore lr and subtract from r3.
		mov	r4, r4, lsl #4	@ T = 2^31, the bit that was shifted out
		mov	lr, lr, lsr #1
		add	lr, r4, lr	@ V is back to its value before the last doubling
		sub	r5, r5, #1	@ do not count the doubling that overflowed
		b	Ldo_single_div

	Lnot_too_big:
	3:	cmp	lr, r3
		bcc	2b		@ unsigned V < R: double again
@		beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
	Ldo_single_div:
		subs	r5, r5, #1
		blt	Lend_regular_divide
		sub	r3, r3, lr	@ unconditional first step: R = R - V
		mov	r2, #1		@ Q = 1
		b	Lend_single_divloop
	Lsingle_divloop:
		cmp	r3, #0		@ sign of R selects the pl or mi pair below
		mov	r2, r2, lsl #1	@ make room for the next quotient bit
		mov	lr, lr, lsr #1	@ V = V / 2
		@ r3 >= 0
		subpl	r3, r3, lr
		addpl	r2, r2, #1
		@ r3 < 0
		addmi	r3, r3, lr
		submi	r2, r2, #1
	Lend_single_divloop:
		subs	r5, r5, #1
		bge	Lsingle_divloop
		b	Lend_regular_divide

	@ Small dividend: scale V up 4 bits at a time until V > R (unsigned),
	@ counting the extra shifts in ITER.
1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

	@ Main loop.  One pass develops 4 quotient bits.  Each of the 4 levels
	@ halves V and then subtracts it from R when the flags say R is not
	@ negative, or adds it to R when they say R is negative.  Every leaf
	@ adds its odd signed digit (-15 to +15) to Q and branches to label 9.
	@ The accumulated bits notes give the signed digit built up so far.
Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
		@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #15

		b	9f
	
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #13
		b	9f

	
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #11

		b	9f
	
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #9
		b	9f

	
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #7

		b	9f
	
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #5
		b	9f

	
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #3

		b	9f
	
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #1
		b	9f

	
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #1

		b	9f
	
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #3
		b	9f

	
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #5

		b	9f
	
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #7
		b	9f

	
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #9

		b	9f
	
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #11
		b	9f

	
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #13

		b	9f
	
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #15
		b	9f

	
	9:
Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop		@ another 4-bit pass while ITER stays >= 0
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	sublt	r2, r2, #1		@ R ended up negative, so Q is one too large


Lgot_result:

	mov r0, r2			@ return the quotient
	ldmia	sp!, {r4, r5, pc}	@ restore r4, r5; saved lr goes straight into pc

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0			@ overwrites lr; the return address is on the stack
	mov	r0, #0
	ldmia	sp!, {r4, r5, pc}

#endif /* L_udivsi3 */

#ifdef L_divsi3

@ ___divsi3: signed 32-bit division.
@
@ In:   r0 = dividend, r1 = divisor
@ Out:  r0 = r0 / r1.  The quotient of the two magnitudes is computed and
@       then negated when the operand signs differ.  For a zero divisor
@       ___div0 is called and, if it returns, r0 = 0.
@ r4, r5, r6 and lr are saved on the stack and restored on exit; r2, r3
@ and ip are overwritten without being saved, and r1 is replaced by its
@ magnitude when it was negative.
@
@ Register roles (names are those of the m4 template this was expanded from):
@   r2 = Q     partial quotient, initially 0
@   r3 = R     remainder so far, initially the magnitude of the dividend
@   lr = V     current comparand, the divisor magnitude scaled by a power of two
@   ip = ITER  number of 4-bit passes of the main loop still to run
@   r4 = T     scratch constant (2^27, later 2^31)
@   r5 = SC    number of single-bit steps needed for a large dividend
@   r6 = SIGN  dividend EOR divisor; bit 31 is the sign of the result
@
@ The quotient is developed by non-restoring division: R is allowed to go
@ negative between steps and one conditional fixup at the end corrects it.

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___divsi3
	.align 0
___divsi3:
	stmdb	sp!, {r4, r5, r6, lr}	@ lr is reused as V, so keep the return address
	@ compute sign of result; if neither is negative, no problem
	eor	r6, r1, r0	@ compute sign
	cmp	r1, #0
	rsbmi	r1, r1, #0	@ make divisor nonnegative; flags still come from the cmp
	beq	Ldiv_zero
	mov	lr, r1		@ V = magnitude of the divisor
	movs	r3, r0		@ R = dividend, flags reflect its sign
	rsbmi	r3, r3, #0	@ make dividend nonnegative


	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0			@ Q = 0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))	@ T = 2^27
	cmp	r3, r4
	mov	ip, #0			@ ITER = 0
	bcc	Lnot_really_big		@ R < 2^27: the 4-bits-per-pass loop is safe

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
		mov	r5, #1		@ SC = 1
	@ Shift V left 4 bits at a time until V >= T, counting the shifts in ITER.
	1:
		cmp	lr, r4
		bcs	3f
		mov	lr, lr, lsl #4
		add	ip, ip, #1
		b	1b

	@ Now compute SC (r5): double V until it is no longer below R.
	2:	adds	lr, lr, lr	@ V = V * 2; carry set if the top bit was lost
		add	r5, r5, #1
		bcc	Lnot_too_big

		@ We get here if the comparand V (lr) overflowed while shifting.
		@ This means that R (r3) has the high-order bit set.
		@ Restore lr and subtract from r3.
		mov	r4, r4, lsl #4	@ T = 2^31, the bit that was shifted out
		mov	lr, lr, lsr #1
		add	lr, r4, lr	@ V is back to its value before the last doubling
		sub	r5, r5, #1	@ do not count the doubling that overflowed
		b	Ldo_single_div

	Lnot_too_big:
	3:	cmp	lr, r3
		bcc	2b		@ unsigned V < R: double again
@		beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
	Ldo_single_div:
		subs	r5, r5, #1
		blt	Lend_regular_divide
		sub	r3, r3, lr	@ unconditional first step: R = R - V
		mov	r2, #1		@ Q = 1
		b	Lend_single_divloop
	Lsingle_divloop:
		cmp	r3, #0		@ sign of R selects the pl or mi pair below
		mov	r2, r2, lsl #1	@ make room for the next quotient bit
		mov	lr, lr, lsr #1	@ V = V / 2
		@ r3 >= 0
		subpl	r3, r3, lr
		addpl	r2, r2, #1
		@ r3 < 0
		addmi	r3, r3, lr
		submi	r2, r2, #1
	Lend_single_divloop:
		subs	r5, r5, #1
		bge	Lsingle_divloop
		b	Lend_regular_divide

	@ Small dividend: scale V up 4 bits at a time until V > R (unsigned),
	@ counting the extra shifts in ITER.
1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

	@ Main loop.  One pass develops 4 quotient bits.  Each of the 4 levels
	@ halves V and then subtracts it from R when the flags say R is not
	@ negative, or adds it to R when they say R is negative.  Every leaf
	@ adds its odd signed digit (-15 to +15) to Q and branches to label 9.
	@ The accumulated bits notes give the signed digit built up so far.
Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
		@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #15

		b	9f
	
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #13
		b	9f

	
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #11

		b	9f
	
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #9
		b	9f

	
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #7

		b	9f
	
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #5
		b	9f

	
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #3

		b	9f
	
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #1
		b	9f

	
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #1

		b	9f
	
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #3
		b	9f

	
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #5

		b	9f
	
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #7
		b	9f

	
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #9

		b	9f
	
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #11
		b	9f

	
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #13

		b	9f
	
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #15
		b	9f

	
	9:
Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop		@ another 4-bit pass while ITER stays >= 0
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	sublt	r2, r2, #1		@ R ended up negative, so Q is one too large


Lgot_result:
	@ check to see if answer should be < 0
	cmp	r6, #0			@ N is set when the operand signs differed
	rsbmi r2, r2, #0		@ negate the quotient in that case

	mov r0, r2			@ return the quotient
	ldmia	sp!, {r4, r5, r6, pc}	@ restore r4-r6; saved lr goes straight into pc

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0			@ overwrites lr; the return address is on the stack
	mov	r0, #0
	ldmia	sp!, {r4, r5, r6, pc}

#endif /* L_divsi3 */

#ifdef L_umodsi3

@ ___umodsi3: unsigned 32-bit remainder.
@
@ In:   r0 = dividend, r1 = divisor
@ Out:  r0 = r0 % r1 (unsigned remainder).  For a zero divisor ___div0
@       is called and, if it returns, r0 = 0.
@ r4, r5 and lr are saved on the stack and restored on exit; r2, r3 and
@ ip are overwritten without being saved.  r1 is not modified here and
@ is read again by the final fixup.
@
@ Register roles (names are those of the m4 template this was expanded from):
@   r2 = Q     partial quotient, initially 0 (developed but not returned)
@   r3 = R     remainder so far, initially the dividend
@   lr = V     current comparand, the divisor scaled by a power of two
@   ip = ITER  number of 4-bit passes of the main loop still to run
@   r4 = T     scratch constant (2^27, later 2^31)
@   r5 = SC    number of single-bit steps needed for a large dividend
@
@ The division is non-restoring: R is allowed to go negative between
@ steps and one conditional fixup at the end corrects it.

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___umodsi3
	.align 0
___umodsi3:
	stmdb	sp!, {r4, r5, lr}	@ lr is reused as V, so keep the return address
	@ Ready to divide.  Compute size of quotient; scale comparand.
	movs	lr, r1			@ V = divisor; Z is set when the divisor is zero
	mov	r3, r0			@ R = dividend (flags are left alone)
	beq	Ldiv_zero


	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0			@ Q = 0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))	@ T = 2^27
	cmp	r3, r4
	mov	ip, #0			@ ITER = 0
	bcc	Lnot_really_big		@ R < 2^27: the 4-bits-per-pass loop is safe

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
		mov	r5, #1		@ SC = 1
	@ Shift V left 4 bits at a time until V >= T, counting the shifts in ITER.
	1:
		cmp	lr, r4
		bcs	3f
		mov	lr, lr, lsl #4
		add	ip, ip, #1
		b	1b

	@ Now compute SC (r5): double V until it is no longer below R.
	2:	adds	lr, lr, lr	@ V = V * 2; carry set if the top bit was lost
		add	r5, r5, #1
		bcc	Lnot_too_big

		@ We get here if the comparand V (lr) overflowed while shifting.
		@ This means that R (r3) has the high-order bit set.
		@ Restore lr and subtract from r3.
		mov	r4, r4, lsl #4	@ T = 2^31, the bit that was shifted out
		mov	lr, lr, lsr #1
		add	lr, r4, lr	@ V is back to its value before the last doubling
		sub	r5, r5, #1	@ do not count the doubling that overflowed
		b	Ldo_single_div

	Lnot_too_big:
	3:	cmp	lr, r3
		bcc	2b		@ unsigned V < R: double again
@		beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
	Ldo_single_div:
		subs	r5, r5, #1
		blt	Lend_regular_divide
		sub	r3, r3, lr	@ unconditional first step: R = R - V
		mov	r2, #1		@ Q = 1
		b	Lend_single_divloop
	Lsingle_divloop:
		cmp	r3, #0		@ sign of R selects the pl or mi pair below
		mov	r2, r2, lsl #1	@ make room for the next quotient bit
		mov	lr, lr, lsr #1	@ V = V / 2
		@ r3 >= 0
		subpl	r3, r3, lr
		addpl	r2, r2, #1
		@ r3 < 0
		addmi	r3, r3, lr
		submi	r2, r2, #1
	Lend_single_divloop:
		subs	r5, r5, #1
		bge	Lsingle_divloop
		b	Lend_regular_divide

	@ Small dividend: scale V up 4 bits at a time until V > R (unsigned),
	@ counting the extra shifts in ITER.
1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

	@ Main loop.  One pass develops 4 quotient bits.  Each of the 4 levels
	@ halves V and then subtracts it from R when the flags say R is not
	@ negative, or adds it to R when they say R is negative.  Every leaf
	@ adds its odd signed digit (-15 to +15) to Q and branches to label 9.
	@ The accumulated bits notes give the signed digit built up so far.
Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
		@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #15

		b	9f
	
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #13
		b	9f

	
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #11

		b	9f
	
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #9
		b	9f

	
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #7

		b	9f
	
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #5
		b	9f

	
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #3

		b	9f
	
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #1
		b	9f

	
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #1

		b	9f
	
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #3
		b	9f

	
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #5

		b	9f
	
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #7
		b	9f

	
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #9

		b	9f
	
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #11
		b	9f

	
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #13

		b	9f
	
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #15
		b	9f

	
	9:
Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop		@ another 4-bit pass while ITER stays >= 0
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	addlt	r3, r1, r3		@ R ended up negative: add the divisor back once


Lgot_result:

	mov r0, r3			@ return the remainder
	ldmia	sp!, {r4, r5, pc}	@ restore r4, r5; saved lr goes straight into pc

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0			@ overwrites lr; the return address is on the stack
	mov	r0, #0
	ldmia	sp!, {r4, r5, pc}

#endif /* L_umodsi3 */

#ifdef L_modsi3

@ ___modsi3: signed 32-bit remainder.
@
@ In:   r0 = dividend, r1 = divisor
@ Out:  r0 = r0 % r1.  The remainder of the two magnitudes is computed
@       and then negated when the dividend was negative, so the result
@       takes the sign of the dividend.  For a zero divisor ___div0 is
@       called and, if it returns, r0 = 0.
@ r4, r5, r6 and lr are saved on the stack and restored on exit; r2, r3
@ and ip are overwritten without being saved, and r1 is replaced by its
@ magnitude when it was negative (the final fixup relies on that).
@
@ Register roles (names are those of the m4 template this was expanded from):
@   r2 = Q     partial quotient, initially 0 (developed but not returned)
@   r3 = R     remainder so far, initially the magnitude of the dividend
@   lr = V     current comparand, the divisor magnitude scaled by a power of two
@   ip = ITER  number of 4-bit passes of the main loop still to run
@   r4 = T     scratch constant (2^27, later 2^31)
@   r5 = SC    number of single-bit steps needed for a large dividend
@   r6 = SIGN  copy of the dividend; bit 31 is the sign of the result
@
@ The division is non-restoring: R is allowed to go negative between
@ steps and one conditional fixup at the end corrects it.

ip	.req	r12
sp	.req	r13
lr	.req	r14
pc	.req	r15
.text
	.globl ___modsi3
	.align 0
___modsi3:
	stmdb	sp!, {r4, r5, r6, lr}	@ lr is reused as V, so keep the return address
	@ compute sign of result; if neither is negative, no problem
	mov	r6, r0		@ the result sign is the sign of the dividend
	cmp	r1, #0
	rsbmi	r1, r1, #0	@ make divisor nonnegative; flags still come from the cmp
	beq	Ldiv_zero
	mov	lr, r1		@ V = magnitude of the divisor
	movs	r3, r0		@ R = dividend, flags reflect its sign
	rsbmi	r3, r3, #0	@ make dividend nonnegative


	cmp	r3, lr			@ if divisor exceeds dividend, done
	mov	r2, #0			@ Q = 0
	bcc	Lgot_result		@ (and algorithm fails otherwise)
	mov	r4, #(1 << (32 - 4 - 1))	@ T = 2^27
	cmp	r3, r4
	mov	ip, #0			@ ITER = 0
	bcc	Lnot_really_big		@ R < 2^27: the 4-bits-per-pass loop is safe

	@ Here the dividend is >= 2^(31-N) or so.  We must be careful here,
	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
	@ Compute ITER in an unorthodox manner: know we need to shift V into
	@ the top decade: so do not even bother to compare to R.
		mov	r5, #1		@ SC = 1
	@ Shift V left 4 bits at a time until V >= T, counting the shifts in ITER.
	1:
		cmp	lr, r4
		bcs	3f
		mov	lr, lr, lsl #4
		add	ip, ip, #1
		b	1b

	@ Now compute SC (r5): double V until it is no longer below R.
	2:	adds	lr, lr, lr	@ V = V * 2; carry set if the top bit was lost
		add	r5, r5, #1
		bcc	Lnot_too_big

		@ We get here if the comparand V (lr) overflowed while shifting.
		@ This means that R (r3) has the high-order bit set.
		@ Restore lr and subtract from r3.
		mov	r4, r4, lsl #4	@ T = 2^31, the bit that was shifted out
		mov	lr, lr, lsr #1
		add	lr, r4, lr	@ V is back to its value before the last doubling
		sub	r5, r5, #1	@ do not count the doubling that overflowed
		b	Ldo_single_div

	Lnot_too_big:
	3:	cmp	lr, r3
		bcc	2b		@ unsigned V < R: double again
@		beq	Ldo_single_div

	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	@ lr > r3: went too far: back up 1 step
	@	srl	lr, 1, lr
	@	dec	r5
	@ do single-bit divide steps
	@
	@ We have to be careful here.  We know that r3 >= lr, so we can do the
	@ first divide step without thinking.  BUT, the others are conditional,
	@ and are only done if r3 >= 0.  Because both r3 and lr may have the high-
	@ order bit set in the first step, just falling into the regular
	@ division loop will mess up the first time around.
	@ So we unroll slightly...
	Ldo_single_div:
		subs	r5, r5, #1
		blt	Lend_regular_divide
		sub	r3, r3, lr	@ unconditional first step: R = R - V
		mov	r2, #1		@ Q = 1
		b	Lend_single_divloop
	Lsingle_divloop:
		cmp	r3, #0		@ sign of R selects the pl or mi pair below
		mov	r2, r2, lsl #1	@ make room for the next quotient bit
		mov	lr, lr, lsr #1	@ V = V / 2
		@ r3 >= 0
		subpl	r3, r3, lr
		addpl	r2, r2, #1
		@ r3 < 0
		addmi	r3, r3, lr
		submi	r2, r2, #1
	Lend_single_divloop:
		subs	r5, r5, #1
		bge	Lsingle_divloop
		b	Lend_regular_divide

	@ Small dividend: scale V up 4 bits at a time until V > R (unsigned),
	@ counting the extra shifts in ITER.
1:
	add	ip, ip, #1
Lnot_really_big:
	mov	lr, lr, lsl #4
	cmp	lr, r3
	bls	1b
	@
	@	HOW CAN ip EVER BE -1 HERE ?????
	@
	cmn	ip, #1
	beq	Lgot_result

	@ Main loop.  One pass develops 4 quotient bits.  Each of the 4 levels
	@ halves V and then subtracts it from R when the flags say R is not
	@ negative, or adds it to R when they say R is negative.  Every leaf
	@ adds its odd signed digit (-15 to +15) to Q and branches to label 9.
	@ The accumulated bits notes give the signed digit built up so far.
Ldivloop:
	cmp	r3, #0	@ set up for initial iteration
	mov	r2, r2, lsl #4
		@ depth 1, accumulated bits 0
	mov	lr, lr, lsr #1
	blt	L.1.1015
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 2, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.2.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.3.1018
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 7
	mov	lr, lr, lsr #1
	blt	L.4.1022
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #15

		b	9f
	
L.4.1022:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #13
		b	9f

	
L.3.1018:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 5
	mov	lr, lr, lsr #1
	blt	L.4.1020
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #11

		b	9f
	
L.4.1020:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #9
		b	9f

	
L.2.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.3.1016
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits 3
	mov	lr, lr, lsr #1
	blt	L.4.1018
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #7

		b	9f
	
L.4.1018:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #5
		b	9f

	
L.3.1016:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits 1
	mov	lr, lr, lsr #1
	blt	L.4.1016
	@ remainder is positive
	subs	r3, r3, lr
		add	r2, r2, #3

		b	9f
	
L.4.1016:
	@ remainder is negative
	adds	r3, r3, lr
		add	r2, r2, #1
		b	9f

	
L.1.1015:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 2, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.2.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 3, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.3.1014
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -1
	mov	lr, lr, lsr #1
	blt	L.4.1014
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #1

		b	9f
	
L.4.1014:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #3
		b	9f

	
L.3.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.4.1012
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #5

		b	9f
	
L.4.1012:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #7
		b	9f

	
L.2.1014:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 3, accumulated bits -3
	mov	lr, lr, lsr #1
	blt	L.3.1012
	@ remainder is positive
	subs	r3, r3, lr
			@ depth 4, accumulated bits -5
	mov	lr, lr, lsr #1
	blt	L.4.1010
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #9

		b	9f
	
L.4.1010:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #11
		b	9f

	
L.3.1012:
	@ remainder is negative
	adds	r3, r3, lr
			@ depth 4, accumulated bits -7
	mov	lr, lr, lsr #1
	blt	L.4.1008
	@ remainder is positive
	subs	r3, r3, lr
		sub	r2, r2, #13

		b	9f
	
L.4.1008:
	@ remainder is negative
	adds	r3, r3, lr
		sub	r2, r2, #15
		b	9f

	
	9:
Lend_regular_divide:
	subs	ip, ip, #1
	bge	Ldivloop		@ another 4-bit pass while ITER stays >= 0
	cmp	r3, #0
	@ non-restoring fixup here (one instruction only!)
	addlt	r3, r1, r3		@ R ended up negative: add the divisor magnitude back once


Lgot_result:
	@ check to see if answer should be < 0
	cmp	r6, #0			@ N is set when the dividend was negative
	rsbmi r3, r3, #0		@ negate the remainder in that case

	mov r0, r3			@ return the remainder
	ldmia	sp!, {r4, r5, r6, pc}	@ restore r4-r6; saved lr goes straight into pc

Ldiv_zero:
	@ Divide by zero trap.  If it returns, return 0 (about as
	@ wrong as possible, but that is what SunOS does...).
	bl	___div0			@ overwrites lr; the return address is on the stack
	mov	r0, #0
	ldmia	sp!, {r4, r5, r6, pc}

#endif /* L_modsi3 */

#ifdef L_dvmd_tls

@ ___div0: default divide-by-zero hook.
@
@ The division and remainder routines in this file branch-and-link here
@ when the divisor is zero.  This version takes no arguments, does no
@ work and returns to its caller at once without touching any register
@ other than pc.
@
@ Like the other sections of this file, declare the register aliases that
@ are used and select the text section explicitly, so that this section
@ assembles on its own exactly as its siblings do.

lr	.req	r14
pc	.req	r15
.text
	.globl ___div0
	.align 0
___div0:
	mov	pc, lr		@ return immediately

#endif /* L_dvmd_tls */