/* umadr_ppc32.S -- unsigned multiply add divide with remainder; PowerPC 32-bit
   Copyright 2005 BitWagon Software LLC.  All rights reserved.
   Licensed under GNU General Public License, version 2; see file COPYING.
*/
	.globl umadr
/* unsigned umadr(unsigned a, unsigned b, unsigned c, unsigned d, unsigned *rp)
 *
 * Unsigned multiply, add, divide with remainder:
 *     q = (a * b + c) / d;   if (rp) *rp = (a * b + c) % d;
 * The intermediate a*b + c is kept as the 64-bit pair (hi,,lo).
 *
 * In:      r3= a   r4= b   r5= c   r6= d   r7= rp (0 ==> do not store remainder)
 * Out:     r3= quotient;  remainder stored through rp when rp != 0
 * Changes: r0, r4, r5, CTR, CR0, CR7, XER[CA]
 *
 * When the numerator needs more than 32 bits, a restoring shift-and-subtract
 * loop is used.  The loop count is trimmed by pre-shifting the numerator so
 * that hi has exactly one more leading zero than d, which guarantees hi < d
 * on entry to the loop.
 *
 * Overflow (quotient does not fit in 32 bits, i.e. hi >= d; this includes
 * d == 0 with hi != 0) yields remainder 0 and a marker quotient:
 *     lz(hi) <  lz(d):  q = 1 << (32 - lz(d))
 *     lz(hi) == lz(d):  q = 0
 * When hi == 0 the quotient comes straight from divwu, so d == 0 is not
 * detected on that path.
 */
umadr:  # q = umadr(a, b, c, d, &r)   (a * b + c) / d
t= 0
a= 3; lo= a; q= a
b= 4; hi= b; r= b
c= 5
d= 6
rp= 7
	mulhwu  t,a,b  # high 32 bits of a*b
	mullw  lo,a,b  # low  32 bits of a*b
	addc   lo,lo,c  # add c; CArry goes into the high word
	addze. hi,t; bne+ L5  # (hi,,lo)= a*b + c; long division if hi != 0
	  /* reduces to 32-bit only */
	divwu t,lo,d
	mullw r,t,d
	subf  r,r,lo  # r= lo - q*d
	mr    q,t
	b L90

L5:  # trim loop count by difference in leading zeroes
	cntlzw t,d
	cntlzw c,hi
	subf. c,t,c  # c= lz(dividend) - lz(divisor)
	blt- L200  # coarse-grained overflow: hi has more bits than d
	bne+ L6
	  /* same number of leading zeroes: quotient fits only if hi < d */
	cmpl 0,hi,d
	blt+ L8  # c == 0: no pre-shift, all 32 iterations
	li c,32  # fine-grained overflow: q= 1<<32, which slw makes 0
	b L210
L6:
	addi c,c,-1  # stop one bit short, so that hi < d after the shift
		/* (hi,,lo) << c */
	li t,~0
	slw   hi,hi,c
	rlwnm lo,lo,c,0,31  # rotate: bits leaving lo wrap to the bottom
	slw    t, t,c  # mask of the bits that stay in lo
	xor hi,hi,lo
	and lo,lo,t
	xor hi,hi,lo  # net effect: hi |= bits rotated out of lo
L8:
	subfic c,c,32  # ignore CArry
	addc  lo,lo,lo  # shift MQ
	mtctr c  # iterations remaining; c is scratch from here on
L10:  /* invariant at loop top: hi < d; CArry= next numerator bit */
	adde  hi,hi,hi  # shift MQ; CArry= bit shifted out of hi
	subfe c,c,c  # c= 0 if a bit was shifted out, else ~0
	subfc hi,d,hi  # trial subtraction
	subfe t,t,t  # Borrow  in all bits
	and   t,t,c  # a shifted-out bit means the subtraction really fit
	subfic c,t,0  # CArry= (t==0)= bit for q
	and   t,t,d  # correction, if needed
	add   hi,hi,t  # restoring division
	adde  lo,lo,lo  # shift MQ; CI= bit for q; CArry= shift_out
	bdnz L10
L90:
	cmpli 7,rp,0; beq 7,L99  # remainder wanted only if rp != 0
	stw r,0(rp)
L99:
	blr

L200:  # coarse overflow
	subfic c,t,32  # ignore CArry
L210:  # fine overflow
	li q,1; slw q,q,c
	li r,0
	b L90

	.section	.note.GNU-stack,"",@progbits
