;
;	Bcopy
;
;	Aaron Sawdey 1996; released to the Public Domain.
;
;	_bcopy copies a block of bytes from a source region to a destination
;	region and handles the case where the two regions overlap.
;
;	Arguments (read relative to fp, which is set to the entry value of sp):
;	    0(r30)  source address       -> r5
;	    4(r30)  destination address  -> r3
;	    8(r30)  byte count           -> r4
;
;	Registers:
;	    r1      scratch for comparisons; not saved or restored
;	    r2      alignment byte counter, later the word source pointer
;	    r6      the byte/word in flight
;	    r2-r6   saved in the 32-byte frame and restored before returning
;
;	Strategy:
;	    count == 0                          -> return immediately
;	    src < dst and src+count-1 < dst     -> copy forward (ascending)
;	    dst < src                           -> copy forward (ascending)
;	    otherwise (src inside/at dst range) -> copy backward (descending)
;	In either direction, when (src - dst) is a multiple of 4 the routine
;	copies single bytes until the source is word aligned, then whole words
;	with lw/sw, then the remaining bytes; otherwise it copies byte by byte.
;
;	Delay slots: as the ";; filled delay slot:" annotations mark, the
;	instruction written after each branch/jump is executed whether or not
;	the branch is taken. Several labels below depend on values set there.
;
;	NOTE(review): the count is tested with signed compares (sgti/slei), so
;	a count with the top bit set copies nothing - confirm this matches the
;	C prototype used by callers.
;	NOTE(review): src == dst takes the backward path and still performs the
;	full copy; harmless, but no early exit exists for it.
;
	.text
	.proc _bcopy
	.global _bcopy
_bcopy:
;  Function 'bcopy'; 0 bytes of locals, 5 regs to save.
	sw	-4(r29),r30	; save caller's frame pointer (r30) just below sp
	add	r30,r0,r29	; fp = sp
	sw	-8(r29),r31	; push ret addr
	subui	r29,r29,#32	; alloc local storage
	sw	0(r29),r2	; save the five registers this routine modifies
	sw	4(r29),r3
	sw	8(r29),r4
	sw	12(r29),r5
	sw	16(r29),r6
	lw	r4,8(r30)	; r4 = byte count
	lw	r5,(r30)	; r5 = source address
	snei	r1,r4,#0
	beqz	r1,LbcopyL1	; count == 0: nothing to copy
	;; filled delay slot:
	lw	r3,4(r30)	; r3 = destination address
	sltu	r1,r5,r3	; r1 = (src < dst), unsigned
	beqz	r1,LbcopyL3	; src >= dst: decide direction at LbcopyL3
	;; filled delay slot:
	add	r1,r5,r4	; r1 = src + count
	addi	r1,r1,#-1	; r1 = address of the last source byte
	sltu	r1,r1,r3	; r1 = (last source byte < dst), unsigned
	beqz	r1,LbcopyL3	; source range reaches dst: regions overlap
	;; filled delay slot:
	sub	r1,r3,r5	; r1 = dst - src
	; Source lies wholly below destination: forward copy.
	andi	r1,r1,#0x3
	snei	r1,r1,#0	; r1 = low two bits of src and dst differ
	bnez	r1,LbcopyL45	; not mutually alignable: forward byte loop
	;; filled delay slot:
	add	r1,r0,r4	; r1 = count, which LbcopyL45 tests
	j	LbcopyL46	; mutually alignable: forward word path
	;; filled delay slot:
	andi	r2,r5,#0x3	; r2 = src & 3, consumed at LbcopyL46
LbcopyL3:
	sltu	r1,r3,r5	; r1 = (dst < src), unsigned
	beqz	r1,LbcopyL9	; dst >= src here: copy backward
	;; filled delay slot:
	sub	r1,r5,r3	; r1 = src - dst
	andi	r1,r1,#0x3
	snei	r1,r1,#0	; r1 = low two bits of src and dst differ
	beqz	r1,LbcopyL7	; mutually alignable: forward word path
	;; filled delay slot:
	andi	r2,r5,#0x3	; r2 = src & 3, consumed at LbcopyL7
; Forward byte loop: copy r4 bytes from (r5) to (r3), ascending addresses.
LbcopyL5:
	add	r1,r0,r4
LbcopyL45:			; reached with r1 = r4 on every path
	sgti	r1,r1,#0	; signed test: any bytes left?
	beqz	r1,LbcopyL1
	;; filled delay slot:
	addi	r4,r4,#-1	; account for the byte about to be copied
LbcopyL13:
	lb	r6,(r5)
	sb	(r3),r6
	addi	r5,r5,#1
	addi	r3,r3,#1
	add	r1,r0,r4
	sgti	r1,r1,#0
	bnez	r1,LbcopyL13
	;; filled delay slot:
	addi	r4,r4,#-1
	j	LbcopyL1
	nop	; not filled.
; Forward word path. On entry r2 = src & 3.
LbcopyL7:
LbcopyL46:
	addi	r1,r0,#4
	sub	r1,r1,r2
	j	LbcopyL43
	;; filled delay slot:
	andi	r2,r1,#0x3	; r2 = (4 - (src & 3)) & 3 = bytes until src is word aligned
; Alignment pre-loop body: copy one byte while r2 > 0 and bytes remain.
LbcopyL20:
	add	r1,r0,r4
	sgti	r1,r1,#0
	beqz	r1,LbcopyL17	; count ran out before alignment was reached
	;; filled delay slot:
	addi	r4,r4,#-1
	lb	r6,(r5)
	sb	(r3),r6
	addi	r5,r5,#1
	addi	r3,r3,#1
LbcopyL43:
	add	r1,r0,r2
	sgti	r1,r1,#0
	bnez	r1,LbcopyL20	; more alignment bytes to copy
	;; filled delay slot:
	addi	r2,r2,#-1
LbcopyL17:
	sgti	r1,r4,#3	; at least one whole word left?
	beqz	r1,LbcopyL22
	;; filled delay slot:
	add	r2,r0,r5	; r2 = word source pointer (r5 is refreshed from it at LbcopyL22)
; Forward word loop: one lw/sw pair per iteration while more than 3 bytes remain.
LbcopyL23:
	lw	r6,(r2)
	sw	(r3),r6
	addi	r2,r2,#4
	addi	r4,r4,#-4
	sgti	r1,r4,#3
	bnez	r1,LbcopyL23
	;; filled delay slot:
	addi	r3,r3,#4
LbcopyL22:
	j	LbcopyL5	; copy whatever is left in the forward byte loop
	;; filled delay slot:
	add	r5,r0,r2	; r5 = source pointer after the word loop
; Backward copy: point r3/r5 at the last byte of each region and descend.
LbcopyL9:
	addi	r1,r4,#-1
	add	r3,r3,r1	; r3 = dst + count - 1
	add	r5,r5,r1	; r5 = src + count - 1
	sub	r1,r3,r5
	andi	r1,r1,#0x3
	snei	r1,r1,#0	; r1 = low two bits of src and dst differ
	beqz	r1,LbcopyL26	; mutually alignable: backward word path
	;; filled delay slot:
	addi	r1,r5,#1	; r1 = one past the last source byte, consumed at LbcopyL26
; Backward byte loop: copy r4 bytes from (r5) to (r3), descending addresses.
LbcopyL27:
	add	r1,r0,r4
LbcopyL47:			; reached with r1 = r4 on every path
	sgti	r1,r1,#0	; signed test: any bytes left?
	beqz	r1,LbcopyL1
	;; filled delay slot:
	addi	r4,r4,#-1
LbcopyL30:
	lb	r6,(r5)
	sb	(r3),r6
	addi	r5,r5,#-1
	addi	r3,r3,#-1
	add	r1,r0,r4
	sgti	r1,r1,#0
	bnez	r1,LbcopyL30
	;; filled delay slot:
	addi	r4,r4,#-1
	j	LbcopyL1
	nop	; not filled.
; Backward word path. On entry r1 = address one past the last source byte.
LbcopyL26:
	j	LbcopyL44
	;; filled delay slot:
	andi	r2,r1,#0x3	; r2 = bytes to copy before r5+1 is word aligned
; Alignment pre-loop body: copy one byte while r2 > 0 and bytes remain.
LbcopyL37:
	add	r1,r0,r4
	sgti	r1,r1,#0
	beqz	r1,LbcopyL34	; count ran out before alignment was reached
	;; filled delay slot:
	addi	r4,r4,#-1
	lb	r6,(r5)
	sb	(r3),r6
	addi	r5,r5,#-1
	addi	r3,r3,#-1
LbcopyL44:
	add	r1,r0,r2
	sgti	r1,r1,#0
	bnez	r1,LbcopyL37	; more alignment bytes to copy
	;; filled delay slot:
	addi	r2,r2,#-1
LbcopyL34:
	slei	r1,r4,#3
	bnez	r1,LbcopyL47	; 3 or fewer bytes left: backward byte loop
	;; filled delay slot:
	add	r1,r0,r4	; r1 = count, which LbcopyL47 tests
	addi	r2,r5,#-3	; r2 = start of the word whose last byte is at r5
	sgti	r1,r4,#3
	beqz	r1,LbcopyL40
	;; filled delay slot:
	addi	r3,r3,#-3	; same adjustment for the destination pointer
; Backward word loop: one lw/sw pair per iteration while more than 3 bytes remain.
LbcopyL41:
	lw	r6,(r2)
	sw	(r3),r6
	addi	r2,r2,#-4
	addi	r4,r4,#-4
	sgti	r1,r4,#3
	bnez	r1,LbcopyL41
	;; filled delay slot:
	addi	r3,r3,#-4
LbcopyL40:
	addi	r5,r2,#3	; back to byte addressing: r5 = last byte of the next lower word
	j	LbcopyL27	; copy whatever is left in the backward byte loop
	;; filled delay slot:
	addi	r3,r3,#3	; same adjustment for the destination pointer
; Common exit: restore saved registers, drop the frame, return.
LbcopyL1:
	lw	r2,0(r29)
	lw	r3,4(r29)
	lw	r4,8(r29)
	lw	r5,12(r29)
	lw	r6,16(r29)
	lw	r31,-8(r30)	; reload return address
	add	r29,r0,r30	; sp = fp (release the frame)
	jr	r31
	lw	r30,-4(r30)	; follows the jump like the annotated delay slots above: reload caller's fp
	.endproc _bcopy