# FFFF (Fast Floating Fractal Fun)
# Generic (R1x000) MIPS4 ISA (64 bit) FPU calculation of one row of pixels.
# Author: Daniele Paccaloni
#
# Syntax: SGI MIPSpro assembler. Branch delay slots are filled by hand
# (.set noreorder), $1 ($at) is used explicitly (.set noat) and every
# statement is a single machine instruction (.set nomacro).

        .set    noreorder
        .set    noat
        .set    nomacro
        .file   1 "/work/devel/FFFF/calcPixelRow.cpp"

        .section .text, 1, 0x00000006, 4, 16
.text:
        .section .lit8, 1, 0x30000002, 8, 8
.lit8:
        .section .text

#-----------------------------------------------------------------------
# Program Unit: calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25
#
# bool calcPixelRow_FPU_ASM_MIPS(unsigned int* rowBuffer, unsigned int maxi,
#                                unsigned int iter_black, int width,
#                                double cx, double cy, double sx) {
#
# For each of the `width` pixels of a row, starting at c = (cx, cy) and
# advancing cx by sx per pixel, iterates z <- z*z + c from z = c and stores
# one 32-bit word into rowBuffer:
#   - the iteration index i at which zx*zx + zy*zy first exceeded 4.0, or
#   - iter_black when maxi iterations were reached without exceeding it.
# Nothing is stored when width <= 0.
#
# In:    $4.d   = rowBuffer
#        $5.w   = maxi
#        $6.w   = iter_black
#        $7.w   = width
#        $f16.d = cx
#        $f17.d = cy
#        $f18.d = sx
# Out:   $2     = 1 (true), always
# Clobb: $1, $2, $3, $8, $f4, $f6, $f8, $f10, $f12, $f14, $fcc0;
#        the argument registers $4, $7 and $f16 are modified.
# Stack: none (leaf routine, frame size 0).
#
# [Daniele] We are using MIPS4 ISA (64 bits code).
# We can safely use intregs $2..$15 and floatregs $f0..$f23.
#-----------------------------------------------------------------------
        .ent    calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25
        .globl  calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25
calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25:       # 0x0
        .dynsym calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25 sto_default
        .frame  $sp, 0, $31
        # leaf routine.
        #.mask  0x80000000, -80
        # Leftover stack-slot layout from the compiler-generated skeleton
        # (unused here, everything lives in registers):
        # zx = 8
        # zy = 16
        # zx2 = 32
        # zy2 = 40
        # x = 0
        # i = 24
        # lcl_spill_temp_0 = 48

.BB1.calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25:  # 0x0
        # $1 = $25 (our own entry address) + negated gp-relative offset of
        # this function; used below as the base for the gp-relative load.
        lui     $8, %hi(%neg(%gp_rel(calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25)))
        addiu   $8, $8, %lo(%neg(%gp_rel(calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25)))
        daddu   $1, $25, $8

        # Guard: the pixel loop below is a do-while, so without this check a
        # width <= 0 would still store a pixel and then keep running until
        # the counter wraps around to zero.
        blez    $7, .RowDone                    # width <= 0 ? nothing to do
        nop                                     #   (delay slot)

        ldc1    $f4, %gp_rel(.lit8-30720)($1)   # $f4 = 4.0 (escape threshold)

.NxtPix:
        or      $2, $0, $0                      # i = 0
        mov.d   $f6, $f16                       # zx = cx
        mov.d   $f8, $f17                       # zy = cy

.NxtI:
        mul.d   $f10, $f6, $f6                  # zx2 = zx*zx
        mul.d   $f12, $f8, $f8                  # zy2 = zy*zy
        beq     $2, $5, .DonePix                # i == maxi ?
        add.d   $f6, $f6, $f6                   # 2*zx (delay slot; harmless when
                                                #   taken: zx is reloaded at .NxtPix)
        add.d   $f14, $f10, $f12                # m2 = (zx2 + zy2)
        mul.d   $f8, $f8, $f6                   # zy = 2*zx*zy (remove to try MADD)
        #madd.d $f8, $f17, $f8, $f6             # zy = 2*zx*zy + cy (MADD seems slower on R12000 !!)
        sub.d   $f6, $f10, $f12                 # zx = zx2-zy2
        add.d   $f8, $f8, $f17                  # zy += cy (remove to try MADD)
        c.lt.d  $fcc0, $f4, $f14                # 4.0 < m2 ?
        add.d   $f6, $f6, $f16                  # zx += cx
        bc1f    $fcc0, .NxtI                    # 4.0 < m2 ? if false, next iter...
        addiu   $2, $2, +1                      # i++ (delay slot 1)
        addiu   $2, $2, -1                      # i-- (compensate for delay slot 1)

.DonePix:
        add.d   $f16, $f16, $f18                # cx = cx + sx
        # i and maxi are unsigned: compare them unsigned, so that a maxi with
        # the top bit set does not turn every escaped pixel into iter_black.
        sltu    $3, $2, $5                      # i < maxi ?
        movz    $2, $6, $3                      # -> if false, i = iter_black
        sw      $2, 0($4)                       # [rowBuffer] = i
        addiu   $7, $7, -1                      # width--
        bne     $7, $0, .NxtPix                 # pixels left ? calculate next pixel...
        # rowBuffer is a 64-bit pointer: advance it with a 64-bit add (a
        # 32-bit addiu is only defined for sign-extended 32-bit operands).
        daddiu  $4, $4, +4                      # rowBuffer += 4 (delay slot 2)

.RowDone:
        addiu   $2, $0, 1                       # Return value (true)
        jr      $31                             # Return.
        nop                                     #   (delay slot)
        .end    calcPixelRow_FPU_ASM_MIPS__GPUiUiT2idN25

        .section .text
        .section .lit8
        .origin 0x0
        .align  4
        .dword  0x4010000000000000              # double 4.00000

        .section .text
        .align  4
        .section .lit8
        .align  3
        .gpvalue 30720