Full Version : Jack Tidwell's Fast Floating Point Routines (ASM)
avr >>ASSEMBLER ROUTINES >>Jack Tidwell's Fast Floating Point Routines (ASM)


Admin5- 04-19-2006
Jack Tidwell's Fast Floating Point Math Routines

CODE

; ** FAST IEEE FP Routines for Atmel AVR RISC **
; ** 1998,1999 by Jack Tidwell <jackt@igalaxy.net> **
; ** These routines may be used for personal and educational
; **      purposes, and are free to download. **
; ** Work is still in progress (smaller & FASTER!) **
; ** Trig funcs coming soon **
; ** I would like feedback. **

; ** Most problems fixed, converting to Decimal seems to be
;    the only one left?
; ** Added some 'C' support **
; ** Work on preservation of r9 to r25 is needed **

; NOTE: All funcs named xxxMEM (e.g. FDIVMEM) are entered
; with r30,r31 (Z) pointing to a 32-bit FP value in SRAM.
; You still have to preserve r9-r25 yourself.
; It's a start toward Ron's SmallC.

.include "8515def.inc"; (Any AVR except 1200)

; Register allocation shared by every routine in this file.
; r9-r11: 24-bit scratch (quotient in UDIVMANT, low half of the product in UMUL,
; shift-in byte in FTOI).
.def QUOT =r9; partial product scratch area
.def QUOTM =r10
.def QUOTH =r11

.def DEXP =r12; Decimal Exp for 'ftoa(), ecvt()' funcs

; ACC1: three mantissa bytes (low, mid, high), exponent byte, sign byte.
; The sign byte is only written by UNPACK and consumed by REPACK.
.def MANT1 =r13; Accumulator 1
.def MANT1m =r14
.def MANT1h =r15
.def EXP1 =r16
.def SIGN1 =r17

; ACC2: same layout as ACC1
.def MANT2 =r18; Accumulator 2
.def MANT2m =r19
.def MANT2h =r20
.def EXP2 =r21
.def SIGN2 =r22

.def MANT1T =r23; used to extend 24bit to 32bit ops
.def count =r24; GP loop counter
.def temp =r25; scratch 'ram'?

.dseg

; ITOD writes here: an optional '-', seven ASCII digits, a 0 byte, then DEXP
DECBUFF: .byte 10; some place to hold Decimal Digits

; bytes 0-3 hold ACC1 (PUSH1/POP1), bytes 4-7 hold ACC2 (PUSH2/POP2)
MATHTMP: .byte 8; Accumulator 'Holding' registers.

.cseg
.org $000
rjmp RESET ; reset vector

;*
; NOTE(review): code is placed at $00b; confirm this clears the target's
; interrupt vector table before enabling any interrupt.
.org $00b
;** Avr Studio test code **
; Point the stack at the top of SRAM
RESET: ldi temp,low(RAMEND)
out SPL,temp
ldi temp,high(RAMEND)
out SPH, temp

; Smoke test: ACC1 = K1E7, divide by 10 four times, add 0.5, take the square
; root, negate, then spin so the result can be inspected in the simulator.
Main:
ldi zl,low(k1e7*2) ; *2: lpm wants a byte address, labels are word addresses
ldi zh,high(k1e7*2)
rcall KTOAC1
rcall fdiv10
rcall fdiv10
rcall fdiv10
rcall fdiv10
rcall faddp5
rcall fsqrt
rcall fltneg
main1: rjmp main1 ; halt here
;
;**************************************************************************
;**** FP Math Routines: ****
; ** Very similar to _ecvt(), (still a little buggy?) **
; FLTDEC: convert the float in ACC1 to decimal text in DECBUFF. Falls through
; into ITOD, whose final POP1 restores the ACC1 saved by PUSH1 here.
; Uses temp, count, DEXP, Z, Y, ACC2 and MATHTMP[0..3].
; NOTE(review): several steps below look unfinished; see the NOTE lines.
FLTDEC: rcall PUSH1
push sign1
push exp1 ; FP to 7 digit dec # +exp
tst exp1 ; bit 7 of the packed top byte is the IEEE sign
brpl FLTD1
rcall FLTNEG ; negative input: work on the absolute value
FLTD1: ldi temp,0x7f
sub temp,exp1
mov DEXP,temp ; DEXP = 0x7f - exp1
; Multiply by 10 until ACC1 >= K1E8, counting the steps in DEXP.
; NOTE(review): 0.0 stays 0.0 through FMUL, so this loop does not appear to
; terminate for a zero input - confirm.
; NOTE(review): a value >= K1E8 makes FTOI take its MAXRES branch (22-exp1 is
; negative), so the digits produced below look wrong - confirm the threshold.
FLTD2: ldi zl,low(k1E8 *2)
ldi zh,high(k1E8 *2)
rcall KTOAC2
rcall FLTCP
brpl FLTD5 ; temp is 0 or 1: ACC1 >= K1E8
ldi zl,low(k10*2)
ldi zh,high(k10*2)
rcall KTOAC2
rcall FMUL
inc DEXP
rjmp FLTD2
FLTD5: pop temp; orig exp1
mov count,temp
neg temp
add temp,DEXP
subi temp,0x7f
mov DEXP,temp ; DEXP = DEXP - (exp1 at entry) - 0x7f
andi count,0x80 ; NOTE(review): count is reloaded at ITOD1 before it is read
rcall FADDp5 ; +0.5 so that the truncating FTOI rounds
pop sign1 ; NOTE(review): the UNPACK inside FTOI overwrites sign1 right after
; ITOD: convert ACC1 to an integer with FTOI and write it as ASCII to DECBUFF:
; optional '-', six digits found by repeated subtraction of the I1E6..I1E1
; table entries, the units digit, a 0 byte, then DEXP. Ends by restoring ACC1
; from MATHTMP via POP1. Uses temp, count, Y, Z and ACC2.
ITOD: rcall FTOI
ldi yl,low(DECBUFF)
ldi yh,high(DECBUFF)
tst sign1 ; sign1 comes from the UNPACK (or MINRES) inside FTOI
brpl ITOD1
ldi temp,'-'
st y+,temp
rcall FLTNEG ; UNPACK/REPACK round trip: flips the sign bit held in exp1
ITOD1: ldi zl,low(I1E6*2); decimal 'lookup' table
ldi zh,high(I1E6*2)
ldi count,6
ITODLP: push count
rcall KTOAC2 ; mant2h:mant2m:mant2 = next power of ten, Z advances by 4
clr temp ; temp counts how many times it fits
ITOD2: cp mant1,mant2
cpc mant1m,mant2m
cpc mant1h,mant2h
brlo ITOD3 ; remainder smaller than the power of ten: digit done
sub mant1,mant2
sbc mant1m,mant2m
sbc mant1h,mant2h
inc temp
brne ITOD2
ITOD3: ori temp,'0'; convert to Ascii char
st y+,temp ; store the dig in the SRAM Buffer, and post-inc 'y' reg
pop count
dec count ; have we processed all tables yet?
brne ITODLP
mov temp,mant1 ; what is left is the units digit
ori temp,'0'
st y+,temp
; * count = 0 here, use it to add ending 'NULL' char. *
st y+,count
; * DEXP has dp position -1 *
st y+,DEXP
rjmp POP1 ; restore ac1
;
;** ACC1 = integer part of the float at *r30 **
FTOIMEM: rcall FSRAMA1
;** Convert the float in ACC1 to a 24-bit unsigned magnitude **
; In:  ACC1 = packed IEEE single
; Out: mant1h:mant1m:mant1 = integer part of |ACC1| (truncated)
;      sign1 = 0x80 if the input was negative (the magnitude is not negated)
;      zero or |ACC1| < 1 -> MINRES, too large for 24 bits -> MAXRES
; Clobbers: temp, quot, exp1; ACC2 is left in unpacked form by UNPACK.
FTOI: rcall UNPACK
brne FTOI1
rjmp MINRES ; ACC1 == 0
FTOI1: clr quot ; quot is the byte shifted in from above: must start at 0
ldi temp,22
sub temp,exp1 ; temp = number of right shifts needed
brge FTOI2 ; signed >= 0, still correct when the subtraction overflows
rjmp MAXRES ; does not fit in 24 bits
FTOI2: cpi temp,24
brlo FTOI3
rjmp MINRES ; everything would be shifted out
FTOI3: cpi temp,8; use fast byte-move, if possible
brlo FTOI4
mov mant1,mant1m
mov mant1m,mant1h
mov mant1h,quot
clr quot
subi temp,8
rjmp FTOI3
FTOI4: tst temp
breq FTOI6
FTOI5: lsr quot ; remaining 1..7 single-bit shifts
ror mant1h
ror mant1m
ror mant1
dec temp
brne FTOI5
FTOI6:
; tst sign1
; brpl FTOIX
; rjmp NEGMANT
FTOIX: ret
;
;** ACC1 -= *r30 **
FSUBMEM: rcall FSRAMA2; Acc1 -= *r30
;** ACC1 -= ACC2 **
; Implemented as ACC1 + (-ACC2): flips sign2 and joins FADD after its zero test.
FSUB: rcall UNPACK
cpi exp2,0x80 ; ACC2 == 0 ?
breq FSADX ; yes: result is ACC1
ldi temp,0x80
eor sign2,temp ; negate ACC2
rjmp FADD1
; Exponent difference overflowed 8 bits, so one operand is negligible.
FSUNDER: brmi FSADX ; wrapped negative: ACC1 is far larger, keep it
FRET2: rcall SWAPACC ; result is ACC2
FSADX: rjmp REPACK
;** ACC1 += 0.5f **
FADDp5: ldi exp2,0x3f
clr mant2
clr mant2m
clr mant2h
rjmp FADD
FADDMEM: rcall FSRAMA2; Acc1 += *r30
;** ACC1 += ACC2 **
; In:  ACC1, ACC2 = packed IEEE singles
; Out: ACC1 = packed sum; 0.0 (MINRES) on exact cancellation or exponent
;      underflow, MAXRES on exponent overflow
; Clobbers: temp, sign1, sign2, ACC2
FADD: rcall UNPACK
cpi exp2,0x80 ; ACC2 == 0 ?
breq FSADX
FADD1: cpi exp1,0x80 ; ACC1 == 0 ?
breq FRET2
; make ACC1 the operand with the larger exponent
FADD2: mov temp,exp1
sub temp,exp2
brvs FSUNDER ; difference does not fit: return the dominant operand
brpl FADD3
rcall SWAPACC
rjmp FADD2
; align ACC2 to ACC1: shift mant2 right by temp bits
FADD3: cpi temp,24; are we more than 24 bits diff?
brlo FADD3a; no, we can add it.
clr mant2; yes, acc2 is to small
clr mant2m
clr mant2h
FADD3a: cpi temp,8 ; whole bytes first
brlo FADD3b
mov mant2,mant2m
mov mant2m,mant2h
clr mant2h
subi temp,8
rjmp FADD3a
FADD3b: tst temp
breq FADD3d
FADD3c: lsr mant2h ; then the remaining bits
ror mant2m
ror mant2
dec temp
brne FADD3c
FADD3d: mov temp,sign1
eor temp,sign2
brmi FADD4 ; signs differ: subtract magnitudes
rcall UADD
brcc FSADX
ror mant1h ; carry out: shift it back in as the new top bit
ror mant1m
ror mant1
subi exp1,-1; Add +1
brvc FSADX
rjmp MAXRES
FADD4: rcall USUB
breq FCLR ; exact cancellation
brcc FADD5
; Borrow: |ACC2| > |ACC1| (only possible with equal exponents). The difference
; is a negative two's-complement value: negate it and take the opposite sign.
com mant1
com mant1m
com mant1h
ldi temp,-1
sub mant1,temp ; +1 completes the two's complement
sbc mant1m,temp
sbc mant1h,temp
ldi temp,0x80
eor sign1,temp
FADD5: tst mant1h; normalize
brmi FADD6
lsl mant1
rol mant1m
rol mant1h
subi exp1,1
brvc FADD5
FCLR: rjmp MINRES ; cancellation or exponent underflow -> 0.0
FADD6: rjmp REPACK
;
; UADD: 24-bit unsigned mant1 += mant2. Returns with C = carry out of bit 23.
UADD: add mant1,mant2
adc mant1m,mant2m
adc mant1h,mant2h
ret
;
; USUB: 24-bit unsigned mant1 -= mant2. Returns with C = borrow and, because
; sbc only keeps Z when it was already set, Z = 1 only if all 24 bits are zero.
USUB: sub mant1,mant2
sbc mant1m,mant2m
sbc mant1h,mant2h
ret
;
;** ACC1 = -ACC1 **
; Unpacks, inverts sign1 (REPACK only looks at its bit 7) and repacks.
; Side effect: UNPACK also unpacks ACC2 and nothing repacks it here.
FLTNEG: rcall UNPACK
com sign1
rjmp REPACK
;
; NEGMANT: 24-bit two's-complement negate of mant1h:mant1m:mant1.
; Complements every byte, then adds 1 by subtracting 0xFFFFFF (the mantissa
; registers are below r16, so subi/sbci cannot be used on them).
; Clobbers: temp, flags.
NEGMANT: com mant1
com mant1m
com mant1h
ldi temp,-1
sub mant1,temp
sbc mant1m,temp
sbc mant1h,temp
ret
;
;** MANT1 <<= 1 **
; 32-bit shift across mant1t:mant1h:mant1m:mant1; C = the bit shifted out.
RLMANT1: lsl mant1
rol mant1m
rol mant1h
rol mant1t
ret
;
;** Straight 24bit Unsigned ACC1 / ACC2 **
; Fractional restoring division, one quotient bit per pass.
; In:  mant1, mant2 = 24-bit operands; mant2 must be below 2^23 and
;      mant1 < 2*mant2 so the doubled remainder always fits (FDIV guarantees
;      this by shifting both mantissas right once).
; Out: mant1h:mant1m:mant1 = quotient, bit 23 = integer bit, rounded to nearest
; Clobbers: quot, quotm, quoth, count, flags
UDIVMANT: clr quot
clr quotm
clr quoth
ldi count,24
UDIVLP: cp mant1,mant2
cpc mant1m,mant2m
cpc mant1h,mant2h
brlo UDIV1
sub mant1,mant2 ; divisor fits: subtract it, quotient bit = 1
sbc mant1m,mant2m
sbc mant1h,mant2h
sec
rjmp UDIV2
UDIV1: clc ; quotient bit = 0
UDIV2: rol quot
rol quotm
rol quoth
lsl mant1 ; remainder *= 2 for the next bit
rol mant1m
rol mant1h
dec count
brne UDIVLP
; mant1 now holds 2*remainder; round up when it is at least the divisor
cp mant1,mant2
cpc mant1m,mant2m
cpc mant1h,mant2h ; C = 1 when 2*remainder < divisor
mov mant1,quot ; mov leaves the flags alone
mov mant1m,quotm
mov mant1h,quoth
brlo udivx
inc mant1
brne udivx
inc mant1m
brne udivx
inc mant1h
udivx:
ret
;
;** ACC1 = 1 / (float at *r30) **
F1OVERMEM: rcall FSRAMA1
;** ACC1 = 1 / ACC1 **
; Moves the operand into ACC2, loads the packed constant 1.0f (bytes
; 0x3f,0x80,0x00,0x00 from exp1 down to mant1) into ACC1 and tail-calls FDIV.
; temp is left holding 0x80.
F1OVERX: rcall AC1TOAC2
ldi exp1,0x3f
ldi temp,0x80
mov mant1h,temp ; mant1h is below r16, so it is loaded through temp
clr mant1m
clr mant1
rjmp FDIV
;
;** Acc1 /= *r30 **
FDIVMEM: rcall FSRAMA2
rjmp FDIV
;** ACC1 /= 10.0f */
FDIV10: clr mant2 ; x/10.0f  could use fmul * 0.1f?
clr mant2m
ldi mant2h,0x20
ldi exp2,0x41;Put 10.0 -> Acc2
;** ACC1 /= ACC2 **
; In:  ACC1, ACC2 = packed IEEE singles
; Out: ACC1 = packed quotient; 0.0 (MINRES) when either top byte is zero, when
;      ACC1 unpacks to zero or on exponent underflow; MAXRES on overflow
; Clobbers: temp, count, quot/quotm/quoth, sign1, sign2, ACC2
; FDIVa is a second entry that skips the top-byte tests (used by sqrthalf).
FDIV: tst exp2 ; test for x/0.0
breq FDIVZ
tst exp1
breq MINRES ; 0.0f/x = return 0.0f
FDIVa: rcall UNPACK
breq MINRES
eor sign1,sign2; get result sign
sec
sbc exp1,exp2; sub exps.
brvc FDIVb ; exponent fits
brmi MAXRES ; wrapped negative: true exponent too large
rjmp MINRES ; wrapped positive: true exponent too small -> 0.0
FDIVb: lsr mant2h ; de-norm & div mants
ror mant2m
ror mant2
lsr mant1h
ror mant1m
ror mant1
rcall UDIVMANT
tst mant1h ; need norm?
brmi FDIVX ; no, exit
lsl mant1
rol mant1m
rol mant1h
subi exp1,1 ; --exp
brvc FDIVX
rjmp MINRES ; a decrement can only underflow
FDIVX: rjmp REPACK ; re-pack
FDIVZ: rjmp MINRES
; MAXRES: saturate ACC1. Sets exp1 = 0x7f, ORs 0x7f into mant1h (its bit 7 is
; left as it was) and sets mant1m = mant1 = 0xff. temp is left holding 0xff.
; The result is not passed through REPACK and sign1 is not consulted.
MAXRES: ldi exp1,0x7f
or mant1h,exp1
ldi temp,0xff
mov mant1m,temp
mov mant1,temp
ret
; MINRES: ACC1 = 0.0f. Clears the sign, exponent and all three mantissa bytes.
MINRES: clr sign1
clr exp1
clr mant1h
clr mant1m
clr mant1
ret
;
;** ACC1 *= *r30 **
FMULMEM: rcall FSRAMA2 ; Acc1 *= *r30
rjmp FMUL
;** ACC1 *= 10.0f **
FMUL10: ldi exp2,0x41; IEEE 10.0f
ldi mant2h,0x20
clr mant2m
clr mant2
;** ACC1 *= ACC2 **
; In:  ACC1, ACC2 = packed IEEE singles
; Out: ACC1 = packed product; 0.0 (MINRES) if either operand is zero or the
;      exponent underflows; MAXRES on exponent overflow
; Clobbers: quot/quotm/quoth, sign1, sign2, ACC2 (left unpacked)
FMUL: rcall UNPACK
breq MINRES
cpi exp2,0x80
breq MINRES
eor sign1,sign2 ; result sign
sec
adc exp1,exp2 ; add exponents (+1 re-biases the unpacked form)
brvc FMULa ; exponent fits
brmi MAXRES;FMULMAX - wrapped negative: true exponent too large
rjmp MINRES ; wrapped positive: true exponent too small -> 0.0
FMULa: rcall UMUL
tst mant1h
brmi FMULX
lsl quoth ; top bit clear: shift one bit up from the low half
rol mant1
rol mant1m
rol mant1h
rjmp REPACK
FMULX: subi exp1,-1 ; top bit set: product is >= 2, bump the exponent
brvs MAXRES;FMULMAX
rjmp REPACK
;
;** 24bit unsigned Multiply
; Shift-and-add, most significant multiplier bit first.
; In:  mant1h:mant1m:mant1 = multiplier, mant2h:mant2m:mant2 = multiplicand
; Out: 48-bit product in mant1h:mant1m:mant1:quoth:quotm:quot
; temp and count are saved on the stack and restored.
UMUL: push temp
push count
clr quot
clr quotm
clr quoth
clr temp ; temp = 0, used to propagate carries with adc
ldi count,24
UMULLP: lsl quot ; shift the whole 48-bit value left ...
rol quotm
rol quoth
rol mant1
rol mant1m
rol mant1h ; ... the multiplier bit falls out into C
brcc UMUL1
add quot,mant2 ; bit was 1: add the multiplicand to the low half
adc quotm,mant2m
adc quoth,mant2h
adc mant1,temp ; and ripple the carry through the high half
adc mant1m,temp
adc mant1h,temp
UMUL1: dec count
brne UMULLP
pop count
pop temp
ret
;
;** ACC1 = (*r30) * (*r30) **
FSQRMEM: rcall FSRAMA1
;** ACC1 *= ACC1 **
; Copies ACC1 into ACC2 and tail-calls FMUL.
FSQR: rcall AC1TOAC2
rjmp FMUL
;
; sqrthalf: ACC1 /= 2.0f. Loads packed 2.0f into ACC2 and enters FDIV at
; FDIVa, i.e. after FDIV's top-byte zero tests.
sqrthalf:
clr mant2
clr mant2m
clr mant2h
ldi exp2,0x40
rjmp FDIVa ; div by 2 to get our guess
SQERR: rjmp MINRES ; add your own error handler here.
;** ACC1 = sqrt(*r30) **
FSQRTMEM: rcall FSRAMA1
;** ACC1 = sqrt(ACC1) **
; Five fixed passes of guess = (x/guess + guess)/2, starting from x/2.
; A zero top byte or a negative input returns 0.0 through SQERR.
; Clobbers r5-r8 (copy of x), temp, ACC2 and MATHTMP[4..7] (via PUSH2/POP2).
FSQRT: tst exp1
breq SQERR ; trap sqrt(0) or sqrt(-x)!
brmi SQERR
mov r5,mant1; save org Num. user must preserve r5-r8!
mov r6,mant1m
mov r7,mant1h
mov r8,exp1
rcall sqrthalf; get first guess
rcall AC1TOAC2; put it in Acc2
ldi temp,5 ; number of iterations
SQRTLP: push temp ; the routines called below use temp
rcall PUSH2 ; save 'guess' results
mov mant1,r5; restore orig Number
mov mant1m,r6
mov mant1h,r7
mov exp1,r8
rcall FDIV ; ACC1 = x / guess (leaves ACC2 unpacked)
rcall POP2 ; ACC2 = guess again, packed
rcall FADD ; ACC1 = x/guess + guess
rcall sqrthalf ; ACC1 /= 2 -> new guess
rcall AC1TOAC2
pop temp
dec temp
brne SQRTLP
ret
;
;** Compare ACC1 == ACC2 **
;** return w/temp = 0,1,or -1 (Z,N flags set) **
; FLTCPFLASH: compare ACC1 with the 4-byte flash constant at Z (byte address)
FLTCPFLASH: rcall KTOAC2
rjmp FLTCP
;** compare two sram floats **
; FLTCP2: Y points at the first float (loaded into ACC1), Z at the second
FLTCP2: push zl; save r30,r31, (pointer to acc2 variable)
push zh
mov zl,yl
mov zh,yh
rcall FSRAMA1
pop zh
pop zl
FLTCPMEM: rcall FSRAMA2 ; compare Acc1 w/Memory
FLTCP: rcall FCMP ; do compare
tst temp ; set status reg accordingly
ret  ; return to caller
;
; FCMP: signed compare of the packed floats in ACC1 and ACC2.
; Out: temp = 1 (ACC1 > ACC2), 0 (equal) or 0xff (ACC1 < ACC2).
; The flags are not guaranteed to reflect temp on return; FLTCP does the tst.
; The sign is bit 7 of the top byte, so once the signs are known to match the
; 32-bit patterns are compared as unsigned magnitudes.
FCMP: tst exp1 ; is acc1 pos?
brmi FCMP1 ; no. test acc2
tst exp2 ; yes, is acc2 pos?
brmi A1GTA2 ; no, return Acc1 > Acc2
cp exp1,exp2; both are '+'
brlo A1LTA2
brne A1GTA2
cp mant1,mant2; exps are equal, are mants?
cpc mant1m,mant2m
cpc mant1h,mant2h
brlo A1LTA2
breq A1EQA2
A1GTA2: ldi temp,1 ; ac1 > ac2
ret
A1LTA2: ldi temp,0xff; ac1 < ac2
ret
A1EQA2: clr temp ; ac1 = ac2
ret
;** we're here, acc1 must be '-' **
FCMP1: tst exp2 ; if acc1 is '-' & acc2 is '+'
brpl A1LTA2 ; then acc1 is < acc2
cp exp1,exp2; both are '-': the larger magnitude is the smaller value
brlo A1GTA2 ; |acc1| < |acc2|  ->  acc1 > acc2
brne A1LTA2 ; |acc1| > |acc2|  ->  acc1 < acc2
cp mant1,mant2
cpc mant1m,mant2m
cpc mant1h,mant2h
brlo A1GTA2
breq A1EQA2 ; return acc1 == acc2
rjmp A1LTA2
;
;*********************************************************************
; * These routines are for future atof(), atoi(), atol(), etc.
;** Fast Multiply by 10 **
; In:  mant1t:mant1h:mant1m:mant1 = 32-bit unsigned x
; Out: same registers = 10*x (mod 2^32); C = bit shifted out by the last step
; Computed as (4x + x) * 2. ACC2 is scratch: it receives the low 24 bits of x
; and exp2 receives the top byte of x.
UMUL10: rcall AC1TOAC2
mov exp2,mant1t ; keep the top byte of x for the +x step
rcall RLMANT1
rcall RLMANT1 ; 4x
rcall UADD ; low 24 bits: 4x + x
adc mant1t,exp2 ; top byte: + top byte of x + carry
rjmp RLMANT1 ; 5x * 2
;
; ADDDIG: append one decimal digit to the 32-bit integer in
; mant1t:mant1h:mant1m:mant1, i.e. value = value*10 + (temp & 0x0f).
; In:  temp = digit; only the low nibble is used, so an ASCII '0'..'9' works
; Clobbers: ACC2, flags
ADDDIG: rcall UMUL10
clr mant2m
clr mant2h
mov mant2,temp
andi mant2,0x0f
rcall UADD
adc mant1t,mant2h ; mant2h is 0, so this adds only the carry from UADD
ret
;*********************************************************************
;
;** convert an integer to FP **
; ITOFMEM: load a 32-bit signed integer from *r30 (low byte first) and convert.
ITOFMEM: rcall FSRAMA1 ; psuedo 32bit long?
mov mant1t,exp1 ; the fourth byte is the integer's top byte
; ITOF: convert the signed 32-bit integer in mant1t:mant1h:mant1m:mant1 to a
; packed IEEE single in ACC1. Bits below the top 24 significant bits are
; truncated. Clobbers temp, sign1, mant1t.
ITOF: mov temp,mant1
or temp,mant1m
or temp,mant1h
or temp,mant1t
brne ITOF1
rjmp MINRES ; 0 -> 0.0f
ITOF1: clr sign1 ; REPACK takes the sign from sign1, so set it explicitly
tst mant1t
brpl ITOF2
ldi sign1,0x80 ; negative: remember the sign, convert the magnitude
com mant1 ; 32-bit two's-complement negate
com mant1m
com mant1h
com mant1t
ldi temp,-1
sub mant1,temp
sbc mant1m,temp
sbc mant1h,temp
sbc mant1t,temp
ITOF2: ldi exp1,30 ; unpacked exponent for a value with bit 31 set
tst mant1t
ITOF3: brmi ITOF4 ; normalise: shift left until bit 31 is set
dec exp1
lsl mant1
rol mant1m
rol mant1h
rol mant1t
rjmp ITOF3
ITOF4: mov mant1,mant1m ; keep the top 24 bits as the mantissa
mov mant1m,mant1h
mov mant1h,mant1t
rjmp REPACK
;
;********************************************************************
;** Memory Move funcs **
;** Copy SRAM Float to Acc1 **
; Reads 4 bytes at Z, lowest mantissa byte first, top (sign/exponent) byte last.
; Z is left advanced by 4. sign1 is not touched.
FSRAMA1: ld mant1,z+
ld mant1m,z+
ld mant1h,z+
ld exp1,z+
ret
;
;** Copy SRAM Float to Acc2 **
; Reads 4 bytes at Z, lowest mantissa byte first, top (sign/exponent) byte last.
; Z is left advanced by 4. sign2 is not touched.
FSRAMA2: ld mant2,z+
ld mant2m,z+
ld mant2h,z+
ld exp2,z+
ret
;
;
;** ACC2 = ACC1 **
; Copies all five accumulator registers (sign, exponent, three mantissa
; bytes). ACC1 and the flags are unchanged.
AC1TOAC2:
mov sign2,sign1
mov exp2,exp1
mov mant2h,mant1h
mov mant2m,mant1m
mov mant2,mant1
ret
;
;** ACC1 <-> ACC2 **
; Exchanges the two accumulators one register pair at a time, using a single
; stack byte as the temporary. No other register and no flag is changed.
SWAPACC: push mant1
mov mant1,mant2
pop mant2
push mant1m
mov mant1m,mant2m
pop mant2m
push mant1h
mov mant1h,mant2h
pop mant2h
push exp1
mov exp1,exp2
pop exp2
push sign1
mov sign1,sign2
pop sign2
ret
;
;** Get 1 byte from program memory & inc pointer **
; In: Z = byte address in flash. Out: r0 = byte read, Z = Z + 1.
_lpmbyte: lpm
adiw zl,1
ret
;
;** ACC1 = 4-byte flash constant at Z **
; Z is a byte address (label*2). Bytes are read lowest mantissa byte first,
; top (sign/exponent) byte last. Z is left advanced by 4; r0 holds the last
; byte read.
KTOAC1: lpm
mov mant1,r0
adiw zl,1
lpm
mov mant1m,r0
adiw zl,1
lpm
mov mant1h,r0
adiw zl,1
lpm
mov exp1,r0
adiw zl,1
ret
;
;** ACC2 = 4-byte flash constant at Z **
; Z is a byte address (label*2). Bytes are read lowest mantissa byte first,
; top (sign/exponent) byte last. Z is left advanced by 4; r0 holds the last
; byte read.
KTOAC2: lpm
mov mant2,r0
adiw zl,1
lpm
mov mant2m,r0
adiw zl,1
lpm
mov mant2h,r0
adiw zl,1
lpm
mov exp2,r0
adiw zl,1
ret
;
;** Save ACC1 (mantissa + exponent byte) to MATHTMP[0..3] **
; sign1 is not saved. Single slot, not a stack: a second call overwrites it.
PUSH1: sts MATHTMP+3,exp1
sts MATHTMP+2,mant1h
sts MATHTMP+1,mant1m
sts MATHTMP,mant1
ret
;
;** Reload ACC1 (mantissa + exponent byte) from MATHTMP[0..3] **
; sign1 is left as it was.
POP1: lds exp1,MATHTMP+3
lds mant1h,MATHTMP+2
lds mant1m,MATHTMP+1
lds mant1,MATHTMP
ret
;
;** Save ACC2 (mantissa + exponent byte) to MATHTMP[4..7] **
; sign2 is not saved. Single slot, not a stack: a second call overwrites it.
PUSH2: sts MATHTMP+7,exp2
sts MATHTMP+6,mant2h
sts MATHTMP+5,mant2m
sts MATHTMP+4,mant2
ret
;
;** Reload ACC2 (mantissa + exponent byte) from MATHTMP[4..7] **
; sign2 is left as it was.
POP2: lds exp2,MATHTMP+7
lds mant2h,MATHTMP+6
lds mant2m,MATHTMP+5
lds mant2,MATHTMP+4
ret
;
;********************************************************************
;** IEEE convertions **
;** Convert ACC1 & ACC2 from IEEE to work format **
; Work format, per accumulator:
;   signN  = 0x80 if negative, else 0
;   expN   = the 8-bit IEEE exponent field with bit 7 inverted
;            (so an exponent field of 0 becomes 0x80)
;   mantNh = top mantissa byte with bit 7 forced to 1 (the hidden bit)
; Returns with Z = 1 when exp1 == 0x80, i.e. ACC1's exponent field was 0.
; temp is preserved.
UNPACK: push temp
mov sign1,exp1 ; top byte = sign + exponent bits 7..1
ldi temp,0x80
lsl mant1h ; C = exponent bit 0
rol exp1 ; exp1 = full exponent field, sign bit drops out
eor exp1,temp; AVR has no 'eori'
lsl temp ; temp = 0 and C = 1 ...
ror mant1h ; restore Hidden '1' bit
andi sign1,0x80
;** Un-Pack Acc2 **
mov sign2,exp2
ldi temp,0x80
lsl mant2h
rol exp2
eor exp2,temp
lsl temp
ror mant2h
andi sign2,0x80
pop temp
cpi exp1,0x80; return w/ACC1=0 ?
ret
;
;** Convert Acc1 from work format to IEEE **
; Inverse of UNPACK for ACC1 only; ACC2 is not touched.
; sign1 is shifted left in the process, so it is not valid afterwards.
; temp is preserved.
REPACK: push temp
lsl mant1h ; drop the hidden bit
ldi temp,0x80
eor temp,exp1 ; undo the bit-7 inversion of the exponent
lsl sign1 ; C = sign
ror temp ; temp = sign + exponent bits 7..1, C = exponent bit 0
ror mant1h ; exponent bit 0 becomes bit 7 of the top mantissa byte
mov exp1,temp
pop temp
ret
;
;** Constants used for decimal conversion **
; 24-bit integers, low byte first, padded to 4 bytes so KTOAC2 can fetch them.
; ITOD walks the six entries in this order.
I1E6: .db 0x40,0x42,0x0f,0;  1,000,000
I1E5: .db 0xa0,0x86,0x01,0;    100,000
I1E4: .db 0x10,0x27,0x00,0;     10,000
I1E3: .db 0xe8,0x03,0x00,0;      1,000
I1E2: .db 0x64,0x00,0x00,0;        100
I1E1: .db 0x0a,0x00,0x00,0;         10
;
;** IEEE format **
; Packed singles stored low mantissa byte first, as read by KTOAC1/KTOAC2.
PI: .db 0xdb,0x0f,0x49,0x40; 3.1415927f
Kp1: .db 0xCD,0xCC,0xCC,0x3D; 0.1f
Kp5: .db 0x00,0x00,0x00,0x3F; 0.5f
K1: .db 0x00,0x00,0x80,0x3F; 1.0f
K10: .db 0x00,0x00,0x20,0x41;10.0f
;
K1E7: .db 0x80,0x96,0x18,0x4b; 10000000.0f
K1E8: .db 0x20,0xbc,0xbe,0x4c; 100000000.0f
K999999p9: .db 0xfe,0x23,0x74,0x49; 999999.9f
K9999999:  .db 0x7f,0x96,0x18,0x4b;9999999.0f
;




Forumer™ is Voted #1 Free Forum Hosting provider
Build your own community today with the largest message board hosting company.