Full Version : Needham & Wheeler XTEA Routine (AVR ASM)
avr >>ASSMBLER ROUTINES >>Needham & Wheeler XTEA Routine (AVR ASM)


AVR_Admin- 04-18-2006
CODE
;(C)2005 wek   http://www.efton.sk
;free for personal use
;for commercial use, please contact wek@efton.sk

;implementation of XTEA algorithm by Needham&Wheeler for AVR
;

;6347 cycles excl. ret
;$67 words = $CE bytes = 206 bytes of code FLASH, including 16 bytes of key

;uses 8 bytes of SRAM (input/output data) and 10 registers

;key can be easily placed into SRAM - replace lpm xxx,Z+ by ld xxx,Z+
;for devices which don't support lpm xxx,Z+ replace it by lpm; mov xxx,r0
;the 4 registers used by sum0..sum3 can be spared by moving sum to SRAM with 3 modifications:
;  - when getting the pointer to key, load the appropriate byte from sum into tmp4
;  - the increment of sum should be done moving bytes from and to SRAM (e.g. in tmp4)
;  - when adding up sum to key, an additional register is necessary (so the spare is 3 registers only...)



.dseg

ydata:    .byte   4
zdata:    .byte   4


;register definitions
.def  ZL  = r30
.def  ZH  = r31

.def            tmp0            = r16
.def            tmp1            = r17
.def            tmp2            = r18
.def            tmp3            = r19
.def            tmp4            = r20

.def            sum0            = r21
.def            sum1            = r22
.def            sum2            = r23
.def            sum3            = r24

.def            RoundCnt        = r25

.cseg

Tea:
   ldi   RoundCnt,2*32
   clr   sum0
   clr   sum1
   clr   sum2
   clr   sum3
   ldi   ZL,LOW(zdata)
   ldi   ZH,HIGH(zdata)
   ldd   tmp0,Z+0
   ldd   tmp1,Z+1
   ldd   tmp2,Z+2
   ldd   tmp3,Z+3
TeaRound:
   clr   tmp4   ;z>>5 but as (z<<3)>>8 (simultaneously preparing for z<<4)
   lsl   tmp0
   rol   tmp1
   rol   tmp2
   rol   tmp3
   rol   tmp4
   lsl   tmp0
   rol   tmp1
   rol   tmp2
   rol   tmp3
   rol   tmp4
   lsl   tmp0
   rol   tmp1
   rol   tmp2
   rol   tmp3
   rol   tmp4

   lsl   tmp0 ;the last step for z<<4 and simultaneously the xor
   eor   tmp0,tmp1
   rol   tmp1
   eor   tmp1,tmp2
   rol   tmp2
   eor   tmp2,tmp3
   rol   tmp3
   eor   tmp3,tmp4

   ldd   tmp4,Z+0  ;now add z
   add   tmp0,tmp4
   ldd   tmp4,Z+1
   adc   tmp1,tmp4
   ldd   tmp4,Z+2
   adc   tmp2,tmp4
   ldd   tmp4,Z+3
   adc   tmp3,tmp4

   ldi   ZL,LOW(Key*2)   ;now calculate key pointer depending on which subround is it
   ldi   ZH,HIGH(Key*2)
   sbrc  RoundCnt,0
   rjmp  TeaRoundX1
   sbrc  sum0,0
   adiw  ZL,4
   sbrc  sum0,1
   adiw  ZL,8
   rjmp  TeaRoundX2
TeaRoundX1:
   sbrc  sum1,3             ;sum>>11 = (sum>>8)>>3
   adiw  ZL,4
   sbrc  sum1,4
   adiw  ZL,8
TeaRoundX2:
   
   lpm   tmp4,Z+         ;(tmp) xor (sum+k[ptr])
   add   tmp4,sum0
   eor   tmp0,tmp4
   lpm   tmp4,Z+
   adc   tmp4,sum1
   eor   tmp1,tmp4
   lpm   tmp4,Z+
   adc   tmp4,sum2
   eor   tmp2,tmp4
   lpm   tmp4,Z+
   adc   tmp4,sum3
   eor   tmp3,tmp4

   sbrc  RoundCnt,0     ;in the first subround increment sum (AVR: decrement by its neg)
   rjmp  TeaRoundX3
   subi  sum0,$47
   sbci  sum1,$86
   sbci  sum2,$C8
   sbci  sum3,$61
   ldi   ZL,LOW(ydata)  ;and set pointer to ydata to add the calculated value in tmp (and it remains valid for the next subround, too)
   ldi   ZH,HIGH(ydata)
   rjmp  TeaRoundX4
TeaRoundX3:
   ldi   ZL,LOW(zdata)  ;in the second subround only set pointer to zdata
   ldi   ZH,HIGH(zdata)
TeaRoundX4:

   ldd   tmp4,Z+0        ;add the calculated value to y or z
   add   tmp0,tmp4
   std   Z+0,tmp0    
   ldd   tmp4,Z+1
   adc   tmp1,tmp4
   std   Z+1,tmp1
   ldd   tmp4,Z+2
   adc   tmp2,tmp4
   std   Z+2,tmp2
   ldd   tmp4,Z+3
   adc   tmp3,tmp4
   std   Z+3,tmp3

   dec   RoundCnt
   brne  TeaRoundA
   ret    
TeaRoundA:
   rjmp  TeaRound


Key:
   .db   $4b,$5a,$69,$78
   .db   $0f,$1e,$2d,$3c
   .db   $c3,$d2,$e1,$f0
   .db   $87,$96,$a5,$b4
   
;   0x78695a4b 0x3c2d1e0f 0xf0e1d2c3 0xb4a59687 </PRE></BODY></HTML>



Forumer™ is Voted #1 Free Forum Hosting provider
Build your own community today with the largest message board hosting company.