ARM: Count leading zeros

From ScienceZero
Jump to: navigation, search

The routines below use the old 32-bit ARM instruction set; modern chips (ARMv5 and later) provide a single-cycle CLZ instruction instead.

Minimum size

       ;R0 - value
       ;R1 = count of leading zeros
       ;Uses R0-R1
       ;Worst case 159 cycles, best case 4 cycles
       mov	r1,#0
loop   movs	r0,r0,lsl #1
       addcc	r1,r1,#1
       bcc	loop
       mov	pc,r14


Maximum performance

       ;R0 - value
       ;R1 = count of leading zeros
       ;Uses R0-R1
       ;16 cycles constant
       movs	r1,r0,lsr #16
       mov	r1,#0
       addeq	r1,r1,#16
       moveqs	r0,r0,lsl #16
       addeq	r1,r1,#1
       
       tst	r0,#0xff000000
       addeq	r1,r1,#8
       moveq	r0,r0,lsl #8
       
       tst	r0,#0xf0000000
       addeq	r1,r1,#4
       moveq	r0,r0,lsl #4
       
       tst	r0,#0xc0000000
       addeq	r1,r1,#2
       moveq	r0,r0,lsl #2
       
       tst	r0,#0x80000000
       addeq	r1,r1,#1
       
       mov	pc,r14


Maximum performance with normalising

       ;R0 - value
       ;R1 = count of leading zeros
       ;Uses R0-R1
       ;17 cycles constant
       movs	r1,r0,lsr #16
       mov	r1,#0
       addeq	r1,r1,#16
       moveqs	r0,r0,lsl #16
       addeq	r1,r1,#1
       
       tst	r0,#0xff000000
       addeq	r1,r1,#8
       moveq	r0,r0,lsl #8
       
       tst	r0,#0xf0000000
       addeq	r1,r1,#4
       moveq	r0,r0,lsl #4
       
       tst	r0,#0xc0000000
       addeq	r1,r1,#2
       moveq	r0,r0,lsl #2
       
       tst	r0,#0x80000000
       addeq	r1,r1,#1
       moveq	r0,r0,lsl #1
       
       mov	pc,r14