Difference between revisions of "ARM: Count leading zeros"

From ScienceZero
Jump to: navigation, search
 
(3 intermediate revisions by 2 users not shown)
Line 1: Line 1:
 +
This is using the old 32 bit instruction set, modern chips have a single cycle CLZ instruction.
 +
 
==Minimum size==
 
==Minimum size==
 
         ;R0 - value
 
         ;R0 - value
 
         ;R1 = count of leading zeros
 
         ;R1 = count of leading zeros
         ;Uses R0-R1       mov r1,#0
+
         ;Uses R0-R1
 
         ;Worst case 159 cycles, best case 4 cycles
 
         ;Worst case 159 cycles, best case 4 cycles
 
         mov r1,#0
 
         mov r1,#0
Line 39: Line 41:
 
         mov pc,r14
 
         mov pc,r14
  
 +
 +
==Maximum performance with normalising==
 +
        ;R0 - value
 +
        ;R1 = count of leading zeros
 +
        ;Uses R0-R1
 +
        ;17 cycles constant
 +
        movs r1,r0,lsr #16
 +
        mov r1,#0
 +
        addeq r1,r1,#16
 +
        moveqs r0,r0,lsl #16
 +
        addeq r1,r1,#1
 +
       
 +
        tst r0,#0xff000000
 +
        addeq r1,r1,#8
 +
        moveq r0,r0,lsl #8
 +
       
 +
        tst r0,#0xf0000000
 +
        addeq r1,r1,#4
 +
        moveq r0,r0,lsl #4
 +
       
 +
        tst r0,#0xc0000000
 +
        addeq r1,r1,#2
 +
        moveq r0,r0,lsl #2
 +
       
 +
        tst r0,#0x80000000
 +
        addeq r1,r1,#1
 +
        moveq r0,r0,lsl #1
 +
       
 +
        mov pc,r14
  
 
[[Category:Computing]]
 
[[Category:Computing]]

Latest revision as of 05:26, 5 June 2011

This is using the old 32 bit instruction set, modern chips have a single cycle CLZ instruction.

Minimum size

       ;R0 - value
       ;R1 = count of leading zeros
       ;Uses R0-R1
       ;Worst case 159 cycles, best case 4 cycles
       mov	r1,#0
loop   movs	r0,r0,lsl #1
       addcc	r1,r1,#1
       bcc	loop
       mov	pc,r14


Maximum performance

       ;R0 - value
       ;R1 = count of leading zeros
       ;Uses R0-R1
       ;16 cycles constant
       movs	r1,r0,lsr #16
       mov	r1,#0
       addeq	r1,r1,#16
       moveqs	r0,r0,lsl #16
       addeq	r1,r1,#1
       
       tst	r0,#0xff000000
       addeq	r1,r1,#8
       moveq	r0,r0,lsl #8
       
       tst	r0,#0xf0000000
       addeq	r1,r1,#4
       moveq	r0,r0,lsl #4
       
       tst	r0,#0xc0000000
       addeq	r1,r1,#2
       moveq	r0,r0,lsl #2
       
       tst	r0,#0x80000000
       addeq	r1,r1,#1
       
       mov	pc,r14


Maximum performance with normalising

       ;R0 - value
       ;R1 = count of leading zeros
       ;Uses R0-R1
       ;17 cycles constant
       movs	r1,r0,lsr #16
       mov	r1,#0
       addeq	r1,r1,#16
       moveqs	r0,r0,lsl #16
       addeq	r1,r1,#1
       
       tst	r0,#0xff000000
       addeq	r1,r1,#8
       moveq	r0,r0,lsl #8
       
       tst	r0,#0xf0000000
       addeq	r1,r1,#4
       moveq	r0,r0,lsl #4
       
       tst	r0,#0xc0000000
       addeq	r1,r1,#2
       moveq	r0,r0,lsl #2
       
       tst	r0,#0x80000000
       addeq	r1,r1,#1
       moveq	r0,r0,lsl #1
       
       mov	pc,r14