Difference between revisions of "ARM: Count leading zeros"
From ScienceZero
(3 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
+ | This is using the old 32 bit instruction set, modern chips have a single cycle CLZ instruction. | ||
+ | |||
==Minimum size== | ==Minimum size== | ||
;R0 - value | ;R0 - value | ||
;R1 = count of leading zeros | ;R1 = count of leading zeros | ||
− | ;Uses R0-R1 | + | ;Uses R0-R1 |
;Worst case 159 cycles, best case 4 cycles | ;Worst case 159 cycles, best case 4 cycles | ||
mov r1,#0 | mov r1,#0 | ||
Line 39: | Line 41: | ||
mov pc,r14 | mov pc,r14 | ||
+ | |||
+ | ==Maximum performance with normalising== | ||
+ | ;R0 - value | ||
+ | ;R1 = count of leading zeros | ||
+ | ;Uses R0-R1 | ||
+ | ;17 cycles constant | ||
+ | movs r1,r0,lsr #16 | ||
+ | mov r1,#0 | ||
+ | addeq r1,r1,#16 | ||
+ | moveqs r0,r0,lsl #16 | ||
+ | addeq r1,r1,#1 | ||
+ | |||
+ | tst r0,#0xff000000 | ||
+ | addeq r1,r1,#8 | ||
+ | moveq r0,r0,lsl #8 | ||
+ | |||
+ | tst r0,#0xf0000000 | ||
+ | addeq r1,r1,#4 | ||
+ | moveq r0,r0,lsl #4 | ||
+ | |||
+ | tst r0,#0xc0000000 | ||
+ | addeq r1,r1,#2 | ||
+ | moveq r0,r0,lsl #2 | ||
+ | |||
+ | tst r0,#0x80000000 | ||
+ | addeq r1,r1,#1 | ||
+ | moveq r0,r0,lsl #1 | ||
+ | |||
+ | mov pc,r14 | ||
[[Category:Computing]] | [[Category:Computing]] |
Latest revision as of 05:26, 5 June 2011
This is using the old 32 bit instruction set, modern chips have a single cycle CLZ instruction.
Minimum size
;R0 - value ;R1 = count of leading zeros ;Uses R0-R1 ;Worst case 159 cycles, best case 4 cycles mov r1,#0 loop movs r0,r0,lsl #1 addcc r1,r1,#1 bcc loop mov pc,r14
Maximum performance
;R0 - value ;R1 = count of leading zeros ;Uses R0-R1 ;16 cycles constant movs r1,r0,lsr #16 mov r1,#0 addeq r1,r1,#16 moveqs r0,r0,lsl #16 addeq r1,r1,#1 tst r0,#0xff000000 addeq r1,r1,#8 moveq r0,r0,lsl #8 tst r0,#0xf0000000 addeq r1,r1,#4 moveq r0,r0,lsl #4 tst r0,#0xc0000000 addeq r1,r1,#2 moveq r0,r0,lsl #2 tst r0,#0x80000000 addeq r1,r1,#1 mov pc,r14
Maximum performance with normalising
;R0 - value ;R1 = count of leading zeros ;Uses R0-R1 ;17 cycles constant movs r1,r0,lsr #16 mov r1,#0 addeq r1,r1,#16 moveqs r0,r0,lsl #16 addeq r1,r1,#1 tst r0,#0xff000000 addeq r1,r1,#8 moveq r0,r0,lsl #8 tst r0,#0xf0000000 addeq r1,r1,#4 moveq r0,r0,lsl #4 tst r0,#0xc0000000 addeq r1,r1,#2 moveq r0,r0,lsl #2 tst r0,#0x80000000 addeq r1,r1,#1 moveq r0,r0,lsl #1 mov pc,r14