Difference between revisions of "ARM: Count leading zeros"
From ScienceZero
(5 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
− | Minimum size | + | This is using the old 32 bit instruction set, modern chips have a single cycle CLZ instruction. |
+ | |||
+ | ==Minimum size== | ||
;R0 - value | ;R0 - value | ||
;R1 = count of leading zeros | ;R1 = count of leading zeros | ||
− | ;Uses R0-R1 | + | ;Uses R0-R1 |
;Worst case 159 cycles, best case 4 cycles | ;Worst case 159 cycles, best case 4 cycles | ||
mov r1,#0 | mov r1,#0 | ||
Line 11: | Line 13: | ||
− | Maximum performance | + | ==Maximum performance== |
;R0 - value | ;R0 - value | ||
;R1 = count of leading zeros | ;R1 = count of leading zeros | ||
;Uses R0-R1 | ;Uses R0-R1 | ||
− | ; | + | ;16 cycles constant |
movs r1,r0,lsr #16 | movs r1,r0,lsr #16 | ||
mov r1,#0 | mov r1,#0 | ||
addeq r1,r1,#16 | addeq r1,r1,#16 | ||
moveqs r0,r0,lsl #16 | moveqs r0,r0,lsl #16 | ||
− | addeq r1,r1,# | + | addeq r1,r1,#1 |
− | + | ||
tst r0,#0xff000000 | tst r0,#0xff000000 | ||
Line 40: | Line 41: | ||
mov pc,r14 | mov pc,r14 | ||
+ | |||
+ | ==Maximum performance with normalising== | ||
+ | ;R0 - value | ||
+ | ;R1 = count of leading zeros | ||
+ | ;Uses R0-R1 | ||
+ | ;17 cycles constant | ||
+ | movs r1,r0,lsr #16 | ||
+ | mov r1,#0 | ||
+ | addeq r1,r1,#16 | ||
+ | moveqs r0,r0,lsl #16 | ||
+ | addeq r1,r1,#1 | ||
+ | |||
+ | tst r0,#0xff000000 | ||
+ | addeq r1,r1,#8 | ||
+ | moveq r0,r0,lsl #8 | ||
+ | |||
+ | tst r0,#0xf0000000 | ||
+ | addeq r1,r1,#4 | ||
+ | moveq r0,r0,lsl #4 | ||
+ | |||
+ | tst r0,#0xc0000000 | ||
+ | addeq r1,r1,#2 | ||
+ | moveq r0,r0,lsl #2 | ||
+ | |||
+ | tst r0,#0x80000000 | ||
+ | addeq r1,r1,#1 | ||
+ | moveq r0,r0,lsl #1 | ||
+ | |||
+ | mov pc,r14 | ||
[[Category:Computing]] | [[Category:Computing]] |
Latest revision as of 05:26, 5 June 2011
This is using the old 32 bit instruction set, modern chips have a single cycle CLZ instruction.
Minimum size
;R0 - value ;R1 = count of leading zeros ;Uses R0-R1 ;Worst case 159 cycles, best case 4 cycles mov r1,#0 loop movs r0,r0,lsl #1 addcc r1,r1,#1 bcc loop mov pc,r14
Maximum performance
;R0 - value ;R1 = count of leading zeros ;Uses R0-R1 ;16 cycles constant movs r1,r0,lsr #16 mov r1,#0 addeq r1,r1,#16 moveqs r0,r0,lsl #16 addeq r1,r1,#1 tst r0,#0xff000000 addeq r1,r1,#8 moveq r0,r0,lsl #8 tst r0,#0xf0000000 addeq r1,r1,#4 moveq r0,r0,lsl #4 tst r0,#0xc0000000 addeq r1,r1,#2 moveq r0,r0,lsl #2 tst r0,#0x80000000 addeq r1,r1,#1 mov pc,r14
Maximum performance with normalising
;R0 - value ;R1 = count of leading zeros ;Uses R0-R1 ;17 cycles constant movs r1,r0,lsr #16 mov r1,#0 addeq r1,r1,#16 moveqs r0,r0,lsl #16 addeq r1,r1,#1 tst r0,#0xff000000 addeq r1,r1,#8 moveq r0,r0,lsl #8 tst r0,#0xf0000000 addeq r1,r1,#4 moveq r0,r0,lsl #4 tst r0,#0xc0000000 addeq r1,r1,#2 moveq r0,r0,lsl #2 tst r0,#0x80000000 addeq r1,r1,#1 moveq r0,r0,lsl #1 mov pc,r14