Difference between revisions of "ARM: Count ones (bit count)"
From ScienceZero
(New page: Minimum size ;R0 - value ;R1 = number of ones ;Uses R0-R1 ;81 cycles worst case, 4 cycles best case, exit when r1=0 mov r1,r0,lsr #31 loop movs ...) |
(→Maximum performance) |
||
(6 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
− | Minimum size | + | ==Related terms== |
+ | *Population-count | ||
+ | *Hamming weight | ||
+ | *Sideways addition | ||
+ | |||
+ | |||
+ | ==Minimum size== | ||
;R0 - value | ;R0 - value | ||
;R1 = number of ones | ;R1 = number of ones | ||
Line 11: | Line 17: | ||
− | Maximum performance | + | ==Maximum performance== |
;R0 - value | ;R0 - value | ||
;R0 = number of ones | ;R0 = number of ones | ||
Line 30: | Line 36: | ||
add r0,r0,r0,lsr #4 | add r0,r0,r0,lsr #4 | ||
− | and r0,r0, | + | and r0,r0,r3 |
add r0,r0,r0,lsr #8 | add r0,r0,r0,lsr #8 | ||
Line 36: | Line 42: | ||
and r0,r0,#63 | and r0,r0,#63 | ||
mov pc,r14 | mov pc,r14 | ||
+ | |||
+ | == Thumb-2 version == | ||
+ | ;R0 - Value | ||
+ | ;R0 = Number of ones | ||
+ | ;Uses R0-R1 | ||
+ | ;Thumb-2 version | ||
+ | and r1,r0,#0xaaaaaaaa | ||
+ | sub r0,r0,r1,lsr #1 | ||
+ | |||
+ | and r1,r0,#0xcccccccc | ||
+ | and r0,r0,#0x33333333 | ||
+ | add r0,r0,r1,lsr #2 | ||
+ | |||
+ | add r0,r0,r0,lsr #4 | ||
+ | and r0,r0,#0x0f0f0f0f | ||
+ | |||
+ | add r0,r0,r0,lsr #8 | ||
+ | add r0,r0,r0,lsr #16 | ||
+ | and r0,r0,#63 | ||
+ | bx lr | ||
[[Category:Computing]] | [[Category:Computing]] |
Latest revision as of 04:42, 15 May 2011
Related terms
- Population-count
- Hamming weight
- Sideways addition
Minimum size
;R0 - value ;R1 = number of ones ;Uses R0-R1 ;81 cycles worst case, 4 cycles best case, exit when r1=0 mov r1,r0,lsr #31 loop movs r0,r0,lsl #2 adc r1,r1,r0,lsr #31 bne loop mov pc,r14
Maximum performance
;R0 - value ;R0 = number of ones ;Uses R0-R5 ;15 cycles constant, 10 cycles when masks can be generated outside the loop mov r2,#0xff ;Masks orr r2,r2,#0xff<<16 ;00000000111111110000000011111111 eor r3,r2,r2,lsl #4 ;00001111000011110000111100001111 eor r4,r3,r3,lsl #2 ;00110011001100110011001100110011 eor r5,r4,r4,lsl #1 ;01010101010101010101010101010101 and r1,r5,r0,lsr #1 sub r0,r0,r1 and r1,r4,r0,lsr #2 and r0,r4,r0 add r0,r0,r1 add r0,r0,r0,lsr #4 and r0,r0,r3 add r0,r0,r0,lsr #8 add r0,r0,r0,lsr #16 and r0,r0,#63 mov pc,r14
Thumb-2 version
;R0 - Value ;R0 = Number of ones ;Uses R0-R1 ;Thumb-2 version and r1,r0,#0xaaaaaaaa sub r0,r0,r1,lsr #1 and r1,r0,#0xcccccccc and r0,r0,#0x33333333 add r0,r0,r1,lsr #2 add r0,r0,r0,lsr #4 and r0,r0,#0x0f0f0f0f add r0,r0,r0,lsr #8 add r0,r0,r0,lsr #16 and r0,r0,#63 bx lr