Difference between revisions of "ARM: Count ones (bit count)"

From ScienceZero
Jump to: navigation, search
(Maximum performance)
 
(4 intermediate revisions by the same user not shown)
Line 1: Line 1:
Population-count
+
==Related terms==
Sideways addition
+
*Population-count
Hamming weight
+
*Hamming weight
 +
*Sideways addition
  
Minimum size
+
 
 +
==Minimum size==
 
         ;R0 - value
 
         ;R0 - value
 
         ;R1 = number of ones
 
         ;R1 = number of ones
Line 15: Line 17:
  
  
Maximum performance
+
==Maximum performance==
 
         ;R0 - value
 
         ;R0 - value
 
         ;R0 = number of ones
 
         ;R0 = number of ones
Line 34: Line 36:
 
          
 
          
 
         add r0,r0,r0,lsr #4
 
         add r0,r0,r0,lsr #4
         and r0,r0,r2
+
         and r0,r0,r3
 
          
 
          
 
         add r0,r0,r0,lsr #8
 
         add r0,r0,r0,lsr #8
Line 40: Line 42:
 
         and r0,r0,#63
 
         and r0,r0,#63
 
         mov pc,r14
 
         mov pc,r14
 +
 +
== Thumb-2 version ==
 +
;R0 - Value
 +
;R0 = Number of ones
 +
;Uses R0-R1
 +
;Thumb-2 version
 +
and r1,r0,#0xaaaaaaaa
 +
sub r0,r0,r1,lsr #1
 +
 +
and r1,r0,#0xcccccccc
 +
and r0,r0,#0x33333333
 +
add r0,r0,r1,lsr #2
 +
 +
add r0,r0,r0,lsr #4
 +
and r0,r0,#0x0f0f0f0f
 +
 +
add r0,r0,r0,lsr #8
 +
add r0,r0,r0,lsr #16
 +
and r0,r0,#63
 +
bx lr
  
 
[[Category:Computing]]
 
[[Category:Computing]]

Latest revision as of 04:42, 15 May 2011

Related terms

  • Population-count
  • Hamming weight
  • Sideways addition


Minimum size

       ;R0 - value
       ;R1 = number of ones
       ;Uses R0-R1
       ;81 cycles worst case, 4 cycles best case, exit when r1=0
       mov 	r1,r0,lsr #31
loop   movs 	r0,r0,lsl #2	
       adc 	r1,r1,r0,lsr #31	
       bne 	loop
       mov 	pc,r14


Maximum performance

       ;R0 - value
       ;R0 = number of ones
       ;Uses R0-R5
       ;15 cycles constant, 10 cycles when masks can be generated outside the loop
       mov	r2,#0xff		;Masks
       orr	r2,r2,#0xff<<16		;00000000111111110000000011111111
       eor	r3,r2,r2,lsl #4		;00001111000011110000111100001111
       eor	r4,r3,r3,lsl #2		;00110011001100110011001100110011
       eor	r5,r4,r4,lsl #1		;01010101010101010101010101010101
       
       and	r1,r5,r0,lsr #1
       sub	r0,r0,r1
       
       and	r1,r4,r0,lsr #2
       and	r0,r4,r0
       add	r0,r0,r1
       
       add	r0,r0,r0,lsr #4
       and	r0,r0,r3
       
       add	r0,r0,r0,lsr #8
       add	r0,r0,r0,lsr #16
       and	r0,r0,#63
       mov	pc,r14

Thumb-2 version

	;R0 - Value 
	;R0 = Number of ones
	;Uses R0-R1
	;Thumb-2 version
	and	r1,r0,#0xaaaaaaaa
	sub	r0,r0,r1,lsr #1
	
	and	r1,r0,#0xcccccccc
	and	r0,r0,#0x33333333
	add	r0,r0,r1,lsr #2
	
	add	r0,r0,r0,lsr #4
	and	r0,r0,#0x0f0f0f0f
	
	add	r0,r0,r0,lsr #8
	add	r0,r0,r0,lsr #16
	and	r0,r0,#63
	bx	lr