Difference between revisions of "ARM: Count ones (bit count)"

From ScienceZero
Jump to: navigation, search
(Maximum performance)
(Maximum performance)
Line 44: Line 44:
  
  
;R0 - Value
+
;R0 - Value  
;R0 = Number of ones
+
;R0 = Number of ones
;Uses R0-R1
+
;Uses R0-R1
;Thumb-2 version
+
;Thumb-2 version
and r1,r0,#0xaaaaaaaa
+
and r1,r0,#0xaaaaaaaa
sub r0,r0,r1,lsr #1
+
sub r0,r0,r1,lsr #1
+
and r1,r0,#0xcccccccc
+
and r1,r0,#0xcccccccc
and r0,r0,#0x33333333
+
and r0,r0,#0x33333333
add r0,r0,r1,lsr #2
+
add r0,r0,r1,lsr #2
+
add r0,r0,r0,lsr #4
+
add r0,r0,r0,lsr #4
and r0,r0,#0x0f0f0f0f
+
and r0,r0,#0x0f0f0f0f
+
add r0,r0,r0,lsr #8
+
add r0,r0,r0,lsr #8
add r0,r0,r0,lsr #16
+
add r0,r0,r0,lsr #16
and r0,r0,#63
+
and r0,r0,#63
bx lr
+
bx lr
  
 
[[Category:Computing]]
 
[[Category:Computing]]

Revision as of 05:49, 11 May 2011

Related terms

  • Population-count
  • Hamming weight
  • Sideways addition


Minimum size

       ;Count the set bits of a 32-bit value, two bits per loop pass (smallest code)
       ;R0 - value (shifted left during the count, so it is 0 on exit)
       ;R1 = number of ones
       ;Uses R0-R1, changes the flags (movs)
       ;81 cycles worst case, 4 cycles best case, exit when r0=0
       ;(the loop ends as soon as no set bits remain in r0)
       mov 	r1,r0,lsr #31	;r1 = bit 31 of the value (first bit counted)
loop   movs 	r0,r0,lsl #2		;drop two bits: C = old bit 30, Z = nothing left to count
       adc 	r1,r1,r0,lsr #31		;add new top bit (old bit 29) and C; no S suffix, so Z from movs survives
       bne 	loop	;repeat while r0 still has bits set
       mov 	pc,r14	;return to caller


Maximum performance

       ;Count the set bits of a 32-bit value without branches, by summing
       ;neighbouring bit fields in parallel (1-bit -> 2-bit -> 4-bit -> 8-bit sums)
       ;R0 - value
       ;R0 = number of ones
       ;Uses R0-R5 (R1 is scratch, R2 is an intermediate, R3-R5 hold the masks used below)
       ;15 cycles constant, 10 cycles when masks can be generated outside the loop
       mov	r2,#0xff		;Masks
       orr	r2,r2,#0xff<<16		;00000000111111110000000011111111
       eor	r3,r2,r2,lsl #4		;00001111000011110000111100001111
       eor	r4,r3,r3,lsl #2		;00110011001100110011001100110011
       eor	r5,r4,r4,lsl #1		;01010101010101010101010101010101
       
       ;Step 1: every 2-bit field becomes the number of ones it contained
       and	r1,r5,r0,lsr #1		;r1 = upper bit of each pair, moved to the lower position
       sub	r0,r0,r1		;pair - upper bit = count of ones in the pair (0..2)
       
       ;Step 2: add neighbouring 2-bit counts into 4-bit fields
       and	r1,r4,r0,lsr #2		;r1 = upper count of each nibble (must be taken before r0 is masked)
       and	r0,r4,r0		;r0 = lower count of each nibble
       add	r0,r0,r1		;each nibble now holds 0..4
       
       ;Step 3: add neighbouring nibbles into byte fields
       add	r0,r0,r0,lsr #4		;low nibble of each byte = sum of both nibbles (0..8)
       and	r0,r0,r3		;clear the high nibbles, leaving one count per byte
       
       ;Step 4: add the four byte counts together in the lowest byte
       add	r0,r0,r0,lsr #8		;byte 0 = byte 0 + byte 1
       add	r0,r0,r0,lsr #16	;byte 0 = sum of all four bytes (0..32)
       and	r0,r0,#63		;discard the partial sums left in the upper bits
       mov	pc,r14			;return to caller


	;Count the set bits of a 32-bit value without branches, by summing
	;neighbouring bit fields in parallel (1-bit -> 2-bit -> 4-bit -> 8-bit sums)
	;R0 - Value 
	;R0 = Number of ones
	;Uses R0-R1
	;Thumb-2 version
	;The repeating-byte mask constants are written directly as immediates,
	;so no registers are spent on masks
	and	r1,r0,#0xaaaaaaaa	;r1 = upper bit of every bit pair
	sub	r0,r0,r1,lsr #1		;pair - upper bit = count of ones in the pair (0..2)
	
	and	r1,r0,#0xcccccccc	;r1 = upper 2-bit count of each nibble (taken before r0 is masked)
	and	r0,r0,#0x33333333	;r0 = lower 2-bit count of each nibble
	add	r0,r0,r1,lsr #2		;each nibble now holds 0..4
	
	add	r0,r0,r0,lsr #4		;low nibble of each byte = sum of both nibbles (0..8)
	and	r0,r0,#0x0f0f0f0f	;clear the high nibbles, leaving one count per byte
	
	add	r0,r0,r0,lsr #8		;byte 0 = byte 0 + byte 1
	add	r0,r0,r0,lsr #16	;byte 0 = sum of all four bytes (0..32)
	and	r0,r0,#63		;discard the partial sums left in the upper bits
	bx	lr			;return to caller