ARM: Count ones (bit count)

From ScienceZero
Revision as of 05:49, 11 May 2011 by Bjoern (Talk | contribs) (Maximum performance)

Jump to: navigation, search

Related terms

  • Population-count
  • Hamming weight
  • Sideways addition


Minimum size

       ;R0 - value
       ;R1 = number of ones
       ;Uses R0-R1
       ;81 cycles worst case, 4 cycles best case, exit when r1=0
       mov 	r1,r0,lsr #31
loop   movs 	r0,r0,lsl #2	
       adc 	r1,r1,r0,lsr #31	
       bne 	loop
       mov 	pc,r14


Maximum performance

       ;R0 - value
       ;R0 = number of ones
       ;Uses R0-R5
       ;15 cycles constant, 10 cycles when masks can be generated outside the loop
       mov	r2,#0xff		;Masks
       orr	r2,r2,#0xff<<16		;00000000111111110000000011111111
       eor	r3,r2,r2,lsl #4		;00001111000011110000111100001111
       eor	r4,r3,r3,lsl #2		;00110011001100110011001100110011
       eor	r5,r4,r4,lsl #1		;01010101010101010101010101010101
       
       and	r1,r5,r0,lsr #1
       sub	r0,r0,r1
       
       and	r1,r4,r0,lsr #2
       and	r0,r4,r0
       add	r0,r0,r1
       
       add	r0,r0,r0,lsr #4
       and	r0,r0,r3
       
       add	r0,r0,r0,lsr #8
       add	r0,r0,r0,lsr #16
       and	r0,r0,#63
       mov	pc,r14


;R0 - Value ;R0 = Number of ones ;Uses R0-R1 ;Thumb-2 version and r1,r0,#0xaaaaaaaa sub r0,r0,r1,lsr #1

and r1,r0,#0xcccccccc and r0,r0,#0x33333333 add r0,r0,r1,lsr #2

add r0,r0,r0,lsr #4 and r0,r0,#0x0f0f0f0f

add r0,r0,r0,lsr #8 add r0,r0,r0,lsr #16 and r0,r0,#63 bx lr