ARM: Count leading zeros
From ScienceZero
This uses the old 32-bit ARM instruction set; modern chips have a single-cycle CLZ instruction.
Minimum size
;Count leading zeros - smallest code, one bit per loop pass
;Entry: R0 = value to examine
;Exit:  R1 = count of leading zeros (0-32; 32 when R0 was zero)
;       R0 = corrupted
;Uses R0-R1 and the flags
;A zero input is caught before the loop and returns 32. Without that
;check no 1 bit is ever shifted into carry and the loop never ends.
;Non-zero input: worst case 162 cycles, best case 7 cycles
;(the original 159/4 figures plus 3 cycles for the zero check)
clz_min
        mov     r1,#0                   ;no leading zeros counted yet
        teq     r0,#0                   ;all-zero word has no 1 bit to find...
        moveq   r1,#32                  ;...so every one of its 32 bits is a leading zero
        moveq   pc,r14
loop    movs    r0,r0,lsl #1            ;top bit falls out into carry
        addcc   r1,r1,#1                ;carry clear: that bit was a leading zero
        bcc     loop                    ;repeat until a 1 bit has been shifted out
        mov     pc,r14
Maximum performance
;Count leading zeros - fixed-time binary search, no branches
;Entry: R0 = value to examine
;Exit:  R1 = count of leading zeros (32 when R0 was zero)
;       R0 = corrupted
;Uses R0-R1
;16 cycles constant
        MOVS    R1,R0,LSR #16           ;Z set when bits 31-16 are all clear
        MOVNE   R1,#0                   ;upper half in use: count starts at 0
        MOVEQ   R1,#16                  ;upper half clear: 16 zeros found so far
        MOVEQS  R0,R0,LSL #16           ;...promote the lower half; Z stays set only if R0 = 0
        ADDEQ   R1,R1,#1                ;zero input: extra 1 so the steps below total 32
        TST     R0,#0xff000000          ;top byte clear?
        ADDEQ   R1,R1,#8
        MOVEQ   R0,R0,LSL #8
        TST     R0,#0xf0000000          ;top nibble clear?
        ADDEQ   R1,R1,#4
        MOVEQ   R0,R0,LSL #4
        TST     R0,#0xc0000000          ;top two bits clear?
        ADDEQ   R1,R1,#2
        MOVEQ   R0,R0,LSL #2
        TST     R0,#0x80000000          ;top bit clear?
        ADDEQ   R1,R1,#1
        MOV     PC,R14                  ;back to the caller
Maximum performance with normalising
;Count leading zeros and normalise - fixed-time binary search
;Entry: R0 = value to examine
;Exit:  R1 = count of leading zeros (32 when R0 was zero)
;       R0 = input shifted left by R1 places, so a non-zero value
;            ends with bit 31 set; zero stays zero
;Uses R0-R1
;17 cycles constant
        movs    r1, r0, lsr #16         ;Z <- (upper halfword == 0)
        mov     r1, #0                  ;count = 0; flags untouched
        addeq   r1, r1, #16             ;upper halfword empty: count = 16
        moveqs  r0, r0, lsl #16         ;...shift it away; Z now means "input was zero"
        addeq   r1, r1, #1              ;zero input only: the extra 1 makes the total 32

        tst     r0, #0xff000000         ;8-bit step
        moveq   r0, r0, lsl #8
        addeq   r1, r1, #8

        tst     r0, #0xf0000000         ;4-bit step
        moveq   r0, r0, lsl #4
        addeq   r1, r1, #4

        tst     r0, #0xc0000000         ;2-bit step
        moveq   r0, r0, lsl #2
        addeq   r1, r1, #2

        tst     r0, #0x80000000         ;1-bit step
        moveq   r0, r0, lsl #1          ;final shift leaves the value normalised
        addeq   r1, r1, #1

        mov     pc, r14                 ;return