Commit 2c57a0e2 authored by Yury Norov, committed by Linus Torvalds

lib: find_*_bit reimplementation

This patchset reworks the find_bit function family to achieve better
performance and decrease the size of text.  All rework is done in patch 1.
Patches 2 and 3 are about code moving and renaming.

It was boot-tested on x86_64 and MIPS (big-endian) machines.
Performance tests were run in userspace with code like this:

	/* addr[] is filled from /dev/urandom */
	start = clock();
	while (ret < nbits)
		ret = find_next_bit(addr, nbits, ret + 1);

	end = clock();
	printf("%ld\t", (unsigned long) end - start);

On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz measurements are: (for
find_next_bit, nbits is 8M, for find_first_bit - 80K)

	find_next_bit:		find_first_bit:
	new	current		new	current
	26932	43151		14777	14925
	26947	43182		14521	15423
	26507	43824		15053	14705
	27329	43759		14473	14777
	26895	43367		14847	15023
	26990	43693		15103	15163
	26775	43299		15067	15232
	27282	42752		14544	15121
	27504	43088		14644	14858
	26761	43856		14699	15193
	26692	43075		14781	14681
	27137	42969		14451	15061
	...			...

find_next_bit performance gain is 35-40%;
find_first_bit - no measurable difference.

On ARM machines, there is an arch-specific implementation of find_bit.

Thanks a lot to George Spelvin and Rasmus Villemoes for hints and
helpful discussions.

This patch (of 3):

The new implementation takes less space in the source file (see diffstat)
and in the object.  For me it's 710 vs 453 bytes of text.  It also shows
better performance.

The find_last_bit description is also fixed, due to an obvious typo.

[akpm@linux-foundation.org: include linux/bitmap.h, per Rasmus]
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Reviewed-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: George Spelvin <linux@horizon.com>
Cc: Alexey Klimov <klimov.linux@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Valentin Rothberg <valentinrothberg@gmail.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 396ada68
...@@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) ...@@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word)
/** /**
* find_last_bit - find the last set bit in a memory region * find_last_bit - find the last set bit in a memory region
* @addr: The address to start the search at * @addr: The address to start the search at
* @size: The maximum size to search * @size: The number of bits to search
* *
* Returns the bit number of the first set bit, or size. * Returns the bit number of the last set bit, or size.
*/ */
extern unsigned long find_last_bit(const unsigned long *addr, extern unsigned long find_last_bit(const unsigned long *addr,
unsigned long size); unsigned long size);
......
...@@ -4,6 +4,9 @@ ...@@ -4,6 +4,9 @@
* Written by Rusty Russell <rusty@rustcorp.com.au> * Written by Rusty Russell <rusty@rustcorp.com.au>
* (Inspired by David Howell's find_next_bit implementation) * (Inspired by David Howell's find_next_bit implementation)
* *
* Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
* size and improve performance, 2015.
*
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
...@@ -11,37 +14,26 @@ ...@@ -11,37 +14,26 @@
*/ */
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/export.h> #include <linux/export.h>
#include <asm/types.h> #include <linux/kernel.h>
#include <asm/byteorder.h>
#ifndef find_last_bit #ifndef find_last_bit
unsigned long find_last_bit(const unsigned long *addr, unsigned long size) unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{ {
unsigned long words; if (size) {
unsigned long tmp; unsigned long val = BITMAP_LAST_WORD_MASK(size);
unsigned long idx = (size-1) / BITS_PER_LONG;
/* Start at final word. */
words = size / BITS_PER_LONG;
/* Partial final word? */ do {
if (size & (BITS_PER_LONG-1)) { val &= addr[idx];
tmp = (addr[words] & (~0UL >> (BITS_PER_LONG if (val)
- (size & (BITS_PER_LONG-1))))); return idx * BITS_PER_LONG + __fls(val);
if (tmp)
goto found;
}
while (words) { val = ~0ul;
tmp = addr[--words]; } while (idx--);
if (tmp) {
found:
return words * BITS_PER_LONG + __fls(tmp);
}
} }
/* Not found */
return size; return size;
} }
EXPORT_SYMBOL(find_last_bit); EXPORT_SYMBOL(find_last_bit);
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) * Written by David Howells (dhowells@redhat.com)
* *
* Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
* size and improve performance, 2015.
*
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
...@@ -11,98 +14,58 @@ ...@@ -11,98 +14,58 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/export.h> #include <linux/export.h>
#include <asm/types.h> #include <linux/kernel.h>
#include <asm/byteorder.h>
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) #if !defined(find_next_bit) || !defined(find_next_zero_bit)
#ifndef find_next_bit
/* /*
* Find the next set bit in a memory region. * This is a common helper function for find_next_bit and
* find_next_zero_bit. The difference is the "invert" argument, which
* is XORed with each fetched word before searching it for one bits.
*/ */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size, static unsigned long _find_next_bit(const unsigned long *addr,
unsigned long offset) unsigned long nbits, unsigned long start, unsigned long invert)
{ {
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp; unsigned long tmp;
if (offset >= size) if (!nbits || start >= nbits)
return size; return nbits;
size -= result;
offset %= BITS_PER_LONG; tmp = addr[start / BITS_PER_LONG] ^ invert;
if (offset) {
tmp = *(p++); /* Handle 1st word. */
tmp &= (~0UL << offset); tmp &= BITMAP_FIRST_WORD_MASK(start);
if (size < BITS_PER_LONG) start = round_down(start, BITS_PER_LONG);
goto found_first;
if (tmp) while (!tmp) {
goto found_middle; start += BITS_PER_LONG;
size -= BITS_PER_LONG; if (start >= nbits)
result += BITS_PER_LONG; return nbits;
}
while (size & ~(BITS_PER_LONG-1)) { tmp = addr[start / BITS_PER_LONG] ^ invert;
if ((tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
} }
if (!size)
return result;
tmp = *p;
found_first: return min(start + __ffs(tmp), nbits);
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
} }
EXPORT_SYMBOL(find_next_bit);
#endif #endif
#ifndef find_next_zero_bit #ifndef find_next_bit
/* /*
* This implementation of find_{first,next}_zero_bit was stolen from * Find the next set bit in a memory region.
* Linus' asm-alpha/bitops.h.
*/ */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
return _find_next_bit(addr, size, offset, 0UL);
}
EXPORT_SYMBOL(find_next_bit);
#endif
#ifndef find_next_zero_bit
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset) unsigned long offset)
{ {
const unsigned long *p = addr + BITOP_WORD(offset); return _find_next_bit(addr, size, offset, ~0UL);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp |= ~0UL >> (BITS_PER_LONG - offset);
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG-1)) {
if (~(tmp = *(p++)))
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found_middle:
return result + ffz(tmp);
} }
EXPORT_SYMBOL(find_next_zero_bit); EXPORT_SYMBOL(find_next_zero_bit);
#endif #endif
...@@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit); ...@@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
*/ */
unsigned long find_first_bit(const unsigned long *addr, unsigned long size) unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{ {
const unsigned long *p = addr; unsigned long idx;
unsigned long result = 0;
unsigned long tmp;
while (size & ~(BITS_PER_LONG-1)) { for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
if ((tmp = *(p++))) if (addr[idx])
goto found; return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
} }
if (!size)
return result;
tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); return size;
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found:
return result + __ffs(tmp);
} }
EXPORT_SYMBOL(find_first_bit); EXPORT_SYMBOL(find_first_bit);
#endif #endif
...@@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit); ...@@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
*/ */
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{ {
const unsigned long *p = addr; unsigned long idx;
unsigned long result = 0;
unsigned long tmp;
while (size & ~(BITS_PER_LONG-1)) { for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
if (~(tmp = *(p++))) if (addr[idx] != ~0UL)
goto found; return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
} }
if (!size)
return result;
tmp = (*p) | (~0UL << size); return size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found:
return result + ffz(tmp);
} }
EXPORT_SYMBOL(find_first_zero_bit); EXPORT_SYMBOL(find_first_zero_bit);
#endif #endif
...@@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit); ...@@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
#ifdef __BIG_ENDIAN #ifdef __BIG_ENDIAN
/* include/linux/byteorder does not support "unsigned long" type */ /* include/linux/byteorder does not support "unsigned long" type */
static inline unsigned long ext2_swabp(const unsigned long * x)
{
#if BITS_PER_LONG == 64
return (unsigned long) __swab64p((u64 *) x);
#elif BITS_PER_LONG == 32
return (unsigned long) __swab32p((u32 *) x);
#else
#error BITS_PER_LONG not defined
#endif
}
/* include/linux/byteorder doesn't support "unsigned long" type */
static inline unsigned long ext2_swab(const unsigned long y) static inline unsigned long ext2_swab(const unsigned long y)
{ {
#if BITS_PER_LONG == 64 #if BITS_PER_LONG == 64
...@@ -189,48 +120,38 @@ static inline unsigned long ext2_swab(const unsigned long y) ...@@ -189,48 +120,38 @@ static inline unsigned long ext2_swab(const unsigned long y)
#endif #endif
} }
#ifndef find_next_zero_bit_le #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
unsigned long find_next_zero_bit_le(const void *addr, unsigned static unsigned long _find_next_bit_le(const unsigned long *addr,
long size, unsigned long offset) unsigned long nbits, unsigned long start, unsigned long invert)
{ {
const unsigned long *p = addr;
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp; unsigned long tmp;
if (offset >= size) if (!nbits || start >= nbits)
return size; return nbits;
p += BITOP_WORD(offset);
size -= result; tmp = addr[start / BITS_PER_LONG] ^ invert;
offset &= (BITS_PER_LONG - 1UL);
if (offset) { /* Handle 1st word. */
tmp = ext2_swabp(p++); tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
tmp |= (~0UL >> (BITS_PER_LONG - offset)); start = round_down(start, BITS_PER_LONG);
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG - 1)) { while (!tmp) {
if (~(tmp = *(p++))) start += BITS_PER_LONG;
goto found_middle_swap; if (start >= nbits)
result += BITS_PER_LONG; return nbits;
size -= BITS_PER_LONG;
tmp = addr[start / BITS_PER_LONG] ^ invert;
} }
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. Skip ffz */
found_middle:
return result + ffz(tmp);
found_middle_swap: return min(start + __ffs(ext2_swab(tmp)), nbits);
return result + ffz(ext2_swab(tmp)); }
#endif
#ifndef find_next_zero_bit_le
unsigned long find_next_zero_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
return _find_next_bit_le(addr, size, offset, ~0UL);
} }
EXPORT_SYMBOL(find_next_zero_bit_le); EXPORT_SYMBOL(find_next_zero_bit_le);
#endif #endif
...@@ -239,45 +160,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); ...@@ -239,45 +160,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
unsigned long find_next_bit_le(const void *addr, unsigned unsigned long find_next_bit_le(const void *addr, unsigned
long size, unsigned long offset) long size, unsigned long offset)
{ {
const unsigned long *p = addr; return _find_next_bit_le(addr, size, offset, 0UL);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
if (offset >= size)
return size;
p += BITOP_WORD(offset);
size -= result;
offset &= (BITS_PER_LONG - 1UL);
if (offset) {
tmp = ext2_swabp(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size & ~(BITS_PER_LONG - 1)) {
tmp = *(p++);
if (tmp)
goto found_middle_swap;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __ffs(tmp);
found_middle_swap:
return result + __ffs(ext2_swab(tmp));
} }
EXPORT_SYMBOL(find_next_bit_le); EXPORT_SYMBOL(find_next_bit_le);
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment