Skip to content

Commit fed3819

Browse files
committed
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC cleanups from Eric Biggers:

 "Simplify the kconfig options for controlling which CRC implementations
  are built into the kernel, as was requested by Linus.

  This means making the option to disable the arch code visible only
  when CONFIG_EXPERT=y, and standardizing on a single generic
  implementation of CRC32"

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crc32: remove other generic implementations
  lib/crc: simplify the kconfig options for CRC implementations
2 parents af13ff1 + 5e3c1c4 commit fed3819

File tree

4 files changed

+53
-462
lines changed

4 files changed

+53
-462
lines changed

lib/Kconfig

Lines changed: 13 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -164,34 +164,9 @@ config CRC_T10DIF
164164
config ARCH_HAS_CRC_T10DIF
165165
bool
166166

167-
choice
168-
prompt "CRC-T10DIF implementation"
169-
depends on CRC_T10DIF
170-
default CRC_T10DIF_IMPL_ARCH if ARCH_HAS_CRC_T10DIF
171-
default CRC_T10DIF_IMPL_GENERIC if !ARCH_HAS_CRC_T10DIF
172-
help
173-
This option allows you to override the default choice of CRC-T10DIF
174-
implementation.
175-
176-
config CRC_T10DIF_IMPL_ARCH
177-
bool "Architecture-optimized" if ARCH_HAS_CRC_T10DIF
178-
help
179-
Use the optimized implementation of CRC-T10DIF for the selected
180-
architecture. It is recommended to keep this enabled, as it can
181-
greatly improve CRC-T10DIF performance.
182-
183-
config CRC_T10DIF_IMPL_GENERIC
184-
bool "Generic implementation"
185-
help
186-
Use the generic table-based implementation of CRC-T10DIF. Selecting
187-
this will reduce code size slightly but can greatly reduce CRC-T10DIF
188-
performance.
189-
190-
endchoice
191-
192167
config CRC_T10DIF_ARCH
193168
tristate
194-
default CRC_T10DIF if CRC_T10DIF_IMPL_ARCH
169+
default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS
195170

196171
config CRC64_ROCKSOFT
197172
tristate "CRC calculation for the Rocksoft model CRC64"
@@ -223,87 +198,9 @@ config CRC32
223198
config ARCH_HAS_CRC32
224199
bool
225200

226-
choice
227-
prompt "CRC32 implementation"
228-
depends on CRC32
229-
default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32
230-
default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32
231-
help
232-
This option allows you to override the default choice of CRC32
233-
implementation. Choose the default unless you know that you need one
234-
of the others.
235-
236-
config CRC32_IMPL_ARCH_PLUS_SLICEBY8
237-
bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32
238-
help
239-
Use architecture-optimized implementation of CRC32. Fall back to
240-
slice-by-8 in cases where the arch-optimized implementation cannot be
241-
used, e.g. if the CPU lacks support for the needed instructions.
242-
243-
This is the default when an arch-optimized implementation exists.
244-
245-
config CRC32_IMPL_ARCH_PLUS_SLICEBY1
246-
bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32
247-
help
248-
Use architecture-optimized implementation of CRC32, but fall back to
249-
slice-by-1 instead of slice-by-8 in order to reduce the binary size.
250-
251-
config CRC32_IMPL_SLICEBY8
252-
bool "Slice by 8 bytes"
253-
help
254-
Calculate checksum 8 bytes at a time with a clever slicing algorithm.
255-
This is much slower than the architecture-optimized implementation of
256-
CRC32 (if the selected arch has one), but it is portable and is the
257-
fastest implementation when no arch-optimized implementation is
258-
available. It uses an 8KiB lookup table. Most modern processors have
259-
enough cache to hold this table without thrashing the cache.
260-
261-
config CRC32_IMPL_SLICEBY4
262-
bool "Slice by 4 bytes"
263-
help
264-
Calculate checksum 4 bytes at a time with a clever slicing algorithm.
265-
This is a bit slower than slice by 8, but has a smaller 4KiB lookup
266-
table.
267-
268-
Only choose this option if you know what you are doing.
269-
270-
config CRC32_IMPL_SLICEBY1
271-
bool "Slice by 1 byte (Sarwate's algorithm)"
272-
help
273-
Calculate checksum a byte at a time using Sarwate's algorithm. This
274-
is not particularly fast, but has a small 1KiB lookup table.
275-
276-
Only choose this option if you know what you are doing.
277-
278-
config CRC32_IMPL_BIT
279-
bool "Classic Algorithm (one bit at a time)"
280-
help
281-
Calculate checksum one bit at a time. This is VERY slow, but has
282-
no lookup table. This is provided as a debugging option.
283-
284-
Only choose this option if you are debugging crc32.
285-
286-
endchoice
287-
288201
config CRC32_ARCH
289202
tristate
290-
default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
291-
292-
config CRC32_SLICEBY8
293-
bool
294-
default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8
295-
296-
config CRC32_SLICEBY4
297-
bool
298-
default y if CRC32_IMPL_SLICEBY4
299-
300-
config CRC32_SARWATE
301-
bool
302-
default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
303-
304-
config CRC32_BIT
305-
bool
306-
default y if CRC32_IMPL_BIT
203+
default CRC32 if ARCH_HAS_CRC32 && CRC_OPTIMIZATIONS
307204

308205
config CRC64
309206
tristate "CRC64 functions"
@@ -343,6 +240,17 @@ config CRC8
343240
when they need to do cyclic redundancy check according CRC8
344241
algorithm. Module will be called crc8.
345242

243+
config CRC_OPTIMIZATIONS
244+
bool "Enable optimized CRC implementations" if EXPERT
245+
default y
246+
help
247+
Disabling this option reduces code size slightly by disabling the
248+
architecture-optimized implementations of any CRC variants that are
249+
enabled. CRC checksumming performance may get much slower.
250+
251+
Keep this enabled unless you're really trying to minimize the size of
252+
the kernel.
253+
346254
config XXHASH
347255
tristate
348256

lib/crc32.c

Lines changed: 10 additions & 215 deletions
Original file line numberDiff line numberDiff line change
@@ -30,178 +30,27 @@
3030
#include <linux/crc32poly.h>
3131
#include <linux/module.h>
3232
#include <linux/types.h>
33-
#include <linux/sched.h>
34-
#include "crc32defs.h"
35-
36-
#if CRC_LE_BITS > 8
37-
# define tole(x) ((__force u32) cpu_to_le32(x))
38-
#else
39-
# define tole(x) (x)
40-
#endif
41-
42-
#if CRC_BE_BITS > 8
43-
# define tobe(x) ((__force u32) cpu_to_be32(x))
44-
#else
45-
# define tobe(x) (x)
46-
#endif
4733

4834
#include "crc32table.h"
4935

5036
MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
5137
MODULE_DESCRIPTION("Various CRC32 calculations");
5238
MODULE_LICENSE("GPL");
5339

54-
#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
55-
56-
/* implements slicing-by-4 or slicing-by-8 algorithm */
57-
static inline u32 __pure
58-
crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
59-
{
60-
# ifdef __LITTLE_ENDIAN
61-
# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8)
62-
# define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \
63-
t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255])
64-
# define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \
65-
t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255])
66-
# else
67-
# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
68-
# define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \
69-
t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255])
70-
# define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \
71-
t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255])
72-
# endif
73-
const u32 *b;
74-
size_t rem_len;
75-
# ifdef CONFIG_X86
76-
size_t i;
77-
# endif
78-
const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
79-
# if CRC_LE_BITS != 32
80-
const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7];
81-
# endif
82-
u32 q;
83-
84-
/* Align it */
85-
if (unlikely((long)buf & 3 && len)) {
86-
do {
87-
DO_CRC(*buf++);
88-
} while ((--len) && ((long)buf)&3);
89-
}
90-
91-
# if CRC_LE_BITS == 32
92-
rem_len = len & 3;
93-
len = len >> 2;
94-
# else
95-
rem_len = len & 7;
96-
len = len >> 3;
97-
# endif
98-
99-
b = (const u32 *)buf;
100-
# ifdef CONFIG_X86
101-
--b;
102-
for (i = 0; i < len; i++) {
103-
# else
104-
for (--b; len; --len) {
105-
# endif
106-
q = crc ^ *++b; /* use pre increment for speed */
107-
# if CRC_LE_BITS == 32
108-
crc = DO_CRC4;
109-
# else
110-
crc = DO_CRC8;
111-
q = *++b;
112-
crc ^= DO_CRC4;
113-
# endif
114-
}
115-
len = rem_len;
116-
/* And the last few bytes */
117-
if (len) {
118-
u8 *p = (u8 *)(b + 1) - 1;
119-
# ifdef CONFIG_X86
120-
for (i = 0; i < len; i++)
121-
DO_CRC(*++p); /* use pre increment for speed */
122-
# else
123-
do {
124-
DO_CRC(*++p); /* use pre increment for speed */
125-
} while (--len);
126-
# endif
127-
}
128-
return crc;
129-
#undef DO_CRC
130-
#undef DO_CRC4
131-
#undef DO_CRC8
132-
}
133-
#endif
134-
135-
136-
/**
137-
* crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
138-
* CRC32/CRC32C
139-
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other
140-
* uses, or the previous crc32/crc32c value if computing incrementally.
141-
* @p: pointer to buffer over which CRC32/CRC32C is run
142-
* @len: length of buffer @p
143-
* @tab: little-endian Ethernet table
144-
* @polynomial: CRC32/CRC32c LE polynomial
145-
*/
146-
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
147-
size_t len, const u32 (*tab)[256],
148-
u32 polynomial)
40+
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
14941
{
150-
#if CRC_LE_BITS == 1
151-
int i;
152-
while (len--) {
153-
crc ^= *p++;
154-
for (i = 0; i < 8; i++)
155-
crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0);
156-
}
157-
# elif CRC_LE_BITS == 2
158-
while (len--) {
159-
crc ^= *p++;
160-
crc = (crc >> 2) ^ tab[0][crc & 3];
161-
crc = (crc >> 2) ^ tab[0][crc & 3];
162-
crc = (crc >> 2) ^ tab[0][crc & 3];
163-
crc = (crc >> 2) ^ tab[0][crc & 3];
164-
}
165-
# elif CRC_LE_BITS == 4
166-
while (len--) {
167-
crc ^= *p++;
168-
crc = (crc >> 4) ^ tab[0][crc & 15];
169-
crc = (crc >> 4) ^ tab[0][crc & 15];
170-
}
171-
# elif CRC_LE_BITS == 8
172-
/* aka Sarwate algorithm */
173-
while (len--) {
174-
crc ^= *p++;
175-
crc = (crc >> 8) ^ tab[0][crc & 255];
176-
}
177-
# else
178-
crc = (__force u32) __cpu_to_le32(crc);
179-
crc = crc32_body(crc, p, len, tab);
180-
crc = __le32_to_cpu((__force __le32)crc);
181-
#endif
42+
while (len--)
43+
crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++];
18244
return crc;
18345
}
46+
EXPORT_SYMBOL(crc32_le_base);
18447

185-
#if CRC_LE_BITS == 1
186-
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
187-
{
188-
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
189-
}
190-
u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
191-
{
192-
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
193-
}
194-
#else
195-
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
196-
{
197-
return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
198-
}
19948
u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
20049
{
201-
return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
50+
while (len--)
51+
crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++];
52+
return crc;
20253
}
203-
#endif
204-
EXPORT_SYMBOL(crc32_le_base);
20554
EXPORT_SYMBOL(crc32c_le_base);
20655

20756
/*
@@ -277,64 +126,10 @@ u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
277126
EXPORT_SYMBOL(crc32_le_shift);
278127
EXPORT_SYMBOL(__crc32c_le_shift);
279128

280-
/**
281-
* crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
282-
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
283-
* other uses, or the previous crc32 value if computing incrementally.
284-
* @p: pointer to buffer over which CRC32 is run
285-
* @len: length of buffer @p
286-
* @tab: big-endian Ethernet table
287-
* @polynomial: CRC32 BE polynomial
288-
*/
289-
static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
290-
size_t len, const u32 (*tab)[256],
291-
u32 polynomial)
292-
{
293-
#if CRC_BE_BITS == 1
294-
int i;
295-
while (len--) {
296-
crc ^= *p++ << 24;
297-
for (i = 0; i < 8; i++)
298-
crc =
299-
(crc << 1) ^ ((crc & 0x80000000) ? polynomial :
300-
0);
301-
}
302-
# elif CRC_BE_BITS == 2
303-
while (len--) {
304-
crc ^= *p++ << 24;
305-
crc = (crc << 2) ^ tab[0][crc >> 30];
306-
crc = (crc << 2) ^ tab[0][crc >> 30];
307-
crc = (crc << 2) ^ tab[0][crc >> 30];
308-
crc = (crc << 2) ^ tab[0][crc >> 30];
309-
}
310-
# elif CRC_BE_BITS == 4
311-
while (len--) {
312-
crc ^= *p++ << 24;
313-
crc = (crc << 4) ^ tab[0][crc >> 28];
314-
crc = (crc << 4) ^ tab[0][crc >> 28];
315-
}
316-
# elif CRC_BE_BITS == 8
317-
while (len--) {
318-
crc ^= *p++ << 24;
319-
crc = (crc << 8) ^ tab[0][crc >> 24];
320-
}
321-
# else
322-
crc = (__force u32) __cpu_to_be32(crc);
323-
crc = crc32_body(crc, p, len, tab);
324-
crc = __be32_to_cpu((__force __be32)crc);
325-
# endif
326-
return crc;
327-
}
328-
329-
#if CRC_BE_BITS == 1
330-
u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
331-
{
332-
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
333-
}
334-
#else
335129
u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
336130
{
337-
return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
131+
while (len--)
132+
crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++];
133+
return crc;
338134
}
339-
#endif
340135
EXPORT_SYMBOL(crc32_be_base);

0 commit comments

Comments
 (0)