|
1 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */
|
2 | 2 | /*
|
3 |
| - * Accelerated CRC32(C) using AArch64 CRC instructions |
| 3 | + * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions |
4 | 4 | *
|
5 |
| - * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> |
| 5 | + * Copyright (C) 2016 - 2018 Linaro Ltd. |
| 6 | + * Copyright (C) 2024 Google LLC |
| 7 | + * |
| 8 | + * Author: Ard Biesheuvel <ardb@kernel.org> |
6 | 9 | */
|
7 | 10 |
|
8 | 11 | #include <linux/linkage.h>
|
9 | 12 | #include <asm/assembler.h>
|
10 | 13 |
|
11 |
| - .arch armv8-a+crc |
| 14 | + .cpu generic+crc+crypto |
12 | 15 |
|
/*
 * bitle reg: macro with an empty body, so it assembles to no instructions.
 * crc4way invokes it on w0 (via the bit<order> name) when order=le.
 */
13 | 16 | .macro bitle, reg
|
14 | 17 | .endm
|
@@ -135,3 +138,225 @@ SYM_FUNC_END(crc32c_le_arm64)
|
/*
 * crc32_be_arm64: the whole body is the __crc32 macro expanded with order=be.
 * The macro is defined outside this excerpt.
 * NOTE(review): presumably the non-interleaved big-endian CRC-32 routine;
 * confirm against the __crc32 definition.
 */
135 | 138 | SYM_FUNC_START(crc32_be_arm64)
|
136 | 139 | __crc32 order=be
|
137 | 140 | SYM_FUNC_END(crc32_be_arm64)
|
| 141 | + |
/* Symbolic register names for the pointer and length arguments of crc4way. */
| 142 | + in .req x1 |
| 143 | + len .req x2 |
| 144 | + |
| 145 | + /* |
| 146 | + * w0: input CRC at entry, output CRC at exit |
| 147 | + * x1: pointer to input buffer |
| 148 | + * x2: length of input in bytes |
| 149 | + */ |
/*
 * crc4way insn, table, order=le
 *
 * Emits a complete routine body (the expansion ends in ret).
 *
 * len is first reduced to a count of 64-byte blocks, so any tail shorter
 * than 64 bytes is never read by this code. Blocks are consumed in batches
 * of at most 64. A batch of n blocks is split into four contiguous streams
 * of 16 * n bytes each (starting at in, x7, x8 and x9). Each stream has its
 * own CRC accumulator (w0, w4, w5, w6) and the four are advanced in lock
 * step with insn. At the end of a batch the first three accumulators are
 * multiplied with pmull by the three 32-bit words of entry n of the table,
 * the products are XORed together, and the result is folded into the
 * fourth stream to yield the new w0.
 *
 * insn:  CRC instruction taking a 64-bit data operand (the entry points
 *        in this file pass crc32x or crc32cx)
 * table: label of the constant table, 12 bytes per entry, first entry is
 *        for n = 1
 * order: suffix selecting the bit<order> macro applied to w0 at entry and
 *        exit, and the <order> macro applied to the loaded data registers
 *
 * Registers written: x0, x1 (in), x2 (len), x3-x17, v0-v5, condition flags.
 *
 * NOTE(review): the inner loop decrements its counter before testing it,
 * so a batch size of zero (fewer than 64 bytes at entry) is not handled
 * here; callers presumably guarantee at least one full block -- confirm.
 */
| 150 | + .macro crc4way, insn, table, order=le |
| 151 | + bit\order w0 |
| 152 | + lsr len, len, #6 // len := # of 64-byte blocks |
| 153 | + |
| 154 | + /* Process up to 64 blocks of 64 bytes at a time */ |
| 155 | +.La\@: mov x3, #64 |
| 156 | + cmp len, #64 |
| 157 | + csel x3, x3, len, hi // x3 := min(len, 64) |
| 158 | + sub len, len, x3 |
| 159 | + |
| 160 | + /* Divide the input into 4 contiguous blocks */ |
| 161 | + add x4, x3, x3, lsl #1 // x4 := 3 * x3 |
| 162 | + add x7, in, x3, lsl #4 // x7 := in + 16 * x3 |
| 163 | + add x8, in, x3, lsl #5 // x8 := in + 32 * x3 |
| 164 | + add x9, in, x4, lsl #4 // x9 := in + 16 * x4 |
| 165 | + |
| 166 | + /* Load the folding coefficients from the lookup table */ |
/* After the two address computations below, s0, s1 and s2 hold the three words of entry x3. */
| 167 | + adr_l x5, \table - 12 // entry 0 omitted |
| 168 | + add x5, x5, x4, lsl #2 // x5 += 12 * x3 |
| 169 | + ldp s0, s1, [x5] |
| 170 | + ldr s2, [x5, #8] |
| 171 | + |
| 172 | + /* Zero init partial CRCs for this iteration */ |
/*
 * w0 is not cleared: it carries the running CRC into the first stream.
 * x17 is cleared too, because the first instruction of the loop feeds it
 * to insn before any data has been loaded into it.
 */
| 173 | + mov w4, wzr |
| 174 | + mov w5, wzr |
| 175 | + mov w6, wzr |
| 176 | + mov x17, xzr |
| 177 | + |
/*
 * Inner loop: one 16-byte chunk from each of the four streams per pass.
 * The insn on w6/x17 at the top of the loop consumes the second 8 bytes
 * that the fourth stream loaded on the previous pass. After the final pass
 * the last x17 is therefore still pending and is handled in the combine
 * step below.
 */
| 178 | +.Lb\@: sub x3, x3, #1 |
| 179 | + \insn w6, w6, x17 |
| 180 | + ldp x10, x11, [in], #16 |
| 181 | + ldp x12, x13, [x7], #16 |
| 182 | + ldp x14, x15, [x8], #16 |
| 183 | + ldp x16, x17, [x9], #16 |
| 184 | + |
/*
 * The <order> macro is applied to all eight loaded registers. It is
 * defined outside this excerpt.
 * NOTE(review): presumably a no-op for le and a byte swap for be; confirm.
 */
| 185 | + \order x10, x11, x12, x13, x14, x15, x16, x17 |
| 186 | + |
| 187 | + /* Apply the CRC transform to 4 16-byte blocks in parallel */ |
| 188 | + \insn w0, w0, x10 |
| 189 | + \insn w4, w4, x12 |
| 190 | + \insn w5, w5, x14 |
| 191 | + \insn w6, w6, x16 |
| 192 | + \insn w0, w0, x11 |
| 193 | + \insn w4, w4, x13 |
| 194 | + \insn w5, w5, x15 |
| 195 | + cbnz x3, .Lb\@ |
| 196 | + |
| 197 | + /* Combine the 4 partial results into w0 */ |
/*
 * v0-v2 still hold the table words loaded into s0-s2 before the loop.
 * The low 64 bits of the XOR of the three products are XORed with the
 * pending x17 and passed through insn together with the fourth stream
 * accumulator w6; the result becomes the running CRC in w0.
 */
| 198 | + mov v3.d[0], x0 |
| 199 | + mov v4.d[0], x4 |
| 200 | + mov v5.d[0], x5 |
| 201 | + pmull v0.1q, v0.1d, v3.1d |
| 202 | + pmull v1.1q, v1.1d, v4.1d |
| 203 | + pmull v2.1q, v2.1d, v5.1d |
| 204 | + eor v0.8b, v0.8b, v1.8b |
| 205 | + eor v0.8b, v0.8b, v2.8b |
| 206 | + mov x5, v0.d[0] |
| 207 | + eor x5, x5, x17 |
| 208 | + \insn w0, w6, x5 |
| 209 | + |
/* x9 has advanced past the fourth stream, which is where the next batch starts. */
| 210 | + mov in, x9 |
| 211 | + cbnz len, .La\@ |
| 212 | + |
| 213 | + bit\order w0 |
| 214 | + ret |
| 215 | + .endm |
| 216 | + |
/*
 * crc32c_le_arm64_4way: crc4way expanded with the crc32cx instruction and
 * the .L0 constant table; order is left at its default of le.
 * Register interface as documented above the crc4way macro:
 * w0 = CRC in/out, x1 = input buffer, x2 = length in bytes.
 */
| 217 | + .align 5 |
| 218 | +SYM_FUNC_START(crc32c_le_arm64_4way) |
| 219 | + crc4way crc32cx, .L0 |
| 220 | +SYM_FUNC_END(crc32c_le_arm64_4way) |
| 221 | + |
| 222 | + .align 5 |
/*
 * crc32_le_arm64_4way: crc4way expanded with the crc32x instruction and the
 * .L1 constant table; order is left at its default of le.
 * Register interface as documented above the crc4way macro:
 * w0 = CRC in/out, x1 = input buffer, x2 = length in bytes.
 */
| 223 | +SYM_FUNC_START(crc32_le_arm64_4way) |
| 224 | + crc4way crc32x, .L1 |
| 225 | +SYM_FUNC_END(crc32_le_arm64_4way) |
| 226 | + |
| 227 | + .align 5 |
/*
 * crc32_be_arm64_4way: crc4way expanded with the crc32x instruction, the
 * .L1 constant table and order=be, so the bitbe and be macros are applied
 * to the CRC and to the loaded data respectively (both defined outside
 * this excerpt).
 * Register interface as documented above the crc4way macro:
 * w0 = CRC in/out, x1 = input buffer, x2 = length in bytes.
 */
| 228 | +SYM_FUNC_START(crc32_be_arm64_4way) |
| 229 | + crc4way crc32x, .L1, be |
| 230 | +SYM_FUNC_END(crc32_be_arm64_4way) |
| 231 | + |
/*
 * .L0: constant table passed to crc4way by crc32c_le_arm64_4way (the
 * crc32cx variant).
 *
 * 64 rows of three 32-bit words (12 bytes per row). Row n, for n = 1..64,
 * is selected when a batch consists of n 64-byte blocks. crc4way addresses
 * it as (.L0 - 12) + 12 * n, so there is no row for n = 0. The three words
 * are loaded into s0, s1 and s2 and multiplied with the CRCs of the first
 * three streams.
 *
 * NOTE(review): the values are presumably precomputed folding constants
 * for the CRC-32C polynomial; the generator is not part of this excerpt,
 * so do not edit them by hand.
 */
| 232 | + .section .rodata, "a", %progbits |
| 233 | + .align 6 |
| 234 | +.L0: .long 0xddc0152b, 0xba4fc28e, 0x493c7d27 |
| 235 | + .long 0x0715ce53, 0x9e4addf8, 0xba4fc28e |
| 236 | + .long 0xc96cfdc0, 0x0715ce53, 0xddc0152b |
| 237 | + .long 0xab7aff2a, 0x0d3b6092, 0x9e4addf8 |
| 238 | + .long 0x299847d5, 0x878a92a7, 0x39d3b296 |
| 239 | + .long 0xb6dd949b, 0xab7aff2a, 0x0715ce53 |
| 240 | + .long 0xa60ce07b, 0x83348832, 0x47db8317 |
| 241 | + .long 0xd270f1a2, 0xb9e02b86, 0x0d3b6092 |
| 242 | + .long 0x65863b64, 0xb6dd949b, 0xc96cfdc0 |
| 243 | + .long 0xb3e32c28, 0xbac2fd7b, 0x878a92a7 |
| 244 | + .long 0xf285651c, 0xce7f39f4, 0xdaece73e |
| 245 | + .long 0x271d9844, 0xd270f1a2, 0xab7aff2a |
| 246 | + .long 0x6cb08e5c, 0x2b3cac5d, 0x2162d385 |
| 247 | + .long 0xcec3662e, 0x1b03397f, 0x83348832 |
| 248 | + .long 0x8227bb8a, 0xb3e32c28, 0x299847d5 |
| 249 | + .long 0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86 |
| 250 | + .long 0xf6076544, 0x10746f3c, 0x18b33a4e |
| 251 | + .long 0x98d8d9cb, 0x271d9844, 0xb6dd949b |
| 252 | + .long 0x57a3d037, 0x93a5f730, 0x78d9ccb7 |
| 253 | + .long 0x3771e98f, 0x6b749fb2, 0xbac2fd7b |
| 254 | + .long 0xe0ac139e, 0xcec3662e, 0xa60ce07b |
| 255 | + .long 0x6f345e45, 0xe6fc4e6a, 0xce7f39f4 |
| 256 | + .long 0xa2b73df1, 0xb0cd4768, 0x61d82e56 |
| 257 | + .long 0x86d8e4d2, 0xd7a4825c, 0xd270f1a2 |
| 258 | + .long 0xa90fd27a, 0x0167d312, 0xc619809d |
| 259 | + .long 0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d |
| 260 | + .long 0x4597456a, 0x98d8d9cb, 0x65863b64 |
| 261 | + .long 0xc9c8b782, 0x68bce87a, 0x1b03397f |
| 262 | + .long 0x62ec6c6d, 0x6956fc3b, 0xebb883bd |
| 263 | + .long 0x2342001e, 0x3771e98f, 0xb3e32c28 |
| 264 | + .long 0xe8b6368b, 0x2178513a, 0x064f7f26 |
| 265 | + .long 0x9ef68d35, 0x170076fa, 0xdd7e3b0c |
| 266 | + .long 0x0b0bf8ca, 0x6f345e45, 0xf285651c |
| 267 | + .long 0x02ee03b2, 0xff0dba97, 0x10746f3c |
| 268 | + .long 0x135c83fd, 0xf872e54c, 0xc7a68855 |
| 269 | + .long 0x00bcf5f6, 0x86d8e4d2, 0x271d9844 |
| 270 | + .long 0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c |
| 271 | + .long 0xded288f8, 0xb3af077a, 0x93a5f730 |
| 272 | + .long 0x37170390, 0xca6ef3ac, 0x6cb08e5c |
| 273 | + .long 0xf48642e9, 0xdd66cbbb, 0x6b749fb2 |
| 274 | + .long 0xb25b29f2, 0xe9e28eb4, 0x1393e203 |
| 275 | + .long 0x45cddf4e, 0xc9c8b782, 0xcec3662e |
| 276 | + .long 0xdfd94fb2, 0x93e106a4, 0x96c515bb |
| 277 | + .long 0x021ac5ef, 0xd813b325, 0xe6fc4e6a |
| 278 | + .long 0x8e1450f7, 0x2342001e, 0x8227bb8a |
| 279 | + .long 0xe0cdcf86, 0x6d9a4957, 0xb0cd4768 |
| 280 | + .long 0x613eee91, 0xd2c3ed1a, 0x39c7ff35 |
| 281 | + .long 0xbedc6ba1, 0x9ef68d35, 0xd7a4825c |
| 282 | + .long 0x0cd1526a, 0xf2271e60, 0x0ab3844b |
| 283 | + .long 0xd6c3a807, 0x2664fd8b, 0x0167d312 |
| 284 | + .long 0x1d31175f, 0x02ee03b2, 0xf6076544 |
| 285 | + .long 0x4be7fd90, 0x363bd6b3, 0x26f6a60a |
| 286 | + .long 0x6eeed1c9, 0x5fabe670, 0xa741c1bf |
| 287 | + .long 0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb |
| 288 | + .long 0x2e7d11a7, 0x17f27698, 0x49c3cc9c |
| 289 | + .long 0x889774e1, 0xaa7c7ad5, 0x68bce87a |
| 290 | + .long 0x8a074012, 0xded288f8, 0x57a3d037 |
| 291 | + .long 0xbd0bb25f, 0x6d390dec, 0x6956fc3b |
| 292 | + .long 0x3be3c09b, 0x6353c1cc, 0x42d98888 |
| 293 | + .long 0x465a4eee, 0xf48642e9, 0x3771e98f |
| 294 | + .long 0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9 |
| 295 | + .long 0xa52f58ec, 0x9a5ede41, 0x2178513a |
| 296 | + .long 0x47972100, 0x45cddf4e, 0xe0ac139e |
| 297 | + .long 0x359674f7, 0xa51b6135, 0x170076fa |
| 298 | + |
/*
 * .L1: constant table passed to crc4way by crc32_le_arm64_4way and
 * crc32_be_arm64_4way (the crc32x variants).
 *
 * 64 rows of three 32-bit words (12 bytes per row). Row n, for n = 1..64,
 * is selected when a batch consists of n 64-byte blocks. crc4way addresses
 * it as (.L1 - 12) + 12 * n, so there is no row for n = 0.
 *
 * NOTE(review): the values are presumably precomputed folding constants
 * for the CRC-32 polynomial; the generator is not part of this excerpt,
 * so do not edit them by hand.
 */
| 299 | +.L1: .long 0xaf449247, 0x81256527, 0xccaa009e |
| 300 | + .long 0x57c54819, 0x1d9513d7, 0x81256527 |
| 301 | + .long 0x3f41287a, 0x57c54819, 0xaf449247 |
| 302 | + .long 0xf5e48c85, 0x910eeec1, 0x1d9513d7 |
| 303 | + .long 0x1f0c2cdd, 0x9026d5b1, 0xae0b5394 |
| 304 | + .long 0x71d54a59, 0xf5e48c85, 0x57c54819 |
| 305 | + .long 0x1c63267b, 0xfe807bbd, 0x0cbec0ed |
| 306 | + .long 0xd31343ea, 0xe95c1271, 0x910eeec1 |
| 307 | + .long 0xf9d9c7ee, 0x71d54a59, 0x3f41287a |
| 308 | + .long 0x9ee62949, 0xcec97417, 0x9026d5b1 |
| 309 | + .long 0xa55d1514, 0xf183c71b, 0xd1df2327 |
| 310 | + .long 0x21aa2b26, 0xd31343ea, 0xf5e48c85 |
| 311 | + .long 0x9d842b80, 0xeea395c4, 0x3c656ced |
| 312 | + .long 0xd8110ff1, 0xcd669a40, 0xfe807bbd |
| 313 | + .long 0x3f9e9356, 0x9ee62949, 0x1f0c2cdd |
| 314 | + .long 0x1d6708a0, 0x0c30f51d, 0xe95c1271 |
| 315 | + .long 0xef82aa68, 0xdb3935ea, 0xb918a347 |
| 316 | + .long 0xd14bcc9b, 0x21aa2b26, 0x71d54a59 |
| 317 | + .long 0x99cce860, 0x356d209f, 0xff6f2fc2 |
| 318 | + .long 0xd8af8e46, 0xc352f6de, 0xcec97417 |
| 319 | + .long 0xf1996890, 0xd8110ff1, 0x1c63267b |
| 320 | + .long 0x631bc508, 0xe95c7216, 0xf183c71b |
| 321 | + .long 0x8511c306, 0x8e031a19, 0x9b9bdbd0 |
| 322 | + .long 0xdb3839f3, 0x1d6708a0, 0xd31343ea |
| 323 | + .long 0x7a92fffb, 0xf7003835, 0x4470ac44 |
| 324 | + .long 0x6ce68f2a, 0x00eba0c8, 0xeea395c4 |
| 325 | + .long 0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee |
| 326 | + .long 0xb46f7cff, 0x9a1b53c8, 0xcd669a40 |
| 327 | + .long 0x60290934, 0x81b6f443, 0x6d40f445 |
| 328 | + .long 0x8e976a7d, 0xd8af8e46, 0x9ee62949 |
| 329 | + .long 0xdcf5088a, 0x9dbdc100, 0x145575d5 |
| 330 | + .long 0x1753ab84, 0xbbf2f6d6, 0x0c30f51d |
| 331 | + .long 0x255b139e, 0x631bc508, 0xa55d1514 |
| 332 | + .long 0xd784eaa8, 0xce26786c, 0xdb3935ea |
| 333 | + .long 0x6d2c864a, 0x8068c345, 0x2586d334 |
| 334 | + .long 0x02072e24, 0xdb3839f3, 0x21aa2b26 |
| 335 | + .long 0x06689b0a, 0x5efd72f5, 0xe0575528 |
| 336 | + .long 0x1e52f5ea, 0x4117915b, 0x356d209f |
| 337 | + .long 0x1d3d1db6, 0x6ce68f2a, 0x9d842b80 |
| 338 | + .long 0x3796455c, 0xb8e0e4a8, 0xc352f6de |
| 339 | + .long 0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c |
| 340 | + .long 0x28ae0976, 0xb46f7cff, 0xd8110ff1 |
| 341 | + .long 0x9764bc8d, 0xd7e7a22c, 0x712510f0 |
| 342 | + .long 0x13a13e18, 0x3e9a43cd, 0xe95c7216 |
| 343 | + .long 0xb8ee242e, 0x8e976a7d, 0x3f9e9356 |
| 344 | + .long 0x0c540e7b, 0x753c81ff, 0x8e031a19 |
| 345 | + .long 0x9924c781, 0xb9220208, 0x3edcde65 |
| 346 | + .long 0x3954de39, 0x1753ab84, 0x1d6708a0 |
| 347 | + .long 0xf32238b5, 0xbec81497, 0x9e70b943 |
| 348 | + .long 0xbbd2cd2c, 0x0925d861, 0xf7003835 |
| 349 | + .long 0xcc401304, 0xd784eaa8, 0xef82aa68 |
| 350 | + .long 0x4987e684, 0x6044fbb0, 0x00eba0c8 |
| 351 | + .long 0x3aa11427, 0x18fe3b4a, 0x87441142 |
| 352 | + .long 0x297aad60, 0x02072e24, 0xd14bcc9b |
| 353 | + .long 0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a |
| 354 | + .long 0x632d78c5, 0x3fc33de4, 0x9a1b53c8 |
| 355 | + .long 0x25b8822a, 0x1e52f5ea, 0x99cce860 |
| 356 | + .long 0xd4fc84bc, 0x1af62fb8, 0x81b6f443 |
| 357 | + .long 0x5690aa32, 0xa91fdefb, 0x688a110e |
| 358 | + .long 0x1357a093, 0x3796455c, 0xd8af8e46 |
| 359 | + .long 0x798fdd33, 0xaaa18a37, 0x357b9517 |
| 360 | + .long 0xc2815395, 0x54d42691, 0x9dbdc100 |
| 361 | + .long 0x21cfc0f7, 0x28ae0976, 0xf1996890 |
| 362 | + .long 0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6 |
0 commit comments