
Commit eeb7a89

Merge patch series "riscv: mm: Extend mappable memory up to hint address"
Charlie Jenkins <charlie@rivosinc.com> says:

On riscv, mmap currently returns an address from the largest address space
that can fit entirely inside of the hint address. This makes it such that the
hint address is almost never returned. This patch raises the mappable area up
to and including the hint address. This allows mmap to often return the hint
address, which allows a performance improvement over searching for a valid
address as well as making the behavior more similar to other architectures.

Note that a previous patch introduced stronger semantics compared to other
architectures for riscv mmap. On riscv, mmap will not use bits in the upper
bits of the virtual address depending on the hint address. On other
architectures, a random address is returned in the address space requested.
On all architectures the hint address will be returned if it is available.
This allows riscv applications to configure how many bits in the virtual
address should be left empty. This has the two benefits of allowing address
spaces that are smaller than the default to be requested and of not requiring
the application to know the page table layout of riscv.

* b4-shazam-merge:
  docs: riscv: Define behavior of mmap
  selftests: riscv: Generalize mm selftests
  riscv: mm: Use hint address in mmap if available

Link: https://lore.kernel.org/r/20240130-use_mmap_hint_address-v3-0-8a655cfa8bcb@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents: 2b2ca35 + cd6c916, commit eeb7a89

34 files changed: 758 additions & 325 deletions

Documentation/arch/riscv/vm-layout.rst

Lines changed: 5 additions & 11 deletions
@@ -144,14 +144,8 @@ passing 0 into the hint address parameter of mmap. On CPUs with an address space
 smaller than sv48, the CPU maximum supported address space will be the default.

 Software can "opt-in" to receiving VAs from another VA space by providing
-a hint address to mmap. A hint address passed to mmap will cause the largest
-address space that fits entirely into the hint to be used, unless there is no
-space left in the address space. If there is no space available in the requested
-address space, an address in the next smallest available address space will be
-returned.
-
-For example, in order to obtain 48-bit VA space, a hint address greater than
-:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace
-ending at :code:`1 << 47` and the addresses beyond this are reserved for the
-kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater
-than or equal to :code:`1 << 56` must be provided.
+a hint address to mmap. When a hint address is passed to mmap, the returned
+address will never use more bits than the hint address. For example, if a hint
+address of `1 << 40` is passed to mmap, a valid returned address will never use
+bits 41 through 63. If no mappable addresses are available in that range, mmap
+will return `MAP_FAILED`.
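
For illustration, the following is a minimal userspace sketch (not part of this commit) of the behaviour documented above. The hint value 1 << 38 is an arbitrary example; with this series applied, the returned address will not use bits 39 through 63.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        /* Arbitrary example hint: the documented rule says the returned
         * address uses no more bits than the hint, so bits 39-63 stay clear. */
        void *hint = (void *)(1UL << 38);
        void *p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)    /* no mappable address in that range */
                return 1;
        printf("mmap returned %p\n", p);
        return 0;
}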

arch/riscv/configs/defconfig

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
 CONFIG_CPUFREQ_DT=y
+CONFIG_ACPI_CPPC_CPUFREQ=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=m
 CONFIG_ACPI=y

arch/riscv/crypto/Kconfig

Lines changed: 2 additions & 2 deletions
@@ -3,14 +3,14 @@
 menu "Accelerated Cryptographic Algorithms for CPU (riscv)"

 config CRYPTO_AES_RISCV64
-        tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS"
+        tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTS"
         depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
         select CRYPTO_ALGAPI
         select CRYPTO_LIB_AES
         select CRYPTO_SKCIPHER
         help
           Block cipher: AES cipher algorithms
-          Length-preserving ciphers: AES with ECB, CBC, CTR, XTS
+          Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTS

           Architecture: riscv64 using:
           - Zvkned vector crypto extension

arch/riscv/crypto/aes-riscv64-glue.c

Lines changed: 90 additions & 3 deletions
@@ -1,13 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * AES using the RISC-V vector crypto extensions. Includes the bare block
- * cipher and the ECB, CBC, CTR, and XTS modes.
+ * cipher and the ECB, CBC, CBC-CTS, CTR, and XTS modes.
  *
  * Copyright (C) 2023 VRULL GmbH
  * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
  *
  * Copyright (C) 2023 SiFive, Inc.
  * Author: Jerry Shih <jerry.shih@sifive.com>
+ *
+ * Copyright 2024 Google LLC
  */

 #include <asm/simd.h>
@@ -40,6 +42,10 @@ asmlinkage void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key,
                                        const u8 *in, u8 *out, size_t len,
                                        u8 iv[AES_BLOCK_SIZE]);

+asmlinkage void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
+                                         const u8 *in, u8 *out, size_t len,
+                                         const u8 iv[AES_BLOCK_SIZE], bool enc);
+
 asmlinkage void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
                                             const u8 *in, u8 *out, size_t len,
                                             u8 iv[AES_BLOCK_SIZE]);
@@ -164,7 +170,7 @@ static int riscv64_aes_ecb_decrypt(struct skcipher_request *req)

 /* AES-CBC */

-static inline int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc)
+static int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc)
 {
         struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
         const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -202,6 +208,70 @@ static int riscv64_aes_cbc_decrypt(struct skcipher_request *req)
         return riscv64_aes_cbc_crypt(req, false);
 }

+/* AES-CBC-CTS */
+
+static int riscv64_aes_cbc_cts_crypt(struct skcipher_request *req, bool enc)
+{
+        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+        const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+        struct scatterlist sg_src[2], sg_dst[2];
+        struct skcipher_request subreq;
+        struct scatterlist *src, *dst;
+        struct skcipher_walk walk;
+        unsigned int cbc_len;
+        int err;
+
+        if (req->cryptlen < AES_BLOCK_SIZE)
+                return -EINVAL;
+
+        err = skcipher_walk_virt(&walk, req, false);
+        if (err)
+                return err;
+        /*
+         * If the full message is available in one step, decrypt it in one call
+         * to the CBC-CTS assembly function. This reduces overhead, especially
+         * on short messages. Otherwise, fall back to doing CBC up to the last
+         * two blocks, then invoke CTS just for the ciphertext stealing.
+         */
+        if (unlikely(walk.nbytes != req->cryptlen)) {
+                cbc_len = round_down(req->cryptlen - AES_BLOCK_SIZE - 1,
+                                     AES_BLOCK_SIZE);
+                skcipher_walk_abort(&walk);
+                skcipher_request_set_tfm(&subreq, tfm);
+                skcipher_request_set_callback(&subreq,
+                                              skcipher_request_flags(req),
+                                              NULL, NULL);
+                skcipher_request_set_crypt(&subreq, req->src, req->dst,
+                                           cbc_len, req->iv);
+                err = riscv64_aes_cbc_crypt(&subreq, enc);
+                if (err)
+                        return err;
+                dst = src = scatterwalk_ffwd(sg_src, req->src, cbc_len);
+                if (req->dst != req->src)
+                        dst = scatterwalk_ffwd(sg_dst, req->dst, cbc_len);
+                skcipher_request_set_crypt(&subreq, src, dst,
+                                           req->cryptlen - cbc_len, req->iv);
+                err = skcipher_walk_virt(&walk, &subreq, false);
+                if (err)
+                        return err;
+        }
+        kernel_vector_begin();
+        aes_cbc_cts_crypt_zvkned(ctx, walk.src.virt.addr, walk.dst.virt.addr,
+                                 walk.nbytes, req->iv, enc);
+        kernel_vector_end();
+        return skcipher_walk_done(&walk, 0);
+}
+
+static int riscv64_aes_cbc_cts_encrypt(struct skcipher_request *req)
+{
+        return riscv64_aes_cbc_cts_crypt(req, true);
+}
+
+static int riscv64_aes_cbc_cts_decrypt(struct skcipher_request *req)
+{
+        return riscv64_aes_cbc_cts_crypt(req, false);
+}
+
 /* AES-CTR */

 static int riscv64_aes_ctr_crypt(struct skcipher_request *req)
@@ -434,6 +504,22 @@ static struct skcipher_alg riscv64_zvkned_aes_skcipher_algs[] = {
                         .cra_driver_name = "cbc-aes-riscv64-zvkned",
                         .cra_module = THIS_MODULE,
                 },
+        }, {
+                .setkey = riscv64_aes_setkey_skcipher,
+                .encrypt = riscv64_aes_cbc_cts_encrypt,
+                .decrypt = riscv64_aes_cbc_cts_decrypt,
+                .min_keysize = AES_MIN_KEY_SIZE,
+                .max_keysize = AES_MAX_KEY_SIZE,
+                .ivsize = AES_BLOCK_SIZE,
+                .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
+                .base = {
+                        .cra_blocksize = AES_BLOCK_SIZE,
+                        .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+                        .cra_priority = 300,
+                        .cra_name = "cts(cbc(aes))",
+                        .cra_driver_name = "cts-cbc-aes-riscv64-zvkned",
+                        .cra_module = THIS_MODULE,
+                },
         }
 };

@@ -540,11 +626,12 @@ static void __exit riscv64_aes_mod_exit(void)
 module_init(riscv64_aes_mod_init);
 module_exit(riscv64_aes_mod_exit);

-MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS (RISC-V accelerated)");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTS/CTR/XTS (RISC-V accelerated)");
 MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("aes");
 MODULE_ALIAS_CRYPTO("ecb(aes)");
 MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
 MODULE_ALIAS_CRYPTO("ctr(aes)");
 MODULE_ALIAS_CRYPTO("xts(aes)");
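
As a usage note (not part of this diff): other kernel code reaches this implementation through the generic crypto API by requesting "cts(cbc(aes))"; when the Zvkned vector crypto extension is present, the priority-300 driver registered above should generally be preferred over the generic CTS template. The sketch below is a hedged example of that API; the function name cts_cbc_aes_encrypt_example and its parameters are hypothetical.

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/* Hypothetical helper: one-shot, in-place CTS-CBC-AES encryption of 'buf'. */
static int cts_cbc_aes_encrypt_example(const u8 *key, unsigned int keylen,
                                       u8 *buf, unsigned int len, u8 iv[16])
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        struct scatterlist sg;
        DECLARE_CRYPTO_WAIT(wait);
        int err;

        tfm = crypto_alloc_skcipher("cts(cbc(aes))", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_tfm;
        }

        sg_init_one(&sg, buf, len);     /* len must be >= AES_BLOCK_SIZE */
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
                                           CRYPTO_TFM_REQ_MAY_SLEEP,
                                      crypto_req_done, &wait);
        skcipher_request_set_crypt(req, &sg, &sg, len, iv);
        err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

        skcipher_request_free(req);
out_free_tfm:
        crypto_free_skcipher(tfm);
        return err;
}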

arch/riscv/crypto/aes-riscv64-zvkned.S

Lines changed: 168 additions & 9 deletions
@@ -139,19 +139,25 @@ SYM_FUNC_END(aes_ecb_decrypt_zvkned)
 .endm

 .macro aes_cbc_decrypt keylen
+        srli LEN, LEN, 2        // Convert LEN from bytes to words
         vle32.v v16, (IVP)      // Load IV
 1:
-        vle32.v v17, (INP)      // Load ciphertext block
-        vmv.v.v v18, v17        // Save ciphertext block
-        aes_decrypt v17, \keylen        // Decrypt
-        vxor.vv v17, v17, v16   // XOR with IV or prev ciphertext block
-        vse32.v v17, (OUTP)     // Store plaintext block
-        vmv.v.v v16, v18        // Next "IV" is prev ciphertext block
-        addi INP, INP, 16
-        addi OUTP, OUTP, 16
-        addi LEN, LEN, -16
+        vsetvli t0, LEN, e32, m4, ta, ma
+        vle32.v v20, (INP)      // Load ciphertext blocks
+        vslideup.vi v16, v20, 4 // Setup prev ciphertext blocks
+        addi t1, t0, -4
+        vslidedown.vx v24, v20, t1      // Save last ciphertext block
+        aes_decrypt v20, \keylen        // Decrypt the blocks
+        vxor.vv v20, v20, v16   // XOR with prev ciphertext blocks
+        vse32.v v20, (OUTP)     // Store plaintext blocks
+        vmv.v.v v16, v24        // Next "IV" is last ciphertext block
+        slli t1, t0, 2          // Words to bytes
+        add INP, INP, t1
+        add OUTP, OUTP, t1
+        sub LEN, LEN, t0
         bnez LEN, 1b

+        vsetivli zero, 4, e32, m1, ta, ma
         vse32.v v16, (IVP)      // Store next IV
         ret
 .endm
@@ -178,3 +184,156 @@ SYM_FUNC_START(aes_cbc_decrypt_zvkned)
 192:
         aes_cbc_decrypt 192
 SYM_FUNC_END(aes_cbc_decrypt_zvkned)
+
+.macro aes_cbc_cts_encrypt keylen
+
+        // CBC-encrypt all blocks except the last. But don't store the
+        // second-to-last block to the output buffer yet, since it will be
+        // handled specially in the ciphertext stealing step. Exception: if the
+        // message is single-block, still encrypt the last (and only) block.
+        li t0, 16
+        j 2f
+1:
+        vse32.v v16, (OUTP)     // Store ciphertext block
+        addi OUTP, OUTP, 16
+2:
+        vle32.v v17, (INP)      // Load plaintext block
+        vxor.vv v16, v16, v17   // XOR with IV or prev ciphertext block
+        aes_encrypt v16, \keylen        // Encrypt
+        addi INP, INP, 16
+        addi LEN, LEN, -16
+        bgt LEN, t0, 1b         // Repeat if more than one block remains
+
+        // Special case: if the message is a single block, just do CBC.
+        beqz LEN, .Lcts_encrypt_done\@
+
+        // Encrypt the last two blocks using ciphertext stealing as follows:
+        //      C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
+        //      C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
+        //
+        // C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
+        // plaintext block. Block n, the last block, may be partial; its length
+        // is 1 <= LEN <= 16. If there are only 2 blocks, C[n-2] means the IV.
+        //
+        // v16 already contains Encrypt(P[n-1] ^ C[n-2]).
+        // INP points to P[n]. OUTP points to where C[n-1] should go.
+        // To support in-place encryption, load P[n] before storing C[n].
+        addi t0, OUTP, 16       // Get pointer to where C[n] should go
+        vsetvli zero, LEN, e8, m1, tu, ma
+        vle8.v v17, (INP)       // Load P[n]
+        vse8.v v16, (t0)        // Store C[n]
+        vxor.vv v16, v16, v17   // v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
+        vsetivli zero, 4, e32, m1, ta, ma
+        aes_encrypt v16, \keylen
+.Lcts_encrypt_done\@:
+        vse32.v v16, (OUTP)     // Store C[n-1] (or C[n] in single-block case)
+        ret
+.endm
+
+#define LEN32 t4        // Length of remaining full blocks in 32-bit words
+#define LEN_MOD16 t5    // Length of message in bytes mod 16
+
+.macro aes_cbc_cts_decrypt keylen
+        andi LEN32, LEN, ~15
+        srli LEN32, LEN32, 2
+        andi LEN_MOD16, LEN, 15
+
+        // Save C[n-2] in v28 so that it's available later during the ciphertext
+        // stealing step. If there are fewer than three blocks, C[n-2] means
+        // the IV, otherwise it means the third-to-last ciphertext block.
+        vmv.v.v v28, v16        // IV
+        add t0, LEN, -33
+        bltz t0, .Lcts_decrypt_loop\@
+        andi t0, t0, ~15
+        add t0, t0, INP
+        vle32.v v28, (t0)
+
+        // CBC-decrypt all full blocks. For the last full block, or the last 2
+        // full blocks if the message is block-aligned, this doesn't write the
+        // correct output blocks (unless the message is only a single block),
+        // because it XORs the wrong values with the raw AES plaintexts. But we
+        // fix this after this loop without redoing the AES decryptions. This
+        // approach allows more of the AES decryptions to be parallelized.
+.Lcts_decrypt_loop\@:
+        vsetvli t0, LEN32, e32, m4, ta, ma
+        addi t1, t0, -4
+        vle32.v v20, (INP)      // Load next set of ciphertext blocks
+        vmv.v.v v24, v16        // Get IV or last ciphertext block of prev set
+        vslideup.vi v24, v20, 4 // Setup prev ciphertext blocks
+        vslidedown.vx v16, v20, t1      // Save last ciphertext block of this set
+        aes_decrypt v20, \keylen        // Decrypt this set of blocks
+        vxor.vv v24, v24, v20   // XOR prev ciphertext blocks with decrypted blocks
+        vse32.v v24, (OUTP)     // Store this set of plaintext blocks
+        sub LEN32, LEN32, t0
+        slli t0, t0, 2          // Words to bytes
+        add INP, INP, t0
+        add OUTP, OUTP, t0
+        bnez LEN32, .Lcts_decrypt_loop\@

+        vsetivli zero, 4, e32, m4, ta, ma
+        vslidedown.vx v20, v20, t1      // Extract raw plaintext of last full block
+        addi t0, OUTP, -16      // Get pointer to last full plaintext block
+        bnez LEN_MOD16, .Lcts_decrypt_non_block_aligned\@
+
+        // Special case: if the message is a single block, just do CBC.
+        li t1, 16
+        beq LEN, t1, .Lcts_decrypt_done\@
+
+        // Block-aligned message. Just fix up the last 2 blocks. We need:
+        //
+        //      P[n-1] = Decrypt(C[n]) ^ C[n-2]
+        //      P[n] = Decrypt(C[n-1]) ^ C[n]
+        //
+        // We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
+        // Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
+        // is everything needed to fix the output without re-decrypting blocks.
+        addi t1, OUTP, -32      // Get pointer to where P[n-1] should go
+        vxor.vv v20, v20, v28   // Decrypt(C[n]) ^ C[n-2] == P[n-1]
+        vle32.v v24, (t1)       // Decrypt(C[n-1]) ^ C[n-2]
+        vse32.v v20, (t1)       // Store P[n-1]
+        vxor.vv v20, v24, v16   // Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
+        j .Lcts_decrypt_finish\@
+
+.Lcts_decrypt_non_block_aligned\@:
+        // Decrypt the last two blocks using ciphertext stealing as follows:
+        //
+        //      P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
+        //      P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
+        //
+        // We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
+        vmv.v.v v16, v20        // v16 = Decrypt(C[n-1])
+        vsetvli zero, LEN_MOD16, e8, m1, tu, ma
+        vle8.v v20, (INP)       // v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
+        vxor.vv v16, v16, v20   // v16 = Decrypt(C[n-1]) ^ C[n]
+        vse8.v v16, (OUTP)      // Store P[n]
+        vsetivli zero, 4, e32, m1, ta, ma
+        aes_decrypt v20, \keylen        // v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
+.Lcts_decrypt_finish\@:
+        vxor.vv v20, v20, v28   // XOR with C[n-2]
+        vse32.v v20, (t0)       // Store last full plaintext block
+.Lcts_decrypt_done\@:
+        ret
+.endm
+
+.macro aes_cbc_cts_crypt keylen
+        vle32.v v16, (IVP)      // Load IV
+        beqz a5, .Lcts_decrypt\@
+        aes_cbc_cts_encrypt \keylen
+.Lcts_decrypt\@:
+        aes_cbc_cts_decrypt \keylen
+.endm
+
+// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
+//                               const u8 *in, u8 *out, size_t len,
+//                               const u8 iv[16], bool enc);
+//
+// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
+// This is the variant that unconditionally swaps the last two blocks.
+SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
+        aes_begin KEYP, 128f, 192f
+        aes_cbc_cts_crypt 256
+128:
+        aes_cbc_cts_crypt 128
+192:
+        aes_cbc_cts_crypt 192
+SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
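
To make the CS3 ciphertext-stealing formulas in the assembly comments above easier to follow, here is a hedged, illustrative C sketch of the encrypt direction only (not the kernel code path); aes128_encrypt_block() is a hypothetical single-block primitive assumed to exist elsewhere.

#include <stdint.h>
#include <string.h>

/* Hypothetical single-block AES-128 primitive, assumed provided elsewhere. */
void aes128_encrypt_block(const uint8_t key[16], const uint8_t in[16],
                          uint8_t out[16]);

/* CBC-CTS (CS3) encryption for len >= 16: the last two ciphertext blocks are
 * swapped unconditionally, even when len is a multiple of 16. */
static void cbc_cts_encrypt(const uint8_t key[16], const uint8_t iv[16],
                            const uint8_t *in, uint8_t *out, size_t len)
{
        uint8_t prev[16], buf[16];
        size_t tail, full, i, j;

        if (len == 16) {                        /* single block: plain CBC */
                for (i = 0; i < 16; i++)
                        buf[i] = in[i] ^ iv[i];
                aes128_encrypt_block(key, buf, out);
                return;
        }

        tail = len % 16 ? len % 16 : 16;        /* length of final block P[n] */
        full = len - tail;                      /* bytes covered by P[1..n-1] */
        memcpy(prev, iv, 16);

        /* Plain CBC over P[1..n-1]; C[n-1] is not stored yet (it gets swapped). */
        for (i = 0; i < full; i += 16) {
                for (j = 0; j < 16; j++)
                        buf[j] = in[i + j] ^ prev[j];
                aes128_encrypt_block(key, buf, prev);
                if (i + 16 < full)
                        memcpy(out + i, prev, 16);
        }

        /* prev == Encrypt(P[n-1] ^ C[n-2]).  Per the comments above:
         *   C[n]   = Encrypt(P[n-1] ^ C[n-2])[0..tail]
         *   C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n]), P[n] zero-extended
         */
        memcpy(out + full, prev, tail);                 /* C[n] */
        for (i = 0; i < tail; i++)
                buf[i] = prev[i] ^ in[full + i];
        memcpy(buf + tail, prev + tail, 16 - tail);
        aes128_encrypt_block(key, buf, out + full - 16);        /* C[n-1] */
}

Decryption inverts this: the last full plaintext block comes from Decrypt(C[n]) ^ C[n-2] and the tail from Decrypt(C[n-1]) ^ C[n], which is what the .Lcts_decrypt_* paths above compute.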
