@@ -139,19 +139,25 @@ SYM_FUNC_END(aes_ecb_decrypt_zvkned)
.endm

.macro	aes_cbc_decrypt	keylen
+	srli	LEN, LEN, 2	// Convert LEN from bytes to words
	vle32.v	v16, (IVP)	// Load IV
1:
-	vle32.v	v17, (INP)	// Load ciphertext block
-	vmv.v.v	v18, v17	// Save ciphertext block
-	aes_decrypt	v17, \keylen	// Decrypt
-	vxor.vv	v17, v17, v16	// XOR with IV or prev ciphertext block
-	vse32.v	v17, (OUTP)	// Store plaintext block
-	vmv.v.v	v16, v18	// Next "IV" is prev ciphertext block
-	addi	INP, INP, 16
-	addi	OUTP, OUTP, 16
-	addi	LEN, LEN, -16
+	vsetvli	t0, LEN, e32, m4, ta, ma
+	vle32.v	v20, (INP)	// Load ciphertext blocks
+	vslideup.vi	v16, v20, 4	// Setup prev ciphertext blocks
+	addi	t1, t0, -4
+	vslidedown.vx	v24, v20, t1	// Save last ciphertext block
+	aes_decrypt	v20, \keylen	// Decrypt the blocks
+	vxor.vv	v20, v20, v16	// XOR with prev ciphertext blocks
+	vse32.v	v20, (OUTP)	// Store plaintext blocks
+	vmv.v.v	v16, v24	// Next "IV" is last ciphertext block
+	slli	t1, t0, 2	// Words to bytes
+	add	INP, INP, t1
+	add	OUTP, OUTP, t1
+	sub	LEN, LEN, t0
	bnez	LEN, 1b

+	vsetivli	zero, 4, e32, m1, ta, ma
	vse32.v	v16, (IVP)	// Store next IV
	ret
.endm
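
The rewritten loop works because CBC decryption, unlike CBC encryption, has no serial dependency between blocks: each plaintext block is Decrypt(C[i]) ^ C[i-1], so all the raw AES decryptions are independent and the vslideup only lines each ciphertext block up with its predecessor (or the IV). A rough C sketch of the same computation, for reference only; aes_decrypt_block() is a hypothetical single-block primitive, not something defined in this file:

/*
 * Reference sketch only: CBC decryption with the per-block structure
 * P[i] = Decrypt(C[i]) ^ C[i-1], where C[-1] is the IV.  The raw AES
 * decryptions are independent of each other, which is what the vector
 * loop above exploits.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

void aes_decrypt_block(const void *key, uint8_t out[16], const uint8_t in[16]);

static void cbc_decrypt_ref(const void *key, uint8_t *out, const uint8_t *in,
			    size_t nblocks, uint8_t iv[16])
{
	uint8_t prev[16], saved[16], tmp[16];

	memcpy(prev, iv, 16);
	for (size_t i = 0; i < nblocks; i++) {
		memcpy(saved, &in[i * 16], 16);		/* keep C[i]; allows out == in */
		aes_decrypt_block(key, tmp, saved);	/* independent per block */
		for (int j = 0; j < 16; j++)
			out[i * 16 + j] = tmp[j] ^ prev[j];
		memcpy(prev, saved, 16);		/* next "IV" is this ciphertext block */
	}
	memcpy(iv, prev, 16);	/* hand back the chaining value, as the asm does */
}
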
@@ -178,3 +184,156 @@ SYM_FUNC_START(aes_cbc_decrypt_zvkned)
192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)
+
+.macro	aes_cbc_cts_encrypt	keylen
+
+	// CBC-encrypt all blocks except the last. But don't store the
+	// second-to-last block to the output buffer yet, since it will be
+	// handled specially in the ciphertext stealing step. Exception: if the
+	// message is single-block, still encrypt the last (and only) block.
+	li	t0, 16
+	j	2f
+1:
+	vse32.v	v16, (OUTP)	// Store ciphertext block
+	addi	OUTP, OUTP, 16
+2:
+	vle32.v	v17, (INP)	// Load plaintext block
+	vxor.vv	v16, v16, v17	// XOR with IV or prev ciphertext block
+	aes_encrypt	v16, \keylen	// Encrypt
+	addi	INP, INP, 16
+	addi	LEN, LEN, -16
+	bgt	LEN, t0, 1b	// Repeat if more than one block remains
+
+	// Special case: if the message is a single block, just do CBC.
+	beqz	LEN, .Lcts_encrypt_done\@
+
+	// Encrypt the last two blocks using ciphertext stealing as follows:
+	// C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
+	// C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
+	//
+	// C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
+	// plaintext block. Block n, the last block, may be partial; its length
+	// is 1 <= LEN <= 16. If there are only 2 blocks, C[n-2] means the IV.
+	//
+	// v16 already contains Encrypt(P[n-1] ^ C[n-2]).
+	// INP points to P[n]. OUTP points to where C[n-1] should go.
+	// To support in-place encryption, load P[n] before storing C[n].
+	addi	t0, OUTP, 16	// Get pointer to where C[n] should go
+	vsetvli	zero, LEN, e8, m1, tu, ma
+	vle8.v	v17, (INP)	// Load P[n]
+	vse8.v	v16, (t0)	// Store C[n]
+	vxor.vv	v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
+	vsetivli	zero, 4, e32, m1, ta, ma
+	aes_encrypt	v16, \keylen
+.Lcts_encrypt_done\@:
+	vse32.v	v16, (OUTP)	// Store C[n-1] (or C[n] in single-block case)
+	ret
+.endm
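
For reference, the ciphertext-stealing tail described in the comments above can be sketched in C. This is a hedged illustration of the CS3 formulas, not the kernel implementation; aes_encrypt_block() is a hypothetical single-block primitive, prev is C[n-2] (or the IV for two-block messages), and tail_len is the length of the final partial block (1 to 16 bytes):

/*
 * Reference sketch only: the CS3 ciphertext-stealing tail for encryption.
 *   C[n]   = Encrypt(P[n-1] ^ C[n-2])[0..tail_len]
 *   C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])   (P[n] zero-padded)
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

void aes_encrypt_block(const void *key, uint8_t out[16], const uint8_t in[16]);

static void cbc_cts_encrypt_tail(const void *key,
				 uint8_t *c_last_full,		/* C[n-1] out */
				 uint8_t *c_partial,		/* C[n] out   */
				 const uint8_t p_full[16],	/* P[n-1]     */
				 const uint8_t *p_tail, size_t tail_len,
				 const uint8_t prev[16])	/* C[n-2] or IV */
{
	uint8_t e[16], pn[16];

	memcpy(pn, p_tail, tail_len);		/* read P[n] first, so out may alias in */
	for (int i = 0; i < 16; i++)
		e[i] = p_full[i] ^ prev[i];
	aes_encrypt_block(key, e, e);		/* e = Encrypt(P[n-1] ^ C[n-2]) */
	memcpy(c_partial, e, tail_len);		/* C[n] = first tail_len bytes of e */
	for (size_t i = 0; i < tail_len; i++)
		e[i] ^= pn[i];			/* fold in P[n]; the rest stays (zero-pad) */
	aes_encrypt_block(key, c_last_full, e);	/* C[n-1] */
}
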
+
+#define LEN32		t4	// Length of remaining full blocks in 32-bit words
+#define LEN_MOD16	t5	// Length of message in bytes mod 16
+
+.macro	aes_cbc_cts_decrypt	keylen
+	andi	LEN32, LEN, ~15
+	srli	LEN32, LEN32, 2
+	andi	LEN_MOD16, LEN, 15
+
+	// Save C[n-2] in v28 so that it's available later during the ciphertext
+	// stealing step. If there are fewer than three blocks, C[n-2] means
+	// the IV, otherwise it means the third-to-last ciphertext block.
+	vmv.v.v	v28, v16	// IV
+	add	t0, LEN, -33
+	bltz	t0, .Lcts_decrypt_loop\@
+	andi	t0, t0, ~15
+	add	t0, t0, INP
+	vle32.v	v28, (t0)
+
+	// CBC-decrypt all full blocks. For the last full block, or the last 2
+	// full blocks if the message is block-aligned, this doesn't write the
+	// correct output blocks (unless the message is only a single block),
+	// because it XORs the wrong values with the raw AES plaintexts. But we
+	// fix this after this loop without redoing the AES decryptions. This
+	// approach allows more of the AES decryptions to be parallelized.
+.Lcts_decrypt_loop\@:
+	vsetvli	t0, LEN32, e32, m4, ta, ma
+	addi	t1, t0, -4
+	vle32.v	v20, (INP)	// Load next set of ciphertext blocks
+	vmv.v.v	v24, v16	// Get IV or last ciphertext block of prev set
+	vslideup.vi	v24, v20, 4	// Setup prev ciphertext blocks
+	vslidedown.vx	v16, v20, t1	// Save last ciphertext block of this set
+	aes_decrypt	v20, \keylen	// Decrypt this set of blocks
+	vxor.vv	v24, v24, v20	// XOR prev ciphertext blocks with decrypted blocks
+	vse32.v	v24, (OUTP)	// Store this set of plaintext blocks
+	sub	LEN32, LEN32, t0
+	slli	t0, t0, 2	// Words to bytes
+	add	INP, INP, t0
+	add	OUTP, OUTP, t0
+	bnez	LEN32, .Lcts_decrypt_loop\@
+
+	vsetivli	zero, 4, e32, m4, ta, ma
+	vslidedown.vx	v20, v20, t1	// Extract raw plaintext of last full block
+	addi	t0, OUTP, -16	// Get pointer to last full plaintext block
+	bnez	LEN_MOD16, .Lcts_decrypt_non_block_aligned\@
+
+	// Special case: if the message is a single block, just do CBC.
+	li	t1, 16
+	beq	LEN, t1, .Lcts_decrypt_done\@
+
+	// Block-aligned message. Just fix up the last 2 blocks. We need:
+	//
+	// P[n-1] = Decrypt(C[n]) ^ C[n-2]
+	// P[n] = Decrypt(C[n-1]) ^ C[n]
+	//
+	// We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
+	// Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
+	// is everything needed to fix the output without re-decrypting blocks.
+	addi	t1, OUTP, -32	// Get pointer to where P[n-1] should go
+	vxor.vv	v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
+	vle32.v	v24, (t1)	// Decrypt(C[n-1]) ^ C[n-2]
+	vse32.v	v20, (t1)	// Store P[n-1]
+	vxor.vv	v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
+	j	.Lcts_decrypt_finish\@
+
+.Lcts_decrypt_non_block_aligned\@:
+	// Decrypt the last two blocks using ciphertext stealing as follows:
+	//
+	// P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
+	// P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
+	//
+	// We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
+	vmv.v.v	v16, v20	// v16 = Decrypt(C[n-1])
+	vsetvli	zero, LEN_MOD16, e8, m1, tu, ma
+	vle8.v	v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
+	vxor.vv	v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
+	vse8.v	v16, (OUTP)	// Store P[n]
+	vsetivli	zero, 4, e32, m1, ta, ma
+	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
+.Lcts_decrypt_finish\@:
+	vxor.vv	v20, v20, v28	// XOR with C[n-2]
+	vse32.v	v20, (t0)	// Store last full plaintext block
+.Lcts_decrypt_done\@:
+	ret
+.endm
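
The decryption-side stealing step can likewise be sketched in C. Again this is only an illustration of the formulas in the comments, assuming the same hypothetical aes_decrypt_block() primitive; note that with tail_len == 16 it collapses to the block-aligned fix-up above (P[n-1] = Decrypt(C[n]) ^ C[n-2], P[n] = Decrypt(C[n-1]) ^ C[n]):

/*
 * Reference sketch only: the CS3 ciphertext-stealing tail for decryption.
 *   P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[tail_len..16]) ^ C[n-2]
 *   P[n]   = (Decrypt(C[n-1]) ^ C[n])[0..tail_len]
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

void aes_decrypt_block(const void *key, uint8_t out[16], const uint8_t in[16]);

static void cbc_cts_decrypt_tail(const void *key,
				 uint8_t *p_full,		/* P[n-1] out */
				 uint8_t *p_tail,		/* P[n] out   */
				 const uint8_t c_full[16],	/* C[n-1]     */
				 const uint8_t *c_partial, size_t tail_len,
				 const uint8_t prev[16])	/* C[n-2] or IV */
{
	uint8_t d[16], stolen[16];

	aes_decrypt_block(key, d, c_full);	/* d = Decrypt(C[n-1]) */

	/* Rebuild the stolen block: C[n] followed by the tail of d */
	memcpy(stolen, d, 16);
	memcpy(stolen, c_partial, tail_len);

	/* P[n] = (Decrypt(C[n-1]) ^ C[n])[0..tail_len] */
	for (size_t i = 0; i < tail_len; i++)
		p_tail[i] = d[i] ^ c_partial[i];

	/* P[n-1] = Decrypt(stolen) ^ C[n-2] */
	aes_decrypt_block(key, d, stolen);
	for (int i = 0; i < 16; i++)
		p_full[i] = d[i] ^ prev[i];
}
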
+
+.macro	aes_cbc_cts_crypt	keylen
+	vle32.v	v16, (IVP)	// Load IV
+	beqz	a5, .Lcts_decrypt\@
+	aes_cbc_cts_encrypt	\keylen
+.Lcts_decrypt\@:
+	aes_cbc_cts_decrypt	\keylen
+.endm
+
+// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
+//				 const u8 *in, u8 *out, size_t len,
+//				 const u8 iv[16], bool enc);
+//
+// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
+// This is the variant that unconditionally swaps the last two blocks.
+SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
+	aes_begin	KEYP, 128f, 192f
+	aes_cbc_cts_crypt	256
+128:
+	aes_cbc_cts_crypt	128
+192:
+	aes_cbc_cts_crypt	192
+SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
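
A caller-side sketch, showing how C glue code might invoke this routine. This is an assumption-laden example, not the actual kernel glue: it presumes the RISC-V kernel-mode vector helpers kernel_vector_begin()/kernel_vector_end() are available and simply uses the prototype documented in the comment above:

/*
 * Hedged caller sketch, not the kernel's glue code.  The vector unit must be
 * enabled around the call; kernel_vector_begin()/kernel_vector_end() are
 * assumed here for that purpose.
 */
#include <linux/linkage.h>
#include <linux/types.h>
#include <crypto/aes.h>
#include <asm/vector.h>

asmlinkage void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
					 const u8 *in, u8 *out, size_t len,
					 const u8 iv[16], bool enc);

static void cbc_cts_one_message(const struct crypto_aes_ctx *key,
				const u8 *src, u8 *dst, size_t len,
				const u8 iv[16], bool enc)
{
	/* len must include at least one full AES block for CTS to apply. */
	kernel_vector_begin();
	aes_cbc_cts_crypt_zvkned(key, src, dst, len, iv, enc);
	kernel_vector_end();
}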