@@ -324,23 +324,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
 ; RV32-NEXT: addi s0, sp, 384
 ; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: li a2, 128
-; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32-NEXT: zext.b a1, a1
+; RV32-NEXT: mv a2, sp
+; RV32-NEXT: li a3, 128
+; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
 ; RV32-NEXT: vle8.v v8, (a0)
 ; RV32-NEXT: addi a0, a0, 128
 ; RV32-NEXT: vle8.v v16, (a0)
+; RV32-NEXT: add a1, a2, a1
 ; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vim v24, v8, 1, v0
-; RV32-NEXT: vmseq.vi v0, v16, 0
-; RV32-NEXT: zext.b a0, a1
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: vse8.v v24, (a1)
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: addi a1, sp, 128
-; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: lbu a0, 0(a0)
+; RV32-NEXT: vmv.v.i v24, 0
+; RV32-NEXT: vmseq.vi v8, v16, 0
+; RV32-NEXT: vmerge.vim v16, v24, 1, v0
+; RV32-NEXT: vse8.v v16, (a2)
+; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vmerge.vim v8, v24, 1, v0
+; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: lbu a0, 0(a1)
 ; RV32-NEXT: addi sp, s0, -384
 ; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
 ; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -354,23 +355,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
 ; RV64-NEXT: addi s0, sp, 384
 ; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: li a2, 128
-; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64-NEXT: zext.b a1, a1
+; RV64-NEXT: mv a2, sp
+; RV64-NEXT: li a3, 128
+; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: addi a0, a0, 128
 ; RV64-NEXT: vle8.v v16, (a0)
+; RV64-NEXT: add a1, a2, a1
 ; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v24, v8, 1, v0
-; RV64-NEXT: vmseq.vi v0, v16, 0
-; RV64-NEXT: zext.b a0, a1
-; RV64-NEXT: mv a1, sp
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: vse8.v v24, (a1)
-; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: addi a1, sp, 128
-; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: lbu a0, 0(a0)
+; RV64-NEXT: vmv.v.i v24, 0
+; RV64-NEXT: vmseq.vi v8, v16, 0
+; RV64-NEXT: vmerge.vim v16, v24, 1, v0
+; RV64-NEXT: vse8.v v16, (a2)
+; RV64-NEXT: vmv1r.v v0, v8
+; RV64-NEXT: vmerge.vim v8, v24, 1, v0
+; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vse8.v v8, (a0)
+; RV64-NEXT: lbu a0, 0(a1)
 ; RV64-NEXT: addi sp, s0, -384
 ; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
 ; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -384,23 +386,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
 ; RV32ZBS-NEXT: addi s0, sp, 384
 ; RV32ZBS-NEXT: andi sp, sp, -128
-; RV32ZBS-NEXT: li a2, 128
-; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV32ZBS-NEXT: zext.b a1, a1
+; RV32ZBS-NEXT: mv a2, sp
+; RV32ZBS-NEXT: li a3, 128
+; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
 ; RV32ZBS-NEXT: vle8.v v8, (a0)
 ; RV32ZBS-NEXT: addi a0, a0, 128
 ; RV32ZBS-NEXT: vle8.v v16, (a0)
+; RV32ZBS-NEXT: add a1, a2, a1
 ; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV32ZBS-NEXT: vmv.v.i v8, 0
-; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
-; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
-; RV32ZBS-NEXT: zext.b a0, a1
-; RV32ZBS-NEXT: mv a1, sp
-; RV32ZBS-NEXT: add a0, a1, a0
-; RV32ZBS-NEXT: vse8.v v24, (a1)
-; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32ZBS-NEXT: addi a1, sp, 128
-; RV32ZBS-NEXT: vse8.v v8, (a1)
-; RV32ZBS-NEXT: lbu a0, 0(a0)
+; RV32ZBS-NEXT: vmv.v.i v24, 0
+; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
+; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
+; RV32ZBS-NEXT: vse8.v v16, (a2)
+; RV32ZBS-NEXT: vmv1r.v v0, v8
+; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
+; RV32ZBS-NEXT: addi a0, sp, 128
+; RV32ZBS-NEXT: vse8.v v8, (a0)
+; RV32ZBS-NEXT: lbu a0, 0(a1)
 ; RV32ZBS-NEXT: addi sp, s0, -384
 ; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
 ; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -414,23 +417,24 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
 ; RV64ZBS-NEXT: addi s0, sp, 384
 ; RV64ZBS-NEXT: andi sp, sp, -128
-; RV64ZBS-NEXT: li a2, 128
-; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; RV64ZBS-NEXT: zext.b a1, a1
+; RV64ZBS-NEXT: mv a2, sp
+; RV64ZBS-NEXT: li a3, 128
+; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: addi a0, a0, 128
 ; RV64ZBS-NEXT: vle8.v v16, (a0)
+; RV64ZBS-NEXT: add a1, a2, a1
 ; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV64ZBS-NEXT: vmv.v.i v8, 0
-; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
-; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
-; RV64ZBS-NEXT: zext.b a0, a1
-; RV64ZBS-NEXT: mv a1, sp
-; RV64ZBS-NEXT: add a0, a1, a0
-; RV64ZBS-NEXT: vse8.v v24, (a1)
-; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64ZBS-NEXT: addi a1, sp, 128
-; RV64ZBS-NEXT: vse8.v v8, (a1)
-; RV64ZBS-NEXT: lbu a0, 0(a0)
+; RV64ZBS-NEXT: vmv.v.i v24, 0
+; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
+; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
+; RV64ZBS-NEXT: vse8.v v16, (a2)
+; RV64ZBS-NEXT: vmv1r.v v0, v8
+; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
+; RV64ZBS-NEXT: addi a0, sp, 128
+; RV64ZBS-NEXT: vse8.v v8, (a0)
+; RV64ZBS-NEXT: lbu a0, 0(a1)
 ; RV64ZBS-NEXT: addi sp, s0, -384
 ; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
 ; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload