 5 |  5 |     const int64_t *restrict Bi = B->i;
 6 |  6 |     const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x;
 7 |  7 |     const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x;
 8 |    | -   size_t vl = __riscv_vsetvl_e64m8(m);
   |  8 | +   size_t vl = VSETVL(m);
 9 |  9 |     GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x;
10 | 10 |
11 | 11 |     #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1)
...
22 | 22 |         const int64_t pB_end = Bp[jB + 1];
23 | 23 |         for (int64_t i = 0; i < m && (m - i) >= vl; i += vl)
24 | 24 |         {
25 |    | -           vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, vl);
26 |    | -
   | 25 | +           VECTORTYPE vc = VLE(Cxj + i, vl);
27 | 26 |             for (int64_t pB = pB_start; pB < pB_end; pB++)
28 | 27 |             {
29 | 28 |                 const int64_t k = Bi[pB];
30 | 29 |                 const GB_B_TYPE bkj = Bx[pB];
31 |    | -               vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, vl);
32 |    | -               vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl);
   | 30 | +               VECTORTYPE va = VLE(Ax + i + k * m, vl);
   | 31 | +               vc = VFMACC(vc, bkj, va, vl);
33 | 32 |             }
34 | 33 |
35 |    | -           __riscv_vse64_v_f64m8(Cxj + i, vc, vl);
   | 34 | +           VSE(Cxj + i, vc, vl);
36 | 35 |         }
37 | 36 |         int64_t remaining = m % vl;
38 | 37 |         if (remaining > 0)
39 | 38 |         {
40 | 39 |             int64_t i = m - remaining;
41 |    | -           vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, remaining);
42 |    | -
   | 40 | +           VECTORTYPE vc = VLE(Cxj + i, remaining);
43 | 41 |             for (int64_t pB = pB_start; pB < pB_end; pB++)
44 | 42 |             {
45 | 43 |                 const int64_t k = Bi[pB];
46 | 44 |                 const GB_B_TYPE bkj = Bx[pB];
47 |    | -               vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, remaining);
48 |    | -               vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining);
   | 45 | +               VECTORTYPE va = VLE(Ax + i + k * m, remaining);
   | 46 | +               vc = VFMACC(vc, bkj, va, remaining);
49 | 47 |             }
50 | 48 |
51 |    | -           __riscv_vse64_v_f64m8(Cxj + i, vc, remaining);
   | 49 | +           VSE(Cxj + i, vc, remaining);
52 | 50 |         }
53 | 51 |     }
54 | 52 | }
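For context, `VECTORTYPE`, `VSETVL`, `VLE`, `VFMACC`, and `VSE` are defined outside this hunk. A minimal sketch of definitions that would reproduce the removed double-precision (e64, LMUL=8) intrinsics, with the macro spellings below being assumptions rather than the actual header, looks like:

    #include <riscv_vector.h>

    // Hypothetical definitions of the generic vector macros, fixed to the
    // f64 / LMUL=8 configuration used by the removed intrinsics; the real
    // header presumably parameterizes these per element type and LMUL.
    #define VECTORTYPE                  vfloat64m8_t
    #define VSETVL(n)                   __riscv_vsetvl_e64m8 (n)
    #define VLE(addr, vl)               __riscv_vle64_v_f64m8 (addr, vl)
    #define VFMACC(acc, scalar, v, vl)  __riscv_vfmacc_vf_f64m8 (acc, scalar, v, vl)
    #define VSE(addr, v, vl)            __riscv_vse64_v_f64m8 (addr, v, vl)

With definitions along these lines, the `+` lines keep the same semantics as the code they replace: each inner iteration performs vc = bkj * va + vc over `vl` (or `remaining`) elements, a dense saxpy-style column update, while the kernel body itself no longer hard-codes the element type or LMUL.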