@@ -27,6 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
27
#include "common.h"
28
28
#ifndef HAVE_ASM_KERNEL
29
29
#include <altivec.h>
30
+
31
+ #define offset_0 0 /* offset_N: offset argument handed to vec_vsx_ld / vec_vsx_st in caxpy_kernel_16; values step by 16, i.e. one 16-byte vector per slot */
32
+ #define offset_1 16 /* slot 1 */
33
+ #define offset_2 32 /* slot 2 */
34
+ #define offset_3 48 /* slot 3 */
35
+ #define offset_4 64 /* slot 4 */
36
+ #define offset_5 80 /* slot 5 */
37
+ #define offset_6 96 /* slot 6 */
38
+ #define offset_7 112 /* slot 7: last of the eight vectors handled per loop iteration */
39
+
30
40
/*
 * 16-byte-aligned table of byte indices. Within each 8-byte group the upper
 * four indices (4..7 / 12..15) are listed before the lower four (0..3 / 8..11),
 * i.e. the two 4-byte halves of every 8-byte pair are exchanged.
 * caxpy_kernel_16 loads it as a __vector unsigned char named swap_mask.
 * NOTE(review): presumably used as a permute mask to swap the real and
 * imaginary float of each complex element -- the use site is not visible
 * in this view; confirm.
 */
static const unsigned char __attribute__((aligned (16 ))) swap_mask_arr []= { 4 ,5 ,6 ,7 ,0 ,1 ,2 ,3 , 12 ,13 ,14 ,15 , 8 ,9 ,10 ,11 };
31
41
32
42
static void caxpy_kernel_16 (BLASLONG n , FLOAT * x , FLOAT * y , FLOAT alpha_r , FLOAT alpha_i )
@@ -43,27 +53,28 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT
43
53
#endif
44
54
45
55
__vector unsigned char swap_mask = * ((__vector unsigned char * )swap_mask_arr );
46
- register __vector float * vy = (__vector float * ) y ;
47
- register __vector float * vx = (__vector float * ) x ;
56
+ register __vector float * vptr_y = (__vector float * ) y ;
57
+ register __vector float * vptr_x = (__vector float * ) x ;
48
58
BLASLONG i = 0 ;
49
- for (; i < n /2 ; i += 8 ) {
59
+ for (;i < n /2 ;i += 8 ){
60
+
61
+ register __vector float vy_0 = vec_vsx_ld ( offset_0 ,vptr_y ) ;
62
+ register __vector float vy_1 = vec_vsx_ld ( offset_1 ,vptr_y ) ;
63
+ register __vector float vy_2 = vec_vsx_ld ( offset_2 ,vptr_y ) ;
64
+ register __vector float vy_3 = vec_vsx_ld ( offset_3 ,vptr_y ) ;
65
+ register __vector float vy_4 = vec_vsx_ld ( offset_4 ,vptr_y ) ;
66
+ register __vector float vy_5 = vec_vsx_ld ( offset_5 ,vptr_y ) ;
67
+ register __vector float vy_6 = vec_vsx_ld ( offset_6 ,vptr_y ) ;
68
+ register __vector float vy_7 = vec_vsx_ld ( offset_7 ,vptr_y ) ;
50
69
51
- register __vector float vy_0 = vy [i ];
52
- register __vector float vy_1 = vy [i + 1 ];
53
- register __vector float vy_2 = vy [i + 2 ];
54
- register __vector float vy_3 = vy [i + 3 ];
55
- register __vector float vy_4 = vy [i + 4 ];
56
- register __vector float vy_5 = vy [i + 5 ];
57
- register __vector float vy_6 = vy [i + 6 ];
58
- register __vector float vy_7 = vy [i + 7 ];
59
- register __vector float vx_0 = vx [i ];
60
- register __vector float vx_1 = vx [i + 1 ];
61
- register __vector float vx_2 = vx [i + 2 ];
62
- register __vector float vx_3 = vx [i + 3 ];
63
- register __vector float vx_4 = vx [i + 4 ];
64
- register __vector float vx_5 = vx [i + 5 ];
65
- register __vector float vx_6 = vx [i + 6 ];
66
- register __vector float vx_7 = vx [i + 7 ];
70
+ register __vector float vx_0 = vec_vsx_ld ( offset_0 ,vptr_x ) ;
71
+ register __vector float vx_1 = vec_vsx_ld ( offset_1 ,vptr_x ) ;
72
+ register __vector float vx_2 = vec_vsx_ld ( offset_2 ,vptr_x ) ;
73
+ register __vector float vx_3 = vec_vsx_ld ( offset_3 ,vptr_x ) ;
74
+ register __vector float vx_4 = vec_vsx_ld ( offset_4 ,vptr_x ) ;
75
+ register __vector float vx_5 = vec_vsx_ld ( offset_5 ,vptr_x ) ;
76
+ register __vector float vx_6 = vec_vsx_ld ( offset_6 ,vptr_x ) ;
77
+ register __vector float vx_7 = vec_vsx_ld ( offset_7 ,vptr_x ) ;
67
78
vy_0 += vx_0 * valpha_r ;
68
79
vy_1 += vx_1 * valpha_r ;
69
80
vy_2 += vx_2 * valpha_r ;
@@ -88,15 +99,17 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT
88
99
vy_5 += vx_5 * valpha_i ;
89
100
vy_6 += vx_6 * valpha_i ;
90
101
vy_7 += vx_7 * valpha_i ;
91
- vy [ i ] = vy_0 ;
92
- vy [ i + 1 ] = vy_1 ;
93
- vy [ i + 2 ] = vy_2 ;
94
- vy [ i + 3 ] = vy_3 ;
95
- vy [ i + 4 ] = vy_4 ;
96
- vy [ i + 5 ] = vy_5 ;
97
- vy [ i + 6 ] = vy_6 ;
98
- vy [ i + 7 ] = vy_7 ;
102
+ vec_vsx_st ( vy_0 , offset_0 , vptr_y ) ;
103
+ vec_vsx_st ( vy_1 , offset_1 , vptr_y ) ;
104
+ vec_vsx_st ( vy_2 , offset_2 , vptr_y ) ;
105
+ vec_vsx_st ( vy_3 , offset_3 , vptr_y ) ;
106
+ vec_vsx_st ( vy_4 , offset_4 , vptr_y ) ;
107
+ vec_vsx_st ( vy_5 , offset_5 , vptr_y ) ;
108
+ vec_vsx_st ( vy_6 , offset_6 , vptr_y ) ;
109
+ vec_vsx_st ( vy_7 , offset_7 , vptr_y ) ;
99
110
111
+ vptr_x += 8 ;
112
+ vptr_y += 8 ;
100
113
}
101
114
}
102
115
#endif
0 commit comments