@@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
114
114
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
115
115
116
116
#define _MASK_STORE_C_2nx16 (addr , val0 , val1 ) \
117
- asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k "(mmask)); \
118
- asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k "(mmask)); \
119
- asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k "(mmask)); \
120
- asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k "(mmask))
117
+ asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk "(mmask)); \
118
+ asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk "(mmask)); \
119
+ asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk "(mmask)); \
120
+ asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk "(mmask))
121
121
122
122
#define _REORDER_C_2X (result_0 , result_1 ) { \
123
123
__m512 tmp0, tmp1; \
@@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
154
154
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
155
155
156
156
#define _MASK_STORE_C_16 (addr , val0 ) \
157
- asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k "(mmask)); \
158
- asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k "(mmask));
157
+ asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk "(mmask)); \
158
+ asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk "(mmask));
159
159
160
160
#define N_STORE_4X (A , Bx , By ) { \
161
161
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \
0 commit comments