@@ -20,9 +20,13 @@ $(HEADERNAV_TOC)
20
20
21
21
$(IMPLEMENTATION_DEFINED Which vector types are supported depends
22
22
on the target. The implementation is expected to only support
23
- the vector types that are implemented in the target's hardware.
23
+ the vector types and operations that are implemented in the target's hardware.
24
24
)
25
25
26
+ $(RATIONALE Emulating unsupported vector types and operations can exhibit
27
+ such poor performance that the user is likely better off selecting a different
28
+ algorithm than relying on emulation.)
29
+
26
30
$(BEST_PRACTICE Use the declarations in $(CORE_SIMD) instead of
27
31
the language $(GLINK2 type, Vector) grammar.
28
32
)
@@ -37,18 +41,17 @@ import core.simd;
37
41
---
38
42
39
43
$(IMPLEMENTATION_DEFINED
40
-
41
- $(P These types and operations will be the ones defined for the architecture
44
+ These types and operations will be the ones defined for the architecture
42
45
the compiler is targeting. If a particular CPU family has varying
43
46
support for vector types, an additional runtime check may be necessary.
44
47
The compiler does not emit runtime checks; those must be done by the
45
48
programmer.
46
49
)
47
50
48
- $(P Depending on the architecture, compiler flags may be required to
51
+ $(IMPLEMENTATION_DEFINED Depending on the target architecture, compiler flags
52
+ may be required to
49
53
activate support for SIMD types.
50
54
)
51
- )
52
55
53
56
$(P The types defined will all follow the naming convention:)
54
57
@@ -92,7 +95,8 @@ $(H3 $(LNAME2 properties, Properties))
92
95
93
96
$(H3 $(LNAME2 conversions, Conversions))
94
97
95
- $(P Vector types of the same size can be implicitly converted among
98
+ $(P Vector types of the same size (number_of_elements * size_of_element)
99
+ can be implicitly converted among
96
100
each other; this is done as a reinterpret cast (a type paint).
97
101
Vector types can be cast to their $(GLINK2 type, VectorBaseType).)
98
102
142
146
---
143
147
float4 a,b;
144
148
static if (__traits(compiles, a+b))
145
- ... yes, it is supported ...
149
+ ... yes, add is supported for float4 ...
146
150
else
147
151
... nope, use workaround ...
148
152
---
152
156
$(LINK2 $(ROOT_DIR)phobos/core_cpuid.html, core.cpuid).
153
157
)
154
158
155
- $(P A typical workaround would be to use array vector operations instead:)
159
+ $(P A typical workaround for unsupported vector operations would be to
160
+ use array operations instead:)
156
161
157
162
---
158
163
float4 a,b;
@@ -201,7 +206,7 @@ $(H2 $(LNAME2 x86_64_vec, X86 And X86$(UNDERSCORE)64 Vector Extension Implementa
201
206
$(TROW double4, 4 $(D double)s, $(D double __attribute__((vector_size(32)))))
202
207
)
203
208
204
- $(P Note: for 32 bit gcc, it's $(D long long) instead of $(D long).)
209
+ $(NOTE for 32-bit gcc and clang, it's $(D long long) instead of $(D long).)
205
210
206
211
$(TABLE2 Supported 128-bit Vector Operators,
207
212
$(THEAD Operator,void16,byte16,ubyte16,short8,ushort8,int4,uint4,long2,ulong2,float4,double2)
@@ -220,6 +225,12 @@ $(H2 $(LNAME2 x86_64_vec, X86 And X86$(UNDERSCORE)64 Vector Extension Implementa
220
225
$(TROW $(CODE_AMP)=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
221
226
$(TROW |=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
222
227
$(TROW $(D ^=),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
228
+ $(TROW ==,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
229
+ $(TROW !=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
230
+ $(TROW <, $(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
231
+ $(TROW <=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
232
+ $(TROW >=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
233
+ $(TROW >, $(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
223
234
$(TROW $(I unary)$(D ~),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
224
235
$(TROW $(I unary)+,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
225
236
$(TROW $(I unary)-,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
@@ -242,6 +253,12 @@ $(H2 $(LNAME2 x86_64_vec, X86 And X86$(UNDERSCORE)64 Vector Extension Implementa
242
253
$(TROW $(CODE_AMP)=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
243
254
$(TROW |=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
244
255
$(TROW $(D ^=),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
256
+ $(TROW ==,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
257
+ $(TROW !=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
258
+ $(TROW <, $(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
259
+ $(TROW <=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
260
+ $(TROW >=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
261
+ $(TROW >, $(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
245
262
$(TROW $(I unary)$(D ~),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
246
263
$(TROW $(I unary)+,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
247
264
$(TROW $(I unary)-,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
0 commit comments