Skip to content
This repository was archived by the owner on Oct 12, 2022. It is now read-only.

Commit 6cf1190

Browse files
ibuclaw authored and wilzbach committed
Add overrides for __simd intrinsics
1 parent e19771e commit 6cf1190

File tree

1 file changed

+118
-11
lines changed

1 file changed

+118
-11
lines changed

src/core/simd.d

Lines changed: 118 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -384,14 +384,58 @@ version ( D_SIMD )
384384
* Returns:
385385
* result of opcode
386386
*/
387-
pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2);
387+
pure @safe V1 simd(XMM opcode, V1, V2)(V1 op1, V2 op2)
388+
if (is(V1 == __vector) && is(V2 == __vector))
389+
{
390+
pragma(inline, true);
391+
return cast(V1)__simd(opcode, op1, op2);
392+
}
393+
394+
pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2); // intrinsic
395+
396+
///
397+
unittest
398+
{
399+
float4 a;
400+
a = simd!(XMM.PXOR)(a, a);
401+
}
388402

389403
/**
390404
* Unary SIMD instructions.
391405
*/
392-
pure @safe void16 __simd(XMM opcode, void16 op1);
393-
pure @safe void16 __simd(XMM opcode, double d); ///
394-
pure @safe void16 __simd(XMM opcode, float f); ///
406+
pure @safe V1 simd(XMM opcode, V1)(V1 op1)
407+
if (is(V1 == __vector))
408+
{
409+
pragma(inline, true);
410+
return cast(V1)__simd(opcode, op1);
411+
}
412+
413+
///
414+
pure @safe V1 simd(XMM opcode, V1)(double d)
415+
if (is(V1 == __vector))
416+
{
417+
pragma(inline, true);
418+
return cast(V1)__simd(opcode, d);
419+
}
420+
421+
///
422+
pure @safe V1 simd(XMM opcode, V1)(float f)
423+
if (is(V1 == __vector))
424+
{
425+
pragma(inline, true);
426+
return cast(V1)__simd(opcode, f);
427+
}
428+
429+
pure @safe void16 __simd(XMM opcode, void16 op1); // intrinsic
430+
pure @safe void16 __simd(XMM opcode, double d); // intrinsic
431+
pure @safe void16 __simd(XMM opcode, float f); // intrinsic
432+
433+
///
434+
unittest
435+
{
436+
float4 a;
437+
a = simd!(XMM.LODSS)(a);
438+
}
395439

396440
/****
397441
* For instructions:
@@ -408,7 +452,21 @@ version ( D_SIMD )
408452
* Returns:
409453
* result of opcode
410454
*/
411-
pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8);
455+
pure @safe V1 simd(XMM opcode, ubyte imm8, V1, V2)(V1 op1, V2 op2)
456+
if (is(V1 == __vector) && is(V2 == __vector))
457+
{
458+
pragma(inline, true);
459+
return cast(V1)__simd(opcode, op1, op2, imm8);
460+
}
461+
462+
pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8); // intrinsic
463+
464+
///
465+
unittest
466+
{
467+
float4 a;
468+
a = simd!(XMM.CMPPD, 0x7A)(a, a);
469+
}
412470

413471
/***
414472
* For instructions with the imm8 version:
@@ -421,7 +479,21 @@ version ( D_SIMD )
421479
* Returns:
422480
* result of opcode
423481
*/
424-
pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8);
482+
pure @safe V1 simd(XMM opcode, ubyte imm8, V1)(V1 op1)
483+
if (is(V1 == __vector))
484+
{
485+
pragma(inline, true);
486+
return cast(V1)__simd_ib(opcode, op1, imm8);
487+
}
488+
489+
pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8); // intrinsic
490+
491+
///
492+
unittest
493+
{
494+
float4 a;
495+
a = simd!(XMM.PSRLQ, 0x7A)(a);
496+
}
425497

426498
/*****
427499
* For "store" operations of the form:
@@ -430,22 +502,57 @@ version ( D_SIMD )
430502
* op2
431503
* These cannot be marked as pure, as semantic() doesn't check them.
432504
*/
433-
@safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);
434-
@safe void16 __simd_sto(XMM opcode, double op1, void16 op2); ///
435-
@safe void16 __simd_sto(XMM opcode, float op1, void16 op2); ///
505+
@safe V1 simd_sto(XMM opcode, V1, V2)(V1 op1, V2 op2)
506+
if (is(V1 == __vector) && is(V2 == __vector))
507+
{
508+
pragma(inline, true);
509+
return cast(V1)__simd_sto(opcode, op1, op2);
510+
}
511+
512+
///
513+
@safe V1 simd_stod(XMM opcode, V1, V2)(double op1, V1 op2)
514+
if (is(V1 == __vector))
515+
{
516+
pragma(inline, true);
517+
return cast(V1)__simd_sto(opcode, op1, op2);
518+
}
519+
520+
///
521+
@safe V1 simd_stof(XMM opcode, V1)(float op1, V1 op2)
522+
if (is(V1 == __vector))
523+
{
524+
pragma(inline, true);
525+
return cast(V1)__simd_sto(opcode, op1, op2);
526+
}
527+
528+
@safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2); // intrinsic
529+
@safe void16 __simd_sto(XMM opcode, double op1, void16 op2); // intrinsic
530+
@safe void16 __simd_sto(XMM opcode, float op1, void16 op2); // intrinsic
531+
532+
///
533+
unittest
534+
{
535+
void16 a;
536+
float f = 1;
537+
double d = 1;
538+
539+
cast(void)simd_sto!(XMM.STOUPS)(a, a);
540+
//simd_sto!(XMM.STOUPS)(f, a);
541+
//simd_sto!(XMM.STOUPS)(d, a);
542+
}
436543

437544
/* The following use overloading to ensure correct typing.
438545
* Compile with inlining on for best performance.
439546
*/
440547

441548
pure @safe short8 pcmpeq()(short8 v1, short8 v2)
442549
{
443-
return __simd(XMM.PCMPEQW, v1, v2);
550+
return cast(short8)__simd(XMM.PCMPEQW, v1, v2);
444551
}
445552

446553
pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2)
447554
{
448-
return __simd(XMM.PCMPEQW, v1, v2);
555+
return cast(ushort8)__simd(XMM.PCMPEQW, v1, v2);
449556
}
450557

451558
/*********************

0 commit comments

Comments (0)