@@ -384,14 +384,58 @@ version (D_SIMD)
      * Returns:
      *    result of opcode
      */
-    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2);
+    pure @safe V1 simd(XMM opcode, V1, V2)(V1 op1, V2 op2)
+        if (is(V1 == __vector) && is(V2 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, op1, op2);
+    }
+
+    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.PXOR)(a, a);
+    }
 
     /**
      * Unary SIMD instructions.
      */
-    pure @safe void16 __simd(XMM opcode, void16 op1);
-    pure @safe void16 __simd(XMM opcode, double d);   ///
-    pure @safe void16 __simd(XMM opcode, float f);    ///
+    pure @safe V1 simd(XMM opcode, V1)(V1 op1)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, op1);
+    }
+
+    ///
+    pure @safe V1 simd(XMM opcode, V1)(double d)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, d);
+    }
+
+    ///
+    pure @safe V1 simd(XMM opcode, V1)(float f)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, f);
+    }
+
+    pure @safe void16 __simd(XMM opcode, void16 op1);  // intrinsic
+    pure @safe void16 __simd(XMM opcode, double d);    // intrinsic
+    pure @safe void16 __simd(XMM opcode, float f);     // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.LODSS)(a);
+    }
 
     /****
      * For instructions:
@@ -408,7 +452,21 @@ version (D_SIMD)
      * Returns:
      *    result of opcode
      */
-    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8);
+    pure @safe V1 simd(XMM opcode, ubyte imm8, V1, V2)(V1 op1, V2 op2)
+        if (is(V1 == __vector) && is(V2 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, op1, op2, imm8);
+    }
+
+    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.CMPPD, 0x7A)(a, a);
+    }
 
     /***
      * For instructions with the imm8 version:
@@ -421,7 +479,21 @@ version (D_SIMD)
      * Returns:
      *    result of opcode
      */
-    pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8);
+    pure @safe V1 simd(XMM opcode, ubyte imm8, V1)(V1 op1)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_ib(opcode, op1, imm8);
+    }
+
+    pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.PSRLQ, 0x7A)(a);
+    }
 
     /*****
      * For "store" operations of the form:
@@ -430,22 +502,57 @@ version (D_SIMD)
      *    op2
      * These cannot be marked as pure, as semantic() doesn't check them.
      */
-    @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);
-    @safe void16 __simd_sto(XMM opcode, double op1, void16 op2);   ///
-    @safe void16 __simd_sto(XMM opcode, float op1, void16 op2);    ///
+    @safe V1 simd_sto(XMM opcode, V1, V2)(V1 op1, V2 op2)
+        if (is(V1 == __vector) && is(V2 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_sto(opcode, op1, op2);
+    }
+
+    ///
+    @safe V1 simd_stod(XMM opcode, V1, V2)(double op1, V1 op2)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_sto(opcode, op1, op2);
+    }
+
+    ///
+    @safe V1 simd_stof(XMM opcode, V1)(float op1, V1 op2)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_sto(opcode, op1, op2);
+    }
+
+    @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);  // intrinsic
+    @safe void16 __simd_sto(XMM opcode, double op1, void16 op2);  // intrinsic
+    @safe void16 __simd_sto(XMM opcode, float op1, void16 op2);   // intrinsic
+
+    ///
+    unittest
+    {
+        void16 a;
+        float f = 1;
+        double d = 1;
+
+        cast(void)simd_sto!(XMM.STOUPS)(a, a);
+        //simd_sto!(XMM.STOUPS)(f, a);
+        //simd_sto!(XMM.STOUPS)(d, a);
+    }
 
     /* The following use overloading to ensure correct typing.
      * Compile with inlining on for best performance.
      */
 
     pure @safe short8 pcmpeq()(short8 v1, short8 v2)
     {
-        return __simd(XMM.PCMPEQW, v1, v2);
+        return cast(short8)__simd(XMM.PCMPEQW, v1, v2);
     }
 
     pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2)
     {
-        return __simd(XMM.PCMPEQW, v1, v2);
+        return cast(ushort8)__simd(XMM.PCMPEQW, v1, v2);
     }
 
     /*********************
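To make the effect of the change concrete, here is a minimal usage sketch, assuming the diff is against druntime's core.simd module (the helper names xorOld and xorNew are hypothetical; XMM.PXOR is one of the opcodes already used in the unittests above):

import core.simd;

version (D_SIMD)
{
    // Before this change: the raw __simd intrinsic traffics in void16, so the
    // caller has to cast the result back to the concrete vector type by hand.
    float4 xorOld(float4 a, float4 b)
    {
        return cast(float4) __simd(XMM.PXOR, a, b);
    }

    // After this change: simd!() deduces the vector types from its arguments,
    // performs the cast internally, and its template constraint rejects
    // non-vector arguments at compile time.
    float4 xorNew(float4 a, float4 b)
    {
        return simd!(XMM.PXOR)(a, b);
    }
}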