Skip to content

Commit bec6dde

Browse files
v2.1.2
### Known Issues - half8 "equals" and "not equals" operators don't conform to the IEEE 754 standard - Unity has not yet reacted to my bug-report in regards to their "half" implementation ### Fixes - fixed undefined behavior of "vshr" functions for vector types smaller than 128 bits - fixed SSE2 implementations of "vrol" and "vror" functions for the (u)short16 type ### Additions - implemented Bmi1 and Bmi2 intrinsics as functions with a "bits_" prefix (except for "andn", which has already been implemented as "andnot") - added high performance and/or SIMD "isdivisible" functions for all integer vector types and scalar value types - added high performance and/or SIMD "intpow" - integer exponentiation - functions for (u)int, (u)long and all integer vector types - added high performance and/or SIMD "floorpow2" functions for all integer vector types - added "nabs" - negative absolute value - functions for all non-boolean vector- and single value types - added "indexof(vector v, value x)" functions for all non-boolean vector types ### Improvements - aggressively optimized away global variables (shuffle masks) and thus memory access and usage where appropriate - improved performance of 256 bit vector subvector getters - added Sse2 fallback code for all (u)long2/3/4 operators - improved performance of multiplication, division and modulo operations for all (s)byte- and (u)short vector- and matrix types when dividing by a single non-compile time constant value - added overloads for (s)byte- and (u)short vectors' "divrem" functions with a scalar value as the divisor parameter, improving performance when it is a compile time constant - improved performance of "intsqrt" functions for most types ### Changes - bump com.unity.burst to version 1.5 ### Fixed Oversights - added bitmask8 and bitmask16 functions for (s)byte and (u)short vector types, respectively
1 parent e3eff09 commit bec6dde

File tree

143 files changed

+18670
-8206
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+18670
-8206
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ Note:
148148

149149
- Division and modulo operations of (s)byte and (u)short vectors _by_ _other_ _vectors_ are implemented as either a long division algorithm ((s)byte32, (s)byte16 and (s)byte8 if not compiling for Avx2) or reciprocal multiplication after converting the vectors to float vectors (up to (s)byte8, all (u)short vectors) - it is very fast and, of course, 100% accurate!
150150

151-
- This library uses Wojciech Mula's SIMD population count algorithm. Population count functions for (s)byte and (u)short types are very fast, and this library contains a function to sum up the number of 1-bits in a given block of memory based on it, too
151+
- This library uses Wojciech Mula's SIMD population count algorithm. You can count the number of set bits of a contiguous block of memory very efficiently using either the (s)byte32 (Avx2) or (s)byte16 (Ssse3) type
152152

153153
### Notes
154154

Runtime/AssemblyInfo.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
[assembly: AssemblyTrademark("")]
1616
[assembly: AssemblyCulture("")]
1717
[assembly: InternalsVisibleTo("MaxMath.Tests")]
18+
[assembly: InternalsVisibleTo("NativeArrayExtensions")]
1819

1920
// Setting ComVisible to false makes the types in this assembly not visible
2021
// to COM components. If you need to access a type in this assembly from
@@ -31,8 +32,8 @@
3132
// Build Number
3233
// Revision
3334
//
34-
[assembly: AssemblyVersion("2.1.1")]
35-
[assembly: AssemblyFileVersion("2.1.1")]
35+
[assembly: AssemblyVersion("2.1.2")]
36+
[assembly: AssemblyFileVersion("2.1.2")]
3637
[assembly: AssemblyInformationalVersion("2.1 Release")]
3738

3839
[assembly: SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "Unity.Mathematics API consistency")]

Runtime/Functions/Arithmetic/Absolute.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,9 @@ public static int8 abs(int8 x)
237237
[MethodImpl(MethodImplOptions.AggressiveInlining)]
238238
public static long2 abs(long2 x)
239239
{
240-
if (Sse4_2.IsSse42Supported)
240+
if (Sse2.IsSse2Supported)
241241
{
242-
long2 mask = Sse4_2.cmpgt_epi64(default(v128), x);
242+
long2 mask = Operator.greater_mask_long(default(v128), x);
243243

244244
return (x + mask) ^ mask;
245245
}

Runtime/Functions/Arithmetic/Average.cs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,14 @@ public static byte avg(byte x, byte y)
1919
[MethodImpl(MethodImplOptions.AggressiveInlining)]
2020
public static byte avg(byte2 c)
2121
{
22-
return (byte)((1u + csum(c)) / 2u);
22+
if (Sse2.IsSse2Supported)
23+
{
24+
return Sse2.avg_epu8(c, Sse2.bsrli_si128(c, 1 * sizeof(byte))).Byte0;
25+
}
26+
else
27+
{
28+
return (byte)((1u + csum(c)) / 2u);
29+
}
2330
}
2431

2532
/// <summary> Returns the componentwise average value of two byte2 vectors with rounding from |x| + 0.5 to |x| + 1. </summary>
@@ -349,7 +356,14 @@ public static ushort2 avg(ushort2 x, ushort2 y)
349356
[MethodImpl(MethodImplOptions.AggressiveInlining)]
350357
public static ushort avg(ushort2 c)
351358
{
352-
return (ushort)((1u + csum(c)) / 2u);
359+
if (Sse2.IsSse2Supported)
360+
{
361+
return Sse2.avg_epu16(c, Sse2.bsrli_si128(c, 1 * sizeof(ushort))).UShort0;
362+
}
363+
else
364+
{
365+
return (ushort)((1u + csum(c)) / 2u);
366+
}
353367
}
354368

355369
/// <summary> Returns the componentwise average value of two ushort3 vectors with rounding from |x| + 0.5 to |x| + 1. </summary>
@@ -855,9 +869,9 @@ public static long2 avg(long2 x, long2 y)
855869
long2 result = x + y;
856870

857871
// if the intermediate sum is positive add 1
858-
if (Sse4_2.IsSse42Supported)
872+
if (Sse2.IsSse2Supported)
859873
{
860-
result -= Sse4_2.cmpgt_epi64(result, default(v128));
874+
result -= Operator.greater_mask_long(result, default(v128));
861875
}
862876
else
863877
{

0 commit comments

Comments
 (0)