MrUnbelievable92
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎Runtime/AssemblyInfo.cs
Lines changed: 3 additions & 2 deletions b/‎Runtime/AssemblyInfo.cs
Lines changed: 3 additions & 2 deletions
diff --git a/‎Runtime/Functions/Arithmetic/Absolute.cs
Lines changed: 2 additions & 2 deletions b/‎Runtime/Functions/Arithmetic/Absolute.cs
Lines changed: 2 additions & 2 deletions
diff --git a/‎Runtime/Functions/Arithmetic/Average.cs
Lines changed: 18 additions & 4 deletions b/‎Runtime/Functions/Arithmetic/Average.cs
Lines changed: 18 additions & 4 deletions
@@ -148,7 +148,7 @@ Note:
 
 - Division and modulo operations of (s)byte and (u)short vectors _by_ _other_ _vectors_ are implemented as either a long division algorithm ((s)byte32, (s)byte16 and (s)byte8 if not compiling for Avx2) or reciprocal multiplication after converting the vectors to float vectors (up to (s)byte8, all (u)short vectors) - it is very fast and, of course, 100% accurate!
 
-- This library uses Wojciech Mula's SIMD population count algorithm. Population count functions for (s)byte and (u)short types are very fast, and this library contains a function to sum up the number of 1-bits in a given block of memory based on it, too
+- This library uses Wojciech Mula's SIMD population count algorithm. You can count the number of set bits in a contiguous block of memory very efficiently using either the (s)byte32 (Avx2) or (s)byte16 (Ssse3) type
 
 ### Notes
 
 
@@ -15,6 +15,7 @@
 [assembly: AssemblyTrademark("")]
 [assembly: AssemblyCulture("")]
 [assembly: InternalsVisibleTo("MaxMath.Tests")]
+[assembly: InternalsVisibleTo("NativeArrayExtensions")]
 
 // Setting ComVisible to false makes the types in this assembly not visible 
 // to COM components.  If you need to access a type in this assembly from 
@@ -31,8 +32,8 @@
 //      Build Number
 //      Revision
 //
-[assembly: AssemblyVersion("2.1.1")]
-[assembly: AssemblyFileVersion("2.1.1")]
+[assembly: AssemblyVersion("2.1.2")]
+[assembly: AssemblyFileVersion("2.1.2")]
 [assembly: AssemblyInformationalVersion("2.1 Release")]
 
 [assembly: SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "Unity.Mathematics API consistency")]
@@ -237,9 +237,9 @@ public static int8 abs(int8 x)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static long2 abs(long2 x)
         {
-            if (Sse4_2.IsSse42Supported)
+            if (Sse2.IsSse2Supported)
             {
-                long2 mask = Sse4_2.cmpgt_epi64(default(v128), x);
+                long2 mask = Operator.greater_mask_long(default(v128), x);
 
                 return (x + mask) ^ mask;
             }
 
@@ -19,7 +19,14 @@ public static byte avg(byte x, byte y)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static byte avg(byte2 c)
         {
-            return (byte)((1u + csum(c)) / 2u);
+            if (Sse2.IsSse2Supported)
+            {
+                return Sse2.avg_epu8(c, Sse2.bsrli_si128(c, 1 * sizeof(byte))).Byte0;
+            }
+            else
+            {
+                return (byte)((1u + csum(c)) / 2u);
+            }
         }
 
         /// <summary>       Returns the componentwise average value of two byte2 vectors with rounding from |x| + 0.5 to |x| + 1.      </summary>
@@ -349,7 +356,14 @@ public static ushort2 avg(ushort2 x, ushort2 y)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static ushort avg(ushort2 c)
         {
-            return (ushort)((1u + csum(c)) / 2u);
+            if (Sse2.IsSse2Supported)
+            {
+                return Sse2.avg_epu16(c, Sse2.bsrli_si128(c, 1 * sizeof(ushort))).UShort0;
+            }
+            else
+            {
+                return (ushort)((1u + csum(c)) / 2u);
+            }
         }
 
         /// <summary>       Returns the componentwise average value of two ushort3 vectors with rounding from |x| + 0.5 to |x| + 1.      </summary>
@@ -855,9 +869,9 @@ public static long2 avg(long2 x, long2 y)
             long2 result = x + y;
 
             // if the intermediate sum is positive add 1
-            if (Sse4_2.IsSse42Supported)
+            if (Sse2.IsSse2Supported)
             {
-                result -= Sse4_2.cmpgt_epi64(result, default(v128));
+                result -= Operator.greater_mask_long(result, default(v128));
             }
             else
             {
Original file line number	Diff line number	Diff line change
`@@ -237,9 +237,9 @@ public static int8 abs(int8 x)`
`237`	`237`	`[MethodImpl(MethodImplOptions.AggressiveInlining)]`
`238`	`238`	`public static long2 abs(long2 x)`
`239`	`239`	`{`
`240`		`- if (Sse4_2.IsSse42Supported)`
	`240`	`+ if (Sse2.IsSse2Supported)`
`241`	`241`	`{`
`242`		`- long2 mask = Sse4_2.cmpgt_epi64(default(v128), x);`
	`242`	`+ long2 mask = Operator.greater_mask_long(default(v128), x);`
`243`	`243`
`244`	`244`	`return (x + mask) ^ mask;`
`245`	`245`	`}`
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,14 @@ public static byte avg(byte x, byte y)`
`19`	`19`	`[MethodImpl(MethodImplOptions.AggressiveInlining)]`
`20`	`20`	`public static byte avg(byte2 c)`
`21`	`21`	`{`
`22`		`- return (byte)((1u + csum(c)) / 2u);`
	`22`	`+ if (Sse2.IsSse2Supported)`
	`23`	`+ {`
	`24`	`+ return Sse2.avg_epu8(c, Sse2.bsrli_si128(c, 1 * sizeof(byte))).Byte0;`
	`25`	`+ }`
	`26`	`+ else`
	`27`	`+ {`
	`28`	`+ return (byte)((1u + csum(c)) / 2u);`
	`29`	`+ }`
`23`	`30`	`}`
`24`	`31`
`25`	`32`	`/// <summary> Returns the componentwise average value of two byte2 vectors with rounding from \|x\| + 0.5 to \|x\| + 1. </summary>`
`@@ -349,7 +356,14 @@ public static ushort2 avg(ushort2 x, ushort2 y)`
`349`	`356`	`[MethodImpl(MethodImplOptions.AggressiveInlining)]`
`350`	`357`	`public static ushort avg(ushort2 c)`
`351`	`358`	`{`
`352`		`- return (ushort)((1u + csum(c)) / 2u);`
	`359`	`+ if (Sse2.IsSse2Supported)`
	`360`	`+ {`
	`361`	`+ return Sse2.avg_epu16(c, Sse2.bsrli_si128(c, 1 * sizeof(ushort))).UShort0;`
	`362`	`+ }`
	`363`	`+ else`
	`364`	`+ {`
	`365`	`+ return (ushort)((1u + csum(c)) / 2u);`
	`366`	`+ }`
`353`	`367`	`}`
`354`	`368`
`355`	`369`	`/// <summary> Returns the componentwise average value of two ushort3 vectors with rounding from \|x\| + 0.5 to \|x\| + 1. </summary>`
`@@ -855,9 +869,9 @@ public static long2 avg(long2 x, long2 y)`
`855`	`869`	`long2 result = x + y;`
`856`	`870`
`857`	`871`	`// if the intermediate sum is positive add 1`
`858`		`- if (Sse4_2.IsSse42Supported)`
	`872`	`+ if (Sse2.IsSse2Supported)`
`859`	`873`	`{`
`860`		`- result -= Sse4_2.cmpgt_epi64(result, default(v128));`
	`874`	`+ result -= Operator.greater_mask_long(result, default(v128));`
`861`	`875`	`}`
`862`	`876`	`else`
`863`	`877`	`{`