Skip to content

Commit 8539a57

Browse files
erincattozero-meta
authored and committed
Cast benchmark (erincatto#817)
- optimized ray and shape cast : 2x faster - update mass options instead of automatic mass - fixes for 32-bit Windows build - b2TreeStats for measuring query performance - reduced tree node size from 48-bytes to 40-bytes - fixes for 32-bit Neon - MSVC warning level 4 and fixes
1 parent 006c548 commit 8539a57

25 files changed

+965
-251
lines changed

.github/workflows/build.yml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ jobs:
7575
arch: x64
7676

7777
- name: Configure CMake
78-
# enkiTS is failing ASAN on windows
7978
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF
8079
# run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF
8180

@@ -85,4 +84,22 @@ jobs:
8584
- name: Test
8685
working-directory: ${{github.workspace}}/build
8786
run: ./bin/${{env.BUILD_TYPE}}/test
87+
88+
samples-windows:
89+
name: windows
90+
runs-on: windows-latest
91+
steps:
92+
93+
- uses: actions/checkout@v4
94+
95+
- name: Setup MSVC dev command prompt
96+
uses: TheMrMilchmann/setup-msvc-dev@v3
97+
with:
98+
arch: x64
99+
100+
- name: Configure CMake
101+
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=Release -DBOX2D_SAMPLES=ON -DBUILD_SHARED_LIBS=OFF -DBOX2D_UNIT_TESTS=OFF
102+
103+
- name: Build
104+
run: cmake --build ${{github.workspace}}/build --config Release
88105

include/box2d/box2d.h

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -51,51 +51,56 @@ B2_API b2SensorEvents b2World_GetSensorEvents( b2WorldId worldId );
5151
B2_API b2ContactEvents b2World_GetContactEvents( b2WorldId worldId );
5252

5353
/// Overlap test for all shapes that *potentially* overlap the provided AABB
54-
B2_API void b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );
54+
B2_API b2TreeStats b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn,
55+
void* context );
5556

5657
/// Overlap test for all shapes that *potentially* overlap the provided AABB
5758
B2_API void b2World_OverlapAABBForLiquidFun( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );
5859

5960
/// Overlap test for all shapes that overlap the provided circle
60-
B2_API void b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform, b2QueryFilter filter,
61-
b2OverlapResultFcn* fcn, void* context );
61+
B2_API b2TreeStats b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform,
62+
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );
6263

6364
/// Overlap test for all shapes that overlap the provided capsule
64-
B2_API void b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform, b2QueryFilter filter,
65-
b2OverlapResultFcn* fcn, void* context );
65+
B2_API b2TreeStats b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform,
66+
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );
6667

6768
/// Overlap test for all shapes that overlap the provided polygon
68-
B2_API void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform, b2QueryFilter filter,
69-
b2OverlapResultFcn* fcn, void* context );
69+
B2_API b2TreeStats b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform,
70+
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );
7071

7172
/// Cast a ray into the world to collect shapes in the path of the ray.
7273
/// Your callback function controls whether you get the closest point, any point, or n-points.
7374
/// The ray-cast ignores shapes that contain the starting point.
75+
/// @note The callback function may receive shapes in any order
7476
/// @param worldId The world to cast the ray against
7577
/// @param origin The start point of the ray
7678
/// @param translation The translation of the ray from the start point to the end point
7779
/// @param filter Contains bit flags to filter unwanted shapes from the results
7880
/// @param fcn A user implemented callback function
7981
/// @param context A user context that is passed along to the callback function
80-
/// @note The callback function may receive shapes in any order
81-
B2_API void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn,
82-
void* context );
82+
/// @return traversal performance counters
83+
B2_API b2TreeStats b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter,
84+
b2CastResultFcn* fcn, void* context );
8385

8486
/// Cast a ray into the world to collect the closest hit. This is a convenience function.
8587
/// This is less general than b2World_CastRay() and does not allow for custom filtering.
8688
B2_API b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter );
8789

8890
/// Cast a circle through the world. Similar to a cast ray except that a circle is cast instead of a point.
89-
B2_API void b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform, b2Vec2 translation,
90-
b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
91+
/// @see b2World_CastRay
92+
B2_API b2TreeStats b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform,
93+
b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
9194

9295
/// Cast a capsule through the world. Similar to a cast ray except that a capsule is cast instead of a point.
93-
B2_API void b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform, b2Vec2 translation,
94-
b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
96+
/// @see b2World_CastRay
97+
B2_API b2TreeStats b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform,
98+
b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
9599

96100
/// Cast a polygon through the world. Similar to a cast ray except that a polygon is cast instead of a point.
97-
B2_API void b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform, b2Vec2 translation,
98-
b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
101+
/// @see b2World_CastRay
102+
B2_API b2TreeStats b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform,
103+
b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
99104

100105
/// Enable/disable sleep. If your application does not need sleeping, you can gain some performance
101106
/// by disabling sleep completely at the world level.
@@ -189,6 +194,9 @@ B2_API void b2World_DumpMemoryStats( b2WorldId worldId );
189194
/// Is locked?
190195
B2_API bool b2World_IsLocked( b2WorldId worldId );
191196

197+
/// todo testing
198+
B2_API void b2World_RebuildStaticTree( b2WorldId worldId );
199+
192200
/** @} */
193201

194202
/**
@@ -475,8 +483,10 @@ B2_API b2ShapeId b2CreateCapsuleShape( b2BodyId bodyId, const b2ShapeDef* def, c
475483
/// @return the shape id for accessing the shape
476484
B2_API b2ShapeId b2CreatePolygonShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon );
477485

478-
/// Destroy a shape
479-
B2_API void b2DestroyShape( b2ShapeId shapeId );
486+
/// Destroy a shape. You may defer the body mass update which can improve performance if several shapes on a
487+
/// body are destroyed at once.
488+
/// @see b2Body_ApplyMassFromShapes
489+
B2_API void b2DestroyShape( b2ShapeId shapeId, bool updateBodyMass );
480490

481491
/// Shape identifier validation. Provides validation for up to 64K allocations.
482492
B2_API bool b2Shape_IsValid( b2ShapeId id );
@@ -504,9 +514,9 @@ B2_API void b2Shape_SetUserData( b2ShapeId shapeId, void* userData );
504514
B2_API void* b2Shape_GetUserData( b2ShapeId shapeId );
505515

506516
/// Set the mass density of a shape, typically in kg/m^2.
507-
/// This will not update the mass properties on the parent body.
517+
/// This will optionally update the mass properties on the parent body.
508518
/// @see b2ShapeDef::density, b2Body_ApplyMassFromShapes
509-
B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density );
519+
B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density, bool updateBodyMass );
510520

511521
/// Get the density of a shape, typically in kg/m^2
512522
B2_API float b2Shape_GetDensity( b2ShapeId shapeId );

include/box2d/collision.h

Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ typedef struct b2Hull b2Hull;
2929
/// don't use more vertices.
3030
#define b2_maxPolygonVertices 8
3131

32-
/// Low level ray-cast input data
32+
/// Low level ray cast input data
3333
typedef struct b2RayCastInput
3434
{
3535
/// Start point of the ray cast
@@ -63,7 +63,7 @@ typedef struct b2ShapeCastInput
6363
float maxFraction;
6464
} b2ShapeCastInput;
6565

66-
/// Low level ray-cast or shape-cast output data
66+
/// Low level ray cast or shape-cast output data
6767
typedef struct b2CastOutput
6868
{
6969
/// The surface normal at the hit point
@@ -566,16 +566,16 @@ B2_API b2Manifold b2CollideSegmentAndPolygon( const b2Segment* segmentA, b2Trans
566566
b2Transform xfB );
567567

568568
/// Compute the contact manifold between a chain segment and a circle
569-
B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA,
570-
const b2Circle* circleB, b2Transform xfB );
569+
B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA, const b2Circle* circleB,
570+
b2Transform xfB );
571571

572572
/// Compute the contact manifold between a chain segment and a capsule
573-
B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA,
574-
const b2Capsule* capsuleB, b2Transform xfB, b2DistanceCache* cache );
573+
B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA, const b2Capsule* capsuleB,
574+
b2Transform xfB, b2DistanceCache* cache );
575575

576576
/// Compute the contact manifold between a chain segment and a rounded polygon
577-
B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA,
578-
const b2Polygon* polygonB, b2Transform xfB, b2DistanceCache* cache );
577+
B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA, const b2Polygon* polygonB,
578+
b2Transform xfB, b2DistanceCache* cache );
579579

580580
/**@}*/
581581

@@ -602,8 +602,7 @@ B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segment
602602
/// The default category bit for a tree proxy. Used for collision filtering.
603603
#define b2_defaultCategoryBits ( 1 )
604604

605-
/// Convenience mask bits to use when you don't need collision filtering and just want
606-
/// all results.
605+
/// Convenience mask bits to use when you don't need collision filtering and just want all results.
607606
#define b2_defaultMaskBits ( UINT64_MAX )
608607

609608
/// A node in the dynamic tree. This is private data placed here for performance reasons.
@@ -617,31 +616,27 @@ typedef struct b2TreeNode
617616

618617
union
619618
{
620-
/// The node parent index
619+
/// The node parent index (allocated node)
621620
int32_t parent;
622621

623-
/// The node freelist next index
622+
/// The node freelist next index (free node)
624623
int32_t next;
625624
}; // 4
626625

627-
/// Child 1 index
626+
/// Child 1 index (internal node)
628627
int32_t child1; // 4
629628

630-
/// Child 2 index
631-
int32_t child2; // 4
632-
633-
/// User data
634-
// todo could be union with child index
635-
int32_t userData; // 4
636-
637-
/// Leaf = 0, free node = -1
638-
int16_t height; // 2
629+
union
630+
{
631+
/// Child 2 index (internal node)
632+
int32_t child2;
639633

640-
/// Has the AABB been enlarged?
641-
bool enlarged; // 1
634+
/// User data (leaf node)
635+
int32_t userData;
636+
}; // 4
642637

643-
/// Padding for clarity
644-
char pad[5];
638+
uint16_t height; // 2
639+
uint16_t flags; // 2
645640
} b2TreeNode;
646641

647642
/// The dynamic tree structure. This should be considered private data.
@@ -682,6 +677,16 @@ typedef struct b2DynamicTree
682677
int32_t rebuildCapacity;
683678
} b2DynamicTree;
684679

680+
/// These are performance results returned by dynamic tree queries.
681+
typedef struct b2TreeStats
682+
{
683+
/// Number of internal nodes visited during the query
684+
int32_t nodeVisits;
685+
686+
/// Number of leaf nodes visited during the query
687+
int32_t leafVisits;
688+
} b2TreeStats;
689+
685690
/// Constructing the tree initializes the node pool.
686691
B2_API b2DynamicTree b2DynamicTree_Create( void );
687692

@@ -705,49 +710,53 @@ B2_API void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2
705710
typedef bool b2TreeQueryCallbackFcn( int32_t proxyId, int32_t userData, void* context );
706711

707712
/// Query an AABB for overlapping proxies. The callback class is called for each proxy that overlaps the supplied AABB.
708-
B2_API void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback,
709-
void* context );
713+
/// @return performance data
714+
B2_API b2TreeStats b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits,
715+
b2TreeQueryCallbackFcn* callback, void* context );
710716

711-
/// This function receives clipped raycast input for a proxy. The function
717+
/// This function receives clipped ray cast input for a proxy. The function
712718
/// returns the new ray fraction.
713719
/// - return a value of 0 to terminate the ray cast
714720
/// - return a value less than input->maxFraction to clip the ray
715721
/// - return a value of input->maxFraction to continue the ray cast without clipping
716722
typedef float b2TreeRayCastCallbackFcn( const b2RayCastInput* input, int32_t proxyId, int32_t userData, void* context );
717723

718-
/// Ray-cast against the proxies in the tree. This relies on the callback
719-
/// to perform a exact ray-cast in the case were the proxy contains a shape.
724+
/// Ray cast against the proxies in the tree. This relies on the callback
725+
/// to perform an exact ray cast in the case where the proxy contains a shape.
720726
/// The callback also performs any collision filtering. This has performance
721727
/// roughly equal to k * log(n), where k is the number of collisions and n is the
722728
/// number of proxies in the tree.
723729
/// Bit-wise filtering using mask bits can greatly improve performance in some scenarios.
730+
/// However, this filtering may be approximate, so the user should still apply filtering to results.
724731
/// @param tree the dynamic tree to ray cast
725-
/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1)
726-
/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;`
732+
/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1)
733+
/// @param maskBits mask bit hint: `bool accept = (maskBits & node->categoryBits) != 0;`
727734
/// @param callback a callback class that is called for each proxy that is hit by the ray
728735
/// @param context user context that is passed to the callback
729-
B2_API void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits,
730-
b2TreeRayCastCallbackFcn* callback, void* context );
736+
/// @return performance data
737+
B2_API b2TreeStats b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits,
738+
b2TreeRayCastCallbackFcn* callback, void* context );
731739

732-
/// This function receives clipped ray-cast input for a proxy. The function
740+
/// This function receives clipped ray cast input for a proxy. The function
733741
/// returns the new ray fraction.
734-
/// - return a value of 0 to terminate the ray-cast
742+
/// - return a value of 0 to terminate the ray cast
735743
/// - return a value less than input->maxFraction to clip the ray
736744
/// - return a value of input->maxFraction to continue the ray cast without clipping
737745
typedef float b2TreeShapeCastCallbackFcn( const b2ShapeCastInput* input, int32_t proxyId, int32_t userData, void* context );
738746

739-
/// Ray-cast against the proxies in the tree. This relies on the callback
740-
/// to perform a exact ray-cast in the case were the proxy contains a shape.
747+
/// Shape cast against the proxies in the tree. This relies on the callback
748+
/// to perform an exact shape cast in the case where the proxy contains a shape.
741749
/// The callback also performs any collision filtering. This has performance
742750
/// roughly equal to k * log(n), where k is the number of collisions and n is the
743751
/// number of proxies in the tree.
744752
/// @param tree the dynamic tree to ray cast
745-
/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1).
753+
/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1).
746754
/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;`
747755
/// @param callback a callback class that is called for each proxy that is hit by the shape
748756
/// @param context user context that is passed to the callback
749-
B2_API void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits,
750-
b2TreeShapeCastCallbackFcn* callback, void* context );
757+
/// @return performance data
758+
B2_API b2TreeStats b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits,
759+
b2TreeShapeCastCallbackFcn* callback, void* context );
751760

752761
/// Validate this tree. For testing.
753762
B2_API void b2DynamicTree_Validate( const b2DynamicTree* tree );
@@ -781,7 +790,6 @@ B2_API void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin );
781790
B2_API int b2DynamicTree_GetByteCount( const b2DynamicTree* tree );
782791

783792
/// Get proxy user data
784-
/// @return the proxy user data or 0 if the id is invalid
785793
B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId )
786794
{
787795
return tree->nodes[proxyId].userData;

include/box2d/types.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ typedef struct b2RayResult
5858
b2Vec2 point;
5959
b2Vec2 normal;
6060
float fraction;
61+
int nodeVisits;
62+
int leafVisits;
6163
bool hit;
6264
} b2RayResult;
6365

@@ -220,10 +222,6 @@ typedef struct b2BodyDef
220222
/// Used to disable a body. A disabled body does not move or collide.
221223
bool isEnabled;
222224

223-
/// Automatically compute mass and related properties on this body from shapes.
224-
/// Triggers whenever a shape is add/removed/changed. Default is true.
225-
bool automaticMass;
226-
227225
/// This allows this body to bypass rotational speed limits. Should only be used
228226
/// for circular objects, like wheels.
229227
bool allowFastRotation;
@@ -367,6 +365,9 @@ typedef struct b2ShapeDef
367365
/// This is implicitly always true for sensors.
368366
bool forceContactCreation;
369367

368+
/// Should the body update the mass properties when this shape is created. Default is true.
369+
bool updateBodyMass;
370+
370371
/// Used internally to detect a valid definition. DO NOT SET.
371372
int32_t internalValue;
372373
} b2ShapeDef;

samples/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ void* AllocFcn( uint32_t size, int32_t alignment )
6868
size_t sizeAligned = ( ( size - 1 ) | ( alignment - 1 ) ) + 1;
6969
assert( ( sizeAligned & ( alignment - 1 ) ) == 0 );
7070

71-
#if defined( _WIN64 )
71+
#if defined( _WIN64 ) || defined( _WIN32 )
7272
void* ptr = _aligned_malloc( sizeAligned, alignment );
7373
#else
7474
void* ptr = aligned_alloc( alignment, sizeAligned );
@@ -79,7 +79,7 @@ void* AllocFcn( uint32_t size, int32_t alignment )
7979

8080
void FreeFcn( void* mem )
8181
{
82-
#if defined( _WIN64 )
82+
#if defined( _WIN64 ) || defined( _WIN32 )
8383
_aligned_free( mem );
8484
#else
8585
free( mem );

0 commit comments

Comments
 (0)