From 6336023af8f34a009f2c88d142cf2260af529b02 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 4 Oct 2024 22:37:47 -0700 Subject: [PATCH 01/11] initial cast benchmark update mass options instead of automatic mass --- include/box2d/box2d.h | 10 +- include/box2d/types.h | 7 +- samples/sample_benchmark.cpp | 227 ++++++++++++++++++++++++++++++++++- samples/sample_events.cpp | 10 +- src/body.c | 1 - src/shape.c | 14 ++- src/types.c | 2 +- 7 files changed, 253 insertions(+), 18 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index 7b65841b5..a23d3f1dd 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -466,8 +466,10 @@ B2_API b2ShapeId b2CreateCapsuleShape( b2BodyId bodyId, const b2ShapeDef* def, c /// @return the shape id for accessing the shape B2_API b2ShapeId b2CreatePolygonShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon ); -/// Destroy a shape -B2_API void b2DestroyShape( b2ShapeId shapeId ); +/// Destroy a shape. You may defer the body mass update which can improve performance if several shapes on a +/// body are destroyed at once. +/// @see b2Body_ApplyMassFromShapes +B2_API void b2DestroyShape( b2ShapeId shapeId, bool updateBodyMass ); /// Shape identifier validation. Provides validation for up to 64K allocations. B2_API bool b2Shape_IsValid( b2ShapeId id ); @@ -492,9 +494,9 @@ B2_API void b2Shape_SetUserData( b2ShapeId shapeId, void* userData ); B2_API void* b2Shape_GetUserData( b2ShapeId shapeId ); /// Set the mass density of a shape, typically in kg/m^2. -/// This will not update the mass properties on the parent body. +/// This will optionally update the mass properties on the parent body. 
/// @see b2ShapeDef::density, b2Body_ApplyMassFromShapes -B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density ); +B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density, bool updateBodyMass ); /// Get the density of a shape, typically in kg/m^2 B2_API float b2Shape_GetDensity( b2ShapeId shapeId ); diff --git a/include/box2d/types.h b/include/box2d/types.h index 9faf6b84c..ead84a045 100644 --- a/include/box2d/types.h +++ b/include/box2d/types.h @@ -220,10 +220,6 @@ typedef struct b2BodyDef /// Used to disable a body. A disabled body does not move or collide. bool isEnabled; - /// Automatically compute mass and related properties on this body from shapes. - /// Triggers whenever a shape is add/removed/changed. Default is true. - bool automaticMass; - /// This allows this body to bypass rotational speed limits. Should only be used /// for circular objects, like wheels. bool allowFastRotation; @@ -367,6 +363,9 @@ typedef struct b2ShapeDef /// This is implicitly always true for sensors. bool forceContactCreation; + /// Should the body update the mass properties when this shape is created. Default is true. + bool updateBodyMass; + /// Used internally to detect a valid definition. DO NOT SET. 
int32_t internalValue; } b2ShapeDef; diff --git a/samples/sample_benchmark.cpp b/samples/sample_benchmark.cpp index d761803b3..af21f438d 100644 --- a/samples/sample_benchmark.cpp +++ b/samples/sample_benchmark.cpp @@ -329,6 +329,17 @@ class BenchmarkTumbler : public Sample polygon = b2MakeOffsetBox( 10.0f, 0.5f, { 0.0f, -10.0f }, b2Rot_identity ); b2CreatePolygonShape( bodyId, &shapeDef, &polygon ); + shapeDef.customColor = b2_colorBlueViolet; + b2Circle circle = { { 5.0f, 5.0f }, 1.0f }; + b2CreateCircleShape( bodyId, &shapeDef, &circle ); + circle = { { 5.0f, -5.0f }, 1.0f }; + b2CreateCircleShape( bodyId, &shapeDef, &circle ); + circle = { { -5.0f, -5.0f }, 1.0f }; + b2CreateCircleShape( bodyId, &shapeDef, &circle ); + circle = { { -5.0f, 5.0f }, 1.0f }; + b2CreateCircleShape( bodyId, &shapeDef, &circle ); + + // m_motorSpeed = 9.0f; m_motorSpeed = 25.0f; @@ -1436,8 +1447,8 @@ class BenchmarkCompound : public Sample b2BodyDef bodyDef = b2DefaultBodyDef(); bodyDef.type = b2_dynamicBody; // defer mass properties to avoid n-squared mass computations - bodyDef.automaticMass = false; b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.updateBodyMass = false; for ( int m = 0; m < count; ++m ) { @@ -1498,13 +1509,14 @@ class BenchmarkKinematic : public Sample b2BodyDef bodyDef = b2DefaultBodyDef(); bodyDef.type = b2_kinematicBody; bodyDef.angularVelocity = 1.0f; - // defer mass properties to avoid n-squared mass computations - bodyDef.automaticMass = false; b2ShapeDef shapeDef = b2DefaultShapeDef(); shapeDef.filter.categoryBits = 1; shapeDef.filter.maskBits = 2; + // defer mass properties to avoid n-squared mass computations + shapeDef.updateBodyMass = false; + b2BodyId bodyId = b2CreateBody( m_worldId, &bodyDef ); for ( int i = -span; i < span; ++i ) @@ -1529,3 +1541,212 @@ class BenchmarkKinematic : public Sample }; static int sampleKinematic = RegisterSample( "Benchmark", "Kinematic", BenchmarkKinematic::Create ); + +#if 1 + +enum QueryType +{ + e_rayCast, + 
e_shapeCast, + e_overlap, +}; + +class BenchmarkCast : public Sample +{ +public: + explicit BenchmarkCast( Settings& settings ) + : Sample( settings ) + { + if ( settings.restart == false ) + { + g_camera.m_center = { 500.0f, 500.0f }; + g_camera.m_zoom = 25.0f * 21.0f; + } + + m_queryType = e_rayCast; + m_ratio = 5.0f; + m_grid = 1.0f; + m_fill = 0.1f; + m_rowCount = g_sampleDebug ? 100 : 1000; + m_columnCount = g_sampleDebug ? 100 : 1000; + m_categoryBits = true; + + BuildScene(); + } + + void BuildScene() + { + g_seed = 1234; + b2DestroyWorld( m_worldId ); + b2WorldDef worldDef = b2DefaultWorldDef(); + m_worldId = b2CreateWorld( &worldDef ); + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2ShapeDef shapeDef = b2DefaultShapeDef(); + + float y = 0.0f; + + for ( int i = 0; i < m_rowCount; ++i ) + { + float x = 0.0f; + + for ( int j = 0; j < m_columnCount; ++j ) + { + float fillTest = RandomFloat( 0.0f, 1.0f ); + if ( fillTest <= m_fill ) + { + bodyDef.position = { x, y }; + b2BodyId bodyId = b2CreateBody( m_worldId, &bodyDef ); + + float ratio = RandomFloat( 1.0f, m_ratio ); + float halfWidth = RandomFloat( 0.05f, 0.25f ); + + b2Polygon box; + if ( RandomFloat() > 0.0f ) + { + box = b2MakeBox( ratio * halfWidth, halfWidth ); + } + else + { + box = b2MakeBox( halfWidth, ratio * halfWidth ); + } + + int category = RandomInt( 1, 3 ); + shapeDef.filter.categoryBits = category; + if ( category == 1 ) + { + shapeDef.customColor = b2_colorBox2DBlue; + } + else if ( category == 2 ) + { + shapeDef.customColor = b2_colorBox2DYellow; + } + else + { + shapeDef.customColor = b2_colorBox2DGreen; + } + + b2CreatePolygonShape( bodyId, &shapeDef, &box ); + } + + x += m_grid; + } + + y += m_grid; + } + } + + void UpdateUI() override + { + float height = 320.0f; + ImGui::SetNextWindowPos( ImVec2( 10.0f, g_camera.m_height - height - 50.0f ), ImGuiCond_Once ); + ImGui::SetNextWindowSize( ImVec2( 200.0f, height ) ); + + ImGui::Begin( "Cast", nullptr, ImGuiWindowFlags_NoMove | 
ImGuiWindowFlags_NoResize ); + + ImGui::PushItemWidth( 100.0f ); + + bool changed = false; + if ( ImGui::SliderInt( "rows", &m_rowCount, 0, 1000, "%d" ) ) + { + changed = true; + } + + if ( ImGui::SliderInt( "columns", &m_columnCount, 0, 1000, "%d" ) ) + { + changed = true; + } + + if ( ImGui::SliderFloat( "fill", &m_fill, 0.0f, 1.0f, "%.2f" ) ) + { + changed = true; + } + + if ( ImGui::SliderFloat( "grid", &m_grid, 0.5f, 2.0f, "%.2f" ) ) + { + changed = true; + } + + if ( ImGui::SliderFloat( "ratio", &m_ratio, 1.0f, 10.0f, "%.2f" ) ) + { + changed = true; + } + + if ( ImGui::Checkbox( "categories", &m_categoryBits) ) + { + changed = true; + } + + const char* queryTypes[] = { "Ray Cast", "Circle Cast", "Overlap" }; + int queryType = int( m_queryType ); + changed = changed || ImGui::Combo( "Query", &queryType, queryTypes, IM_ARRAYSIZE( queryTypes ) ); + m_queryType = QueryType( queryType ); + + ImGui::PopItemWidth(); + ImGui::End(); + + if ( changed ) + { + BuildScene(); + } + } + + void Step( Settings& settings) override + { + Sample::Step( settings ); + + int sampleCount = g_sampleDebug ? 10 : 1000; + + float extent = m_rowCount * m_grid; + b2QueryFilter filter = b2DefaultQueryFilter(); + filter.maskBits = 1; + int hitCount = 0; + float ms = 0.0f; + + if (m_queryType == e_rayCast) + { + b2Timer timer = b2CreateTimer(); + + b2Vec2 rayStart = b2Vec2_zero; + b2Vec2 rayEnd = b2Vec2_zero; + for (int i = 0; i < sampleCount; ++i) + { + rayStart = RandomVec2( 0.0f, extent ); + rayEnd = RandomVec2( 0.0f, extent ); + + b2RayResult result = b2World_CastRayClosest( m_worldId, rayStart, b2Sub( rayEnd, rayStart ), filter ); + hitCount += result.hit ? 
1 : 0; + } + + ms = b2GetMilliseconds( &timer ); + + g_draw.DrawSegment( rayStart, rayEnd, b2_colorBeige ); + } + + g_draw.DrawString( 5, m_textLine, "hit count = %03d", hitCount ); + m_textLine += m_textIncrement; + + g_draw.DrawString( 5, m_textLine, "ms = %.3f",ms ); + m_textLine += m_textIncrement; + } + + static Sample* Create( Settings& settings ) + { + return new BenchmarkCast( settings ); + } + + QueryType m_queryType; + + std::vector m_origins; + std::vector m_translations; + + int m_rowCount, m_columnCount; + int m_updateType; + float m_fill; + float m_ratio; + float m_grid; + bool m_categoryBits; +}; + +static int sampleCast = RegisterSample( "Benchmark", "Cast", BenchmarkCast::Create ); +#endif diff --git a/samples/sample_events.cpp b/samples/sample_events.cpp index 94d03e5a6..8e87121aa 100644 --- a/samples/sample_events.cpp +++ b/samples/sample_events.cpp @@ -672,9 +672,17 @@ class ContactEvent : public Sample m_debrisIds[index] = b2_nullBodyId; } + for ( int i = 0; i < destroyCount; ++i ) { - b2DestroyShape( shapesToDestroy[i] ); + bool updateMass = false; + b2DestroyShape( shapesToDestroy[i], updateMass ); + } + + if (destroyCount > 0) + { + // Update mass just once + b2Body_ApplyMassFromShapes( m_playerId ); } if ( settings.hertz > 0.0f && settings.pause == false ) diff --git a/src/body.c b/src/body.c index 639914258..273b04c34 100644 --- a/src/body.c +++ b/src/body.c @@ -294,7 +294,6 @@ b2BodyId b2CreateBody( b2WorldId worldId, const b2BodyDef* def ) body->fixedRotation = def->fixedRotation; body->isSpeedCapped = false; body->isMarked = false; - body->automaticMass = def->automaticMass; // dynamic and kinematic bodies that are enabled need a island if ( setId >= b2_awakeSet ) diff --git a/src/shape.c b/src/shape.c index b63602253..f2f3900ad 100644 --- a/src/shape.c +++ b/src/shape.c @@ -165,7 +165,7 @@ b2ShapeId b2CreateShape( b2BodyId bodyId, const b2ShapeDef* def, const void* geo b2Shape* shape = b2CreateShapeInternal( world, body, transform, 
def, geometry, shapeType ); - if ( body->automaticMass == true ) + if ( def->updateBodyMass == true ) { b2UpdateBodyMassData( world, body ); } @@ -262,7 +262,7 @@ void b2DestroyShapeInternal( b2World* world, b2Shape* shape, b2Body* body, bool b2ValidateSolverSets( world ); } -void b2DestroyShape( b2ShapeId shapeId ) +void b2DestroyShape( b2ShapeId shapeId, bool updateBodyMass ) { b2World* world = b2GetWorldLocked( shapeId.world0 ); @@ -274,7 +274,7 @@ void b2DestroyShape( b2ShapeId shapeId ) b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId ); b2DestroyShapeInternal( world, shape, body, wakeBodies ); - if ( body->automaticMass == true ) + if ( updateBodyMass == true ) { b2UpdateBodyMassData( world, body ); } @@ -911,7 +911,7 @@ b2CastOutput b2Shape_RayCast( b2ShapeId shapeId, const b2RayCastInput* input ) return output; } -void b2Shape_SetDensity( b2ShapeId shapeId, float density ) +void b2Shape_SetDensity( b2ShapeId shapeId, float density, bool updateBodyMass ) { B2_ASSERT( b2IsValid( density ) && density >= 0.0f ); @@ -929,6 +929,12 @@ void b2Shape_SetDensity( b2ShapeId shapeId, float density ) } shape->density = density; + + if (updateBodyMass == true) + { + b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId ); + b2UpdateBodyMassData( world, body ); + } } float b2Shape_GetDensity( b2ShapeId shapeId ) diff --git a/src/types.c b/src/types.c index 42556d4fe..5dd8e45a9 100644 --- a/src/types.c +++ b/src/types.c @@ -37,7 +37,6 @@ b2BodyDef b2DefaultBodyDef( void ) def.enableSleep = true; def.isAwake = true; def.isEnabled = true; - def.automaticMass = true; def.internalValue = B2_SECRET_COOKIE; return def; } @@ -62,6 +61,7 @@ b2ShapeDef b2DefaultShapeDef( void ) def.filter = b2DefaultFilter(); def.enableSensorEvents = true; def.enableContactEvents = true; + def.updateBodyMass = true; def.internalValue = B2_SECRET_COOKIE; return def; } From 8f22f3f8a92acae59d5bfb8f0a1989afba819392 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 4 Oct 2024 
22:53:50 -0700 Subject: [PATCH 02/11] increased warning level to W4 on MSVC for #816 --- src/CMakeLists.txt | 3 +++ src/body.c | 1 + src/dynamic_tree.c | 29 ++++++++++++++--------------- src/hull.c | 2 +- src/joint.h | 9 +++++---- src/manifold.c | 2 +- src/motor_joint.c | 1 + src/weld_joint.c | 2 -- src/wheel_joint.c | 5 ----- src/world.c | 6 +++--- 10 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 695fff850..0bc507726 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -135,6 +135,9 @@ if (MSVC) # Atomics are still considered experimental in Visual Studio 17.8 target_compile_options(box2d PRIVATE /experimental:c11atomics) + # All warnings + target_compile_options(box2d PRIVATE /W4) + if (BOX2D_AVX2) message(STATUS "Box2D using AVX2") target_compile_definitions(box2d PRIVATE BOX2D_AVX2) diff --git a/src/body.c b/src/body.c index 273b04c34..7dc6f469b 100644 --- a/src/body.c +++ b/src/body.c @@ -309,6 +309,7 @@ b2BodyId b2CreateBody( b2WorldId worldId, const b2BodyDef* def ) bool b2IsBodyAwake( b2World* world, b2Body* body ) { + B2_MAYBE_UNUSED( world ); return body->setIndex == b2_awakeSet; } diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 322b5221c..92885d1fc 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -871,7 +871,7 @@ static int b2ComputeHeight( const b2DynamicTree* tree, int32_t nodeId ) int32_t height1 = b2ComputeHeight( tree, node->child1 ); int32_t height2 = b2ComputeHeight( tree, node->child2 ); - return 1 + b2MaxInt16( height1, height2 ); + return 1 + b2MaxInt( height1, height2 ); } int b2DynamicTree_ComputeHeight( const b2DynamicTree* tree ) @@ -930,8 +930,8 @@ static void b2ValidateMetrics( const b2DynamicTree* tree, int32_t index ) const b2TreeNode* node = tree->nodes + index; - int32_t child1 = node->child1; - int32_t child2 = node->child2; + int child1 = node->child1; + int child2 = node->child2; if ( b2IsLeaf( node ) ) { @@ -944,10 +944,9 @@ static void 
b2ValidateMetrics( const b2DynamicTree* tree, int32_t index ) B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); - int32_t height1 = tree->nodes[child1].height; - int32_t height2 = tree->nodes[child2].height; - int32_t height; - height = 1 + b2MaxInt16( height1, height2 ); + int height1 = tree->nodes[child1].height; + int height2 = tree->nodes[child2].height; + int height = 1 + b2MaxInt( height1, height2 ); B2_ASSERT( node->height == height ); // b2AABB aabb = b2AABB_Union(tree->nodes[child1].aabb, tree->nodes[child2].aabb); @@ -1000,8 +999,8 @@ void b2DynamicTree_Validate( const b2DynamicTree* tree ) int32_t b2DynamicTree_GetMaxBalance( const b2DynamicTree* tree ) { - int32_t maxBalance = 0; - for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) + int maxBalance = 0; + for ( int i = 0; i < tree->nodeCapacity; ++i ) { const b2TreeNode* node = tree->nodes + i; if ( node->height <= 1 ) @@ -1011,9 +1010,9 @@ int32_t b2DynamicTree_GetMaxBalance( const b2DynamicTree* tree ) B2_ASSERT( b2IsLeaf( node ) == false ); - int32_t child1 = node->child1; - int32_t child2 = node->child2; - int32_t balance = b2AbsFloat( tree->nodes[child2].height - tree->nodes[child1].height ); + int child1 = node->child1; + int child2 = node->child2; + int balance = b2AbsInt( tree->nodes[child2].height - tree->nodes[child1].height ); maxBalance = b2MaxInt( maxBalance, balance ); } @@ -1022,11 +1021,11 @@ int32_t b2DynamicTree_GetMaxBalance( const b2DynamicTree* tree ) void b2DynamicTree_RebuildBottomUp( b2DynamicTree* tree ) { - int32_t* nodes = b2Alloc( tree->nodeCount * sizeof( int32_t ) ); - int32_t count = 0; + int* nodes = b2Alloc( tree->nodeCount * sizeof( int ) ); + int count = 0; // Build array of leaves. Free the rest. 
- for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) + for ( int i = 0; i < tree->nodeCapacity; ++i ) { if ( tree->nodes[i].height < 0 ) { diff --git a/src/hull.c b/src/hull.c index 41fd76de1..eb0f4c5c7 100644 --- a/src/hull.c +++ b/src/hull.c @@ -94,7 +94,7 @@ b2Hull b2ComputeHull( const b2Vec2* points, int count ) return hull; } - count = b2MinFloat( count, b2_maxPolygonVertices ); + count = b2MinInt( count, b2_maxPolygonVertices ); b2AABB aabb = { { FLT_MAX, FLT_MAX }, { -FLT_MAX, -FLT_MAX } }; diff --git a/src/joint.h b/src/joint.h index 80ac04bfb..441e4394c 100644 --- a/src/joint.h +++ b/src/joint.h @@ -47,13 +47,14 @@ typedef struct b2Joint int islandPrev; int islandNext; - // This is monotonically advanced when a body is allocated in this slot - // Used to check for invalid b2JointId - int revision; - float drawSize; b2JointType type; + + // This is monotonically advanced when a body is allocated in this slot + // Used to check for invalid b2JointId + uint16_t revision; + bool isMarked; bool collideConnected; diff --git a/src/manifold.c b/src/manifold.c index b66785d28..38f7c94d2 100644 --- a/src/manifold.c +++ b/src/manifold.c @@ -328,7 +328,7 @@ b2Manifold b2CollideCapsules( const b2Capsule* capsuleA, b2Transform xfA, const return manifold; } - float distance = sqrt( distanceSquared ); + float distance = sqrtf( distanceSquared ); float length1, length2; b2Vec2 u1 = b2GetLengthAndNormalize( &length1, d1 ); diff --git a/src/motor_joint.c b/src/motor_joint.c index b41f685a8..721e98974 100644 --- a/src/motor_joint.c +++ b/src/motor_joint.c @@ -186,6 +186,7 @@ void b2WarmStartMotorJoint( b2JointSim* base, b2StepContext* context ) void b2SolveMotorJoint( b2JointSim* base, b2StepContext* context, bool useBias ) { + B2_MAYBE_UNUSED( useBias ); B2_ASSERT( base->type == b2_motorJoint ); float mA = base->invMassA; diff --git a/src/weld_joint.c b/src/weld_joint.c index e772fde22..965a5210f 100644 --- a/src/weld_joint.c +++ b/src/weld_joint.c @@ -150,8 +150,6 @@ void 
b2PrepareWeldJoint( b2JointSim* base, b2StepContext* context ) float ka = iA + iB; joint->axialMass = ka > 0.0f ? 1.0f / ka : 0.0f; - const float h = context->dt; - if ( joint->linearHertz == 0.0f ) { joint->linearSoftness = context->jointSoftness; diff --git a/src/wheel_joint.c b/src/wheel_joint.c index e4d96b401..b2a971dfc 100644 --- a/src/wheel_joint.c +++ b/src/wheel_joint.c @@ -315,9 +315,6 @@ void b2SolveWheelJoint( b2JointSim* base, b2StepContext* context, bool useBias ) b2WheelJoint* joint = &base->wheelJoint; - // This is a dummy body to represent a static body since static bodies don't have a solver body. - b2BodyState dummyBody = { 0 }; - b2BodyState* stateA = joint->indexA == B2_NULL_INDEX ? &dummyState : context->states + joint->indexA; b2BodyState* stateB = joint->indexB == B2_NULL_INDEX ? &dummyState : context->states + joint->indexB; @@ -378,8 +375,6 @@ void b2SolveWheelJoint( b2JointSim* base, b2StepContext* context, bool useBias ) if ( joint->enableLimit ) { - float translation = b2Dot( axisA, d ); - // Lower limit { float C = translation - joint->lowerTranslation; diff --git a/src/world.c b/src/world.c index 68be3286e..e9b7d718a 100644 --- a/src/world.c +++ b/src/world.c @@ -294,7 +294,7 @@ void b2DestroyWorld( b2WorldId worldId ) // Wipe world but preserve revision uint16_t revision = world->revision; *world = ( b2World ){ 0 }; - world->worldId = B2_NULL_INDEX; + world->worldId = 0; world->revision = revision + 1; } @@ -304,7 +304,7 @@ static void b2CollideTask( int startIndex, int endIndex, uint32_t threadIndex, v b2StepContext* stepContext = context; b2World* world = stepContext->world; - B2_ASSERT( threadIndex < world->workerCount ); + B2_ASSERT( (int)threadIndex < world->workerCount ); b2TaskContext* taskContext = world->taskContexts.data + threadIndex; b2ContactSim** contactSims = stepContext->contacts; b2Shape* shapes = world->shapes.data; @@ -2891,7 +2891,7 @@ void b2ValidateSolverSets( b2World* world ) int contactIdCount = b2GetIdCount( 
&world->contactIdPool ); B2_ASSERT( totalContactCount == contactIdCount ); - B2_ASSERT( totalContactCount == world->broadPhase.pairSet.count ); + B2_ASSERT( totalContactCount == (int)world->broadPhase.pairSet.count ); int jointIdCount = b2GetIdCount( &world->jointIdPool ); B2_ASSERT( totalJointCount == jointIdCount ); From 33474fb77fa66ebd5843e4be99e7db837584bf05 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 5 Oct 2024 21:42:45 -0700 Subject: [PATCH 03/11] testing 32-byte tree node --- include/box2d/box2d.h | 5 +- include/box2d/collision.h | 113 ++- include/box2d/types.h | 2 + samples/sample_benchmark.cpp | 106 +- samples/sample_collision.cpp | 5 +- src/broad_phase.c | 9 +- src/core.c | 2 +- src/core.h | 8 + src/dynamic_tree.c | 1786 +++++++++++++++++++++++++++++++++- src/solver.c | 3 +- src/world.c | 18 +- 11 files changed, 1979 insertions(+), 78 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index a23d3f1dd..af3eec88a 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -68,14 +68,15 @@ B2_API void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, /// Cast a ray into the world to collect shapes in the path of the ray. /// Your callback function controls whether you get the closest point, any point, or n-points. /// The ray-cast ignores shapes that contain the starting point. 
+/// @note The callback function may receive shapes in any order /// @param worldId The world to cast the ray against /// @param origin The start point of the ray /// @param translation The translation of the ray from the start point to the end point /// @param filter Contains bit flags to filter unwanted shapes from the results /// @param fcn A user implemented callback function /// @param context A user context that is passed along to the callback function -/// @note The callback function may receive shapes in any order -B2_API void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, +/// @return traversal performance counters +B2_API b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); /// Cast a ray into the world to collect the closest hit. This is a convenience function. diff --git a/include/box2d/collision.h b/include/box2d/collision.h index 0e55b9aa3..73e1ec250 100644 --- a/include/box2d/collision.h +++ b/include/box2d/collision.h @@ -29,7 +29,7 @@ typedef struct b2Hull b2Hull; /// don't use more vertices. #define b2_maxPolygonVertices 8 -/// Low level ray-cast input data +/// Low level ray cast input data typedef struct b2RayCastInput { /// Start point of the ray cast @@ -63,7 +63,7 @@ typedef struct b2ShapeCastInput float maxFraction; } b2ShapeCastInput; -/// Low level ray-cast or shape-cast output data +/// Low level ray cast or shape-cast output data typedef struct b2CastOutput { /// The surface normal at the hit point @@ -602,10 +602,12 @@ B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segment /// The default category bit for a tree proxy. Used for collision filtering. #define b2_defaultCategoryBits ( 1 ) -/// Convenience mask bits to use when you don't need collision filtering and just want -/// all results. 
+/// Convenience mask bits to use when you don't need collision filtering and just want all results. #define b2_defaultMaskBits ( UINT64_MAX ) +#define B2_TREE_32 0 + +#if B2_TREE_32 == 0 /// A node in the dynamic tree. This is private data placed here for performance reasons. typedef struct b2TreeNode { @@ -627,23 +629,62 @@ typedef struct b2TreeNode /// Child 1 index int32_t child1; // 4 - /// Child 2 index - int32_t child2; // 4 + union + { + /// Child 2 index + int32_t child2; - /// User data - // todo could be union with child index - int32_t userData; // 4 + /// User data + int32_t userData; + }; // 4 /// Leaf = 0, free node = -1 - int16_t height; // 2 + uint16_t height; // 2 + uint16_t flags; +} b2TreeNode; - /// Has the AABB been enlarged? - bool enlarged; // 1 +#else + +struct b2InternalNode +{ + int32_t child1; + int32_t child2; +}; + +struct b2LeafNode +{ + // limited to 32 bits, see b2TreeNode32::e_category64 + uint32_t categoryBits; + int32_t userData; +}; + +typedef struct b2TreeNode +{ + /// The node bounding box + b2AABB aabb; // 16 + + union + { + struct b2InternalNode internal; + struct b2LeafNode leaf; + }; // 8 + + union + { + /// The node parent index + int32_t parent; + + /// The node freelist next index + int32_t next; + }; // 4 + + uint16_t height; // 2 + uint16_t flags; // 2 - /// Padding for clarity - char pad[5]; } b2TreeNode; +#endif + /// The dynamic tree structure. This should be considered private data. /// It is placed here for performance reasons. typedef struct b2DynamicTree @@ -682,6 +723,13 @@ typedef struct b2DynamicTree int32_t rebuildCapacity; } b2DynamicTree; +/// These are performance results returned by BVH queries. +typedef struct b2TraversalResult +{ + int32_t nodeVisits; + int32_t leafVisits; +} b2TraversalResult; + /// Constructing the tree initializes the node pool. 
B2_API b2DynamicTree b2DynamicTree_Create( void ); @@ -708,41 +756,43 @@ typedef bool b2TreeQueryCallbackFcn( int32_t proxyId, int32_t userData, void* co B2_API void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, void* context ); -/// This function receives clipped raycast input for a proxy. The function +/// This function receives clipped ray cast input for a proxy. The function /// returns the new ray fraction. /// - return a value of 0 to terminate the ray cast /// - return a value less than input->maxFraction to clip the ray /// - return a value of input->maxFraction to continue the ray cast without clipping typedef float b2TreeRayCastCallbackFcn( const b2RayCastInput* input, int32_t proxyId, int32_t userData, void* context ); -/// Ray-cast against the proxies in the tree. This relies on the callback -/// to perform a exact ray-cast in the case were the proxy contains a shape. +/// Ray cast against the proxies in the tree. This relies on the callback +/// to perform a exact ray cast in the case were the proxy contains a shape. /// The callback also performs the any collision filtering. This has performance /// roughly equal to k * log(n), where k is the number of collisions and n is the /// number of proxies in the tree. /// Bit-wise filtering using mask bits can greatly improve performance in some scenarios. +/// However, this filtering may be approximate, so the user should still apply filtering to results. /// @param tree the dynamic tree to ray cast -/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1) -/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;` +/// @param input the ray cast input data. 
The ray extends from p1 to p1 + maxFraction * (p2 - p1) +/// @param maskBits mask bit hint: `bool accept = (maskBits & node->categoryBits) != 0;` /// @param callback a callback class that is called for each proxy that is hit by the ray /// @param context user context that is passed to the callback -B2_API void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, +/// @return performance data +B2_API b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, b2TreeRayCastCallbackFcn* callback, void* context ); -/// This function receives clipped ray-cast input for a proxy. The function +/// This function receives clipped ray cast input for a proxy. The function /// returns the new ray fraction. -/// - return a value of 0 to terminate the ray-cast +/// - return a value of 0 to terminate the ray cast /// - return a value less than input->maxFraction to clip the ray /// - return a value of input->maxFraction to continue the ray cast without clipping typedef float b2TreeShapeCastCallbackFcn( const b2ShapeCastInput* input, int32_t proxyId, int32_t userData, void* context ); -/// Ray-cast against the proxies in the tree. This relies on the callback -/// to perform a exact ray-cast in the case were the proxy contains a shape. +/// Ray cast against the proxies in the tree. This relies on the callback +/// to perform a exact ray cast in the case were the proxy contains a shape. /// The callback also performs the any collision filtering. This has performance /// roughly equal to k * log(n), where k is the number of collisions and n is the /// number of proxies in the tree. /// @param tree the dynamic tree to ray cast -/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1). +/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1). 
/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;` /// @param callback a callback class that is called for each proxy that is hit by the shape /// @param context user context that is passed to the callback @@ -780,6 +830,8 @@ B2_API void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin ); /// Get the number of bytes used by this tree B2_API int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ); +#if B2_TREE_32 == 0 + /// Get proxy user data /// @return the proxy user data or 0 if the id is invalid B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId ) @@ -787,6 +839,17 @@ B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t return tree->nodes[proxyId].userData; } +#else + +/// Get proxy user data +/// @return the proxy user data or 0 if the id is invalid +B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId ) +{ + return tree->nodes[proxyId].leaf.userData; +} + +#endif + /// Get the AABB of a proxy B2_INLINE b2AABB b2DynamicTree_GetAABB( const b2DynamicTree* tree, int32_t proxyId ) { diff --git a/include/box2d/types.h b/include/box2d/types.h index ead84a045..672e105b8 100644 --- a/include/box2d/types.h +++ b/include/box2d/types.h @@ -58,6 +58,8 @@ typedef struct b2RayResult b2Vec2 point; b2Vec2 normal; float fraction; + int nodeVisits; + int leafVisits; bool hit; } b2RayResult; diff --git a/samples/sample_benchmark.cpp b/samples/sample_benchmark.cpp index af21f438d..158153f66 100644 --- a/samples/sample_benchmark.cpp +++ b/samples/sample_benchmark.cpp @@ -339,7 +339,6 @@ class BenchmarkTumbler : public Sample circle = { { -5.0f, 5.0f }, 1.0f }; b2CreateCircleShape( bodyId, &shapeDef, &circle ); - // m_motorSpeed = 9.0f; m_motorSpeed = 25.0f; @@ -1554,6 +1553,7 @@ enum QueryType class BenchmarkCast : public Sample { public: + explicit BenchmarkCast( Settings& settings ) : Sample( settings ) { @@ -1561,6 +1561,7 @@ 
class BenchmarkCast : public Sample { g_camera.m_center = { 500.0f, 500.0f }; g_camera.m_zoom = 25.0f * 21.0f; + settings.drawShapes = g_sampleDebug; } m_queryType = e_rayCast; @@ -1569,7 +1570,24 @@ class BenchmarkCast : public Sample m_fill = 0.1f; m_rowCount = g_sampleDebug ? 100 : 1000; m_columnCount = g_sampleDebug ? 100 : 1000; - m_categoryBits = true; + m_minTime = 1e6f; + m_drawIndex = 0; + + g_seed = 1234; + int sampleCount = g_sampleDebug ? 100 : 10000; + m_origins.resize( sampleCount ); + m_translations.resize( sampleCount ); + float extent = m_rowCount * m_grid; + + // Pre-compute rays to avoid randomizer overhead + for ( int i = 0; i < sampleCount; ++i ) + { + b2Vec2 rayStart = RandomVec2( 0.0f, extent ); + b2Vec2 rayEnd = RandomVec2( 0.0f, extent ); + + m_origins[i] = rayStart; + m_translations[i] = rayEnd - rayStart; + } BuildScene(); } @@ -1611,13 +1629,13 @@ class BenchmarkCast : public Sample box = b2MakeBox( halfWidth, ratio * halfWidth ); } - int category = RandomInt( 1, 3 ); - shapeDef.filter.categoryBits = category; - if ( category == 1 ) + int category = RandomInt( 0, 2 ); + shapeDef.filter.categoryBits = 1 << category; + if ( category == 0 ) { shapeDef.customColor = b2_colorBox2DBlue; } - else if ( category == 2 ) + else if ( category == 1 ) { shapeDef.customColor = b2_colorBox2DYellow; } @@ -1638,7 +1656,7 @@ class BenchmarkCast : public Sample void UpdateUI() override { - float height = 320.0f; + float height = 220.0f; ImGui::SetNextWindowPos( ImVec2( 10.0f, g_camera.m_height - height - 50.0f ), ImGuiCond_Once ); ImGui::SetNextWindowSize( ImVec2( 200.0f, height ) ); @@ -1672,16 +1690,16 @@ class BenchmarkCast : public Sample changed = true; } - if ( ImGui::Checkbox( "categories", &m_categoryBits) ) - { - changed = true; - } - const char* queryTypes[] = { "Ray Cast", "Circle Cast", "Overlap" }; int queryType = int( m_queryType ); changed = changed || ImGui::Combo( "Query", &queryType, queryTypes, IM_ARRAYSIZE( queryTypes ) ); m_queryType = 
QueryType( queryType ); + if ( ImGui::Button( "Draw Next" ) ) + { + m_drawIndex = ( m_drawIndex + 1 ) % m_origins.size(); + } + ImGui::PopItemWidth(); ImGui::End(); @@ -1691,42 +1709,73 @@ class BenchmarkCast : public Sample } } - void Step( Settings& settings) override + void Step( Settings& settings ) override { Sample::Step( settings ); - int sampleCount = g_sampleDebug ? 10 : 1000; - - float extent = m_rowCount * m_grid; b2QueryFilter filter = b2DefaultQueryFilter(); filter.maskBits = 1; int hitCount = 0; + int nodeVisits = 0; + int leafVisits = 0; float ms = 0.0f; + int sampleCount = m_origins.size(); - if (m_queryType == e_rayCast) + if ( m_queryType == e_rayCast ) { b2Timer timer = b2CreateTimer(); - b2Vec2 rayStart = b2Vec2_zero; - b2Vec2 rayEnd = b2Vec2_zero; - for (int i = 0; i < sampleCount; ++i) + b2RayResult drawResult = {}; + + for ( int i = 0; i < sampleCount; ++i ) { - rayStart = RandomVec2( 0.0f, extent ); - rayEnd = RandomVec2( 0.0f, extent ); + b2Vec2 origin = m_origins[i]; + b2Vec2 translation = m_translations[i]; + + // todo for breakpoint + if (i == 2) + { + i += 0; + } + + b2RayResult result = b2World_CastRayClosest( m_worldId, origin, translation, filter ); + + if (i == m_drawIndex) + { + drawResult = result; + } - b2RayResult result = b2World_CastRayClosest( m_worldId, rayStart, b2Sub( rayEnd, rayStart ), filter ); + nodeVisits += result.nodeVisits; + leafVisits += result.leafVisits; hitCount += result.hit ? 
1 : 0; } ms = b2GetMilliseconds( &timer ); - - g_draw.DrawSegment( rayStart, rayEnd, b2_colorBeige ); + + m_minTime = b2MinFloat( m_minTime, ms ); + + b2Vec2 p1 = m_origins[m_drawIndex]; + b2Vec2 p2 = p1 + m_translations[m_drawIndex]; + g_draw.DrawSegment( p1, p2, b2_colorWhite ); + g_draw.DrawPoint( p1, 5.0f, b2_colorGreen ); + g_draw.DrawPoint( p2, 5.0f, b2_colorRed ); + if (drawResult.hit) + { + g_draw.DrawPoint( drawResult.point, 5.0f, b2_colorWhite ); + } } - g_draw.DrawString( 5, m_textLine, "hit count = %03d", hitCount ); + g_draw.DrawString( 5, m_textLine, "hit count = %d, node visits = %d, leaf visits = %d", hitCount, nodeVisits, leafVisits ); + m_textLine += m_textIncrement; + + g_draw.DrawString( 5, m_textLine, "total ms = %.3f", ms ); + m_textLine += m_textIncrement; + + g_draw.DrawString( 5, m_textLine, "min total ms = %.3f", m_minTime ); m_textLine += m_textIncrement; - g_draw.DrawString( 5, m_textLine, "ms = %.3f",ms ); + float aveRayCost = 1000.0f * m_minTime / float( sampleCount ); + g_draw.DrawString( 5, m_textLine, "average ray us = %.2f", aveRayCost ); m_textLine += m_textIncrement; } @@ -1739,13 +1788,14 @@ class BenchmarkCast : public Sample std::vector m_origins; std::vector m_translations; + float m_minTime; int m_rowCount, m_columnCount; int m_updateType; + int m_drawIndex; float m_fill; float m_ratio; float m_grid; - bool m_categoryBits; }; static int sampleCast = RegisterSample( "Benchmark", "Cast", BenchmarkCast::Create ); diff --git a/samples/sample_collision.cpp b/samples/sample_collision.cpp index 9241152e4..74760010b 100644 --- a/samples/sample_collision.cpp +++ b/samples/sample_collision.cpp @@ -691,11 +691,14 @@ class DynamicTree : public Sample if ( m_rayDrag ) { b2RayCastInput input = { m_startPoint, b2Sub( m_endPoint, m_startPoint ), 1.0f }; - b2DynamicTree_RayCast( &m_tree, &input, b2_defaultMaskBits, RayCallback, this ); + b2TraversalResult result = b2DynamicTree_RayCast( &m_tree, &input, b2_defaultMaskBits, RayCallback, this 
); g_draw.DrawSegment( m_startPoint, m_endPoint, b2_colorWhite ); g_draw.DrawPoint( m_startPoint, 5.0f, b2_colorGreen ); g_draw.DrawPoint( m_endPoint, 5.0f, b2_colorRed ); + + g_draw.DrawString( 5, m_textLine, "node visits = %d, leaf visits = %d", result.nodeVisits, result.leafVisits ); + m_textLine += m_textIncrement; } b2HexColor c = b2_colorBlue; diff --git a/src/broad_phase.c b/src/broad_phase.c index 7502c2f9f..1dcc1504f 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -479,12 +479,11 @@ void b2ValidateNoEnlarged( const b2BroadPhase* bp ) continue; } - if ( node->enlarged == true ) - { - capacity += 0; - } - +#if B2_TREE_32 == 0 B2_ASSERT( node->enlarged == false ); +#else + B2_ASSERT( (node->flags & b2_enlargedNode) == 0 ); +#endif } } #else diff --git a/src/core.c b/src/core.c index 030fb7282..8e8701d8f 100644 --- a/src/core.c +++ b/src/core.c @@ -78,7 +78,7 @@ void b2SetAllocator( b2AllocFcn* allocFcn, b2FreeFcn* freeFcn ) } // Use 32 byte alignment for everything. Works with 256bit SIMD. -#define B2_ALIGNMENT 32 +#define B2_ALIGNMENT 64 void* b2Alloc( int size ) { diff --git a/src/core.h b/src/core.h index a37ec42a0..1890c8b4c 100644 --- a/src/core.h +++ b/src/core.h @@ -178,6 +178,14 @@ extern float b2_lengthUnitsPerMeter; #define b2CheckDef( DEF ) B2_ASSERT( DEF->internalValue == B2_SECRET_COOKIE ) +enum b2TreeNodeFlags +{ + b2_allocatedNode = 0x0001, + b2_enlargedNode = 0x0002, + b2_leafNode = 0x0004, + b2_category64 = 0x0008, +}; + void* b2Alloc( int size ); void b2Free( void* mem, int size ); void* b2GrowAlloc( void* oldMem, int oldSize, int newSize ); diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 92885d1fc..4769dc30a 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -15,9 +15,11 @@ // TODO_ERIN // - try incrementally sorting internal nodes by height for better cache efficiency during depth first traversal. 
+#if B2_TREE_32 == 0 + static b2TreeNode b2_defaultTreeNode = { - { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, 0, { B2_NULL_INDEX }, B2_NULL_INDEX, B2_NULL_INDEX, -1, -2, false, - { 0, 0, 0, 0, 0 } }; + .aabb = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, + 0, { B2_NULL_INDEX }, B2_NULL_INDEX, B2_NULL_INDEX, -1, -2, false, { 0, 0, 0, 0, 0 } }; static inline bool b2IsLeaf( const b2TreeNode* node ) { @@ -1157,9 +1159,11 @@ void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskB } } -void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, - b2TreeRayCastCallbackFcn* callback, void* context ) +b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, + b2TreeRayCastCallbackFcn* callback, void* context ) { + b2TraversalResult result = { 0 }; + b2Vec2 p1 = input->origin; b2Vec2 d = input->translation; @@ -1194,7 +1198,11 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp } const b2TreeNode* node = tree->nodes + nodeId; - if ( b2AABB_Overlaps( node->aabb, segmentAABB ) == false || ( node->categoryBits & maskBits ) == 0 ) + result.nodeVisits += 1; + + b2AABB nodeAABB = node->aabb; + + if ( ( node->categoryBits & maskBits ) == 0 || b2AABB_Overlaps( nodeAABB, segmentAABB ) == false ) { continue; } @@ -1202,8 +1210,8 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp // Separating axis for segment (Gino, p80). 
// |dot(v, p1 - c)| > dot(|v|, h) // radius extension is added to the node in this case - b2Vec2 c = b2AABB_Center( node->aabb ); - b2Vec2 h = b2AABB_Extents( node->aabb ); + b2Vec2 c = b2AABB_Center( nodeAABB ); + b2Vec2 h = b2AABB_Extents( nodeAABB ); float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); float term2 = b2Dot( abs_v, h ); if ( term2 < term1 ) @@ -1216,11 +1224,12 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp subInput.maxFraction = maxFraction; float value = callback( &subInput, nodeId, node->userData, context ); + result.leafVisits += 1; if ( value == 0.0f ) { // The client has terminated the ray cast. - return; + return result; } if ( 0.0f < value && value < maxFraction ) @@ -1244,6 +1253,8 @@ void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* inp } } } + + return result; } void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, @@ -1472,7 +1483,7 @@ static int32_t b2PartitionMid( int32_t* indices, b2Vec2* centers, int32_t count #else - #define B2_BIN_COUNT 8 +#define B2_BIN_COUNT 8 typedef struct b2TreeBin { @@ -1923,3 +1934,1760 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) return leafCount; } + +#else + +/* + typedef struct b2TreeNode +{ + enum + { + e_enlarged = 0x0001, + e_free = 0x0002, + e_leaf = 0x0004, + e_category64 = 0x0008, + }; + + /// The node bounding box + b2AABB aabb; // 16 + + union + { + struct b2InternalNode internal; + struct b2LeafNode leaf; + }; // 8 + + union + { + /// The node parent index + int32_t parent; + + /// The node freelist next index + int32_t next; + }; // 4 + + uint16_t height; // 2 + uint16_t flags; // 2 + +} b2TreeNode; + + */ + +static b2TreeNode b2_defaultTreeNode = { + .aabb = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, + .internal = { B2_NULL_INDEX, B2_NULL_INDEX }, + .parent = B2_NULL_INDEX, + .height = 0, + .flags = b2_allocatedNode, +}; + +static inline bool b2IsLeaf( 
const b2TreeNode* node ) +{ + return (bool)(node->flags & b2_leafNode); +} + +static inline uint16_t b2MaxUInt16( uint16_t a, uint16_t b ) +{ + return a > b ? a : b; +} + +b2DynamicTree b2DynamicTree_Create( void ) +{ + _Static_assert( ( sizeof( b2TreeNode ) & 0xF ) == 0, "tree node size not a multiple of 16" ); + + b2DynamicTree tree; + tree.root = B2_NULL_INDEX; + tree.nodeCapacity = 16; + tree.nodeCount = 0; + tree.nodes = (b2TreeNode*)b2Alloc( tree.nodeCapacity * sizeof( b2TreeNode ) ); + memset( tree.nodes, 0, tree.nodeCapacity * sizeof( b2TreeNode ) ); + + // Build a linked list for the free list. + for ( int32_t i = 0; i < tree.nodeCapacity - 1; ++i ) + { + tree.nodes[i].next = i + 1; + } + + tree.nodes[tree.nodeCapacity - 1].next = B2_NULL_INDEX; + tree.freeList = 0; + tree.proxyCount = 0; + tree.leafIndices = NULL; + tree.leafBoxes = NULL; + tree.leafCenters = NULL; + tree.binIndices = NULL; + tree.rebuildCapacity = 0; + + return tree; +} + +void b2DynamicTree_Destroy( b2DynamicTree* tree ) +{ + b2Free( tree->nodes, tree->nodeCapacity * sizeof( b2TreeNode ) ); + b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) ); + b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) ); + b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + + memset( tree, 0, sizeof( b2DynamicTree ) ); +} + +// Allocate a node from the pool. Grow the pool if necessary. +static int32_t b2AllocateNode( b2DynamicTree* tree ) +{ + // Expand the node pool as needed. + if ( tree->freeList == B2_NULL_INDEX ) + { + B2_ASSERT( tree->nodeCount == tree->nodeCapacity ); + + // The free list is empty. Rebuild a bigger pool. 
+ b2TreeNode* oldNodes = tree->nodes; + int32_t oldCapcity = tree->nodeCapacity; + tree->nodeCapacity += oldCapcity >> 1; + tree->nodes = (b2TreeNode*)b2Alloc( tree->nodeCapacity * sizeof( b2TreeNode ) ); + B2_ASSERT( oldNodes != NULL ); + memcpy( tree->nodes, oldNodes, tree->nodeCount * sizeof( b2TreeNode ) ); + b2Free( oldNodes, oldCapcity * sizeof( b2TreeNode ) ); + + // Build a linked list for the free list + for ( int32_t i = tree->nodeCount; i < tree->nodeCapacity - 1; ++i ) + { + tree->nodes[i].next = i + 1; + } + tree->nodes[tree->nodeCapacity - 1].next = B2_NULL_INDEX; + tree->freeList = tree->nodeCount; + } + + // Peel a node off the free list. + int32_t nodeIndex = tree->freeList; + b2TreeNode* node = tree->nodes + nodeIndex; + tree->freeList = node->next; + *node = b2_defaultTreeNode; + ++tree->nodeCount; + return nodeIndex; +} + +// Return a node to the pool. +static void b2FreeNode( b2DynamicTree* tree, int32_t nodeId ) +{ + B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); + B2_ASSERT( 0 < tree->nodeCount ); + tree->nodes[nodeId].next = tree->freeList; + tree->nodes[nodeId].flags = 0; + tree->freeList = nodeId; + --tree->nodeCount; +} + +// Greedy algorithm for sibling selection using the SAH +// We have three nodes A-(B,C) and want to add a leaf D, there are three choices. +// 1: make a new parent for A and D : E-(A-(B,C), D) +// 2: associate D with B +// a: B is a leaf : A-(E-(B,D), C) +// b: B is an internal node: A-(B{D},C) +// 3: associate D with C +// a: C is a leaf : A-(B, E-(C,D)) +// b: C is an internal node: A-(B, C{D}) +// All of these have a clear cost except when B or C is an internal node. Hence we need to be greedy. + +// The cost for cases 1, 2a, and 3a can be computed using the sibling cost formula. 
+// cost of sibling H = area(union(H, D)) + increased area of ancestors + +// Suppose B (or C) is an internal node, then the lowest cost would be one of two cases: +// case1: D becomes a sibling of B +// case2: D becomes a descendant of B along with a new internal node of area(D). +static int32_t b2FindBestSibling( const b2DynamicTree* tree, b2AABB boxD ) +{ + b2Vec2 centerD = b2AABB_Center( boxD ); + float areaD = b2Perimeter( boxD ); + + const b2TreeNode* nodes = tree->nodes; + int32_t rootIndex = tree->root; + + b2AABB rootBox = nodes[rootIndex].aabb; + + // Area of current node + float areaBase = b2Perimeter( rootBox ); + + // Area of inflated node + float directCost = b2Perimeter( b2AABB_Union( rootBox, boxD ) ); + float inheritedCost = 0.0f; + + int32_t bestSibling = rootIndex; + float bestCost = directCost; + + // Descend the tree from root, following a single greedy path. + int32_t index = rootIndex; + while ( nodes[index].height > 0 ) + { + int32_t child1 = nodes[index].internal.child1; + int32_t child2 = nodes[index].internal.child2; + + // Cost of creating a new parent for this node and the new leaf + float cost = directCost + inheritedCost; + + // Sometimes there are multiple identical costs within tolerance. + // This breaks the ties using the centroid distance.
+ if ( cost < bestCost ) + { + bestSibling = index; + bestCost = cost; + } + + // Inheritance cost seen by children + inheritedCost += directCost - areaBase; + + bool leaf1 = nodes[child1].height == 0; + bool leaf2 = nodes[child2].height == 0; + + // Cost of descending into child 1 + float lowerCost1 = FLT_MAX; + b2AABB box1 = nodes[child1].aabb; + float directCost1 = b2Perimeter( b2AABB_Union( box1, boxD ) ); + float area1 = 0.0f; + if ( leaf1 ) + { + // Child 1 is a leaf + // Cost of creating new node and increasing area of node P + float cost1 = directCost1 + inheritedCost; + + // Need this here due to while condition above + if ( cost1 < bestCost ) + { + bestSibling = child1; + bestCost = cost1; + } + } + else + { + // Child 1 is an internal node + area1 = b2Perimeter( box1 ); + + // Lower bound cost of inserting under child 1. + lowerCost1 = inheritedCost + directCost1 + b2MinFloat( areaD - area1, 0.0f ); + } + + // Cost of descending into child 2 + float lowerCost2 = FLT_MAX; + b2AABB box2 = nodes[child2].aabb; + float directCost2 = b2Perimeter( b2AABB_Union( box2, boxD ) ); + float area2 = 0.0f; + if ( leaf2 ) + { + // Child 2 is a leaf + // Cost of creating new node and increasing area of node P + float cost2 = directCost2 + inheritedCost; + + // Need this here due to while condition above + if ( cost2 < bestCost ) + { + bestSibling = child2; + bestCost = cost2; + } + } + else + { + // Child 2 is an internal node + area2 = b2Perimeter( box2 ); + + // Lower bound cost of inserting under child 2. This is not the cost + // of child 2, it is the best we can hope for under child 2. + lowerCost2 = inheritedCost + directCost2 + b2MinFloat( areaD - area2, 0.0f ); + } + + if ( leaf1 && leaf2 ) + { + break; + } + + // Can the cost possibly be decreased? 
+ if ( bestCost <= lowerCost1 && bestCost <= lowerCost2 ) + { + break; + } + + if ( lowerCost1 == lowerCost2 && leaf1 == false ) + { + B2_ASSERT( lowerCost1 < FLT_MAX ); + B2_ASSERT( lowerCost2 < FLT_MAX ); + + // No clear choice based on lower bound surface area. This can happen when both + // children fully contain D. Fall back to node distance. + b2Vec2 d1 = b2Sub( b2AABB_Center( box1 ), centerD ); + b2Vec2 d2 = b2Sub( b2AABB_Center( box2 ), centerD ); + lowerCost1 = b2LengthSquared( d1 ); + lowerCost2 = b2LengthSquared( d2 ); + } + + // Descend + if ( lowerCost1 < lowerCost2 && leaf1 == false ) + { + index = child1; + areaBase = area1; + directCost = directCost1; + } + else + { + index = child2; + areaBase = area2; + directCost = directCost2; + } + + B2_ASSERT( nodes[index].height > 0 ); + } + + return bestSibling; +} + +enum b2RotateType +{ + b2_rotateNone, + b2_rotateBF, + b2_rotateBG, + b2_rotateCD, + b2_rotateCE +}; + +// Perform a left or right rotation if node A is imbalanced. +// The rotation is applied in place and nothing is returned.
+static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) +{ + B2_ASSERT( iA != B2_NULL_INDEX ); + + b2TreeNode* nodes = tree->nodes; + + b2TreeNode* A = nodes + iA; + if ( A->height < 2 ) + { + return; + } + + int32_t iB = A->internal.child1; + int32_t iC = A->internal.child2; + B2_ASSERT( 0 <= iB && iB < tree->nodeCapacity ); + B2_ASSERT( 0 <= iC && iC < tree->nodeCapacity ); + + b2TreeNode* B = nodes + iB; + b2TreeNode* C = nodes + iC; + + if ( B->height == 0 ) + { + // B is a leaf and C is internal + B2_ASSERT( C->height > 0 ); + + int32_t iF = C->internal.child1; + int32_t iG = C->internal.child2; + b2TreeNode* F = nodes + iF; + b2TreeNode* G = nodes + iG; + B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity ); + B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity ); + + // Base cost + float costBase = b2Perimeter( C->aabb ); + + // Cost of swapping B and F + b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb ); + float costBF = b2Perimeter( aabbBG ); + + // Cost of swapping B and G + b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb ); + float costBG = b2Perimeter( aabbBF ); + + if ( costBase < costBF && costBase < costBG ) + { + // Rotation does not improve cost + return; + } + + if ( costBF < costBG ) + { + // Swap B and F + A->internal.child1 = iF; + C->internal.child1 = iB; + + B->parent = iC; + F->parent = iA; + + C->aabb = aabbBG; + + C->height = 1 + b2MaxUInt16( B->height, G->height ); + A->height = 1 + b2MaxUInt16( C->height, F->height ); + C->flags |= (B->flags | G->flags) & b2_enlargedNode; + A->flags |= (C->flags | F->flags) & b2_enlargedNode; + } + else + { + // Swap B and G + A->internal.child1 = iG; + C->internal.child2 = iB; + + B->parent = iC; + G->parent = iA; + + C->aabb = aabbBF; + + C->height = 1 + b2MaxUInt16( B->height, F->height ); + A->height = 1 + b2MaxUInt16( C->height, G->height ); + C->flags |= (B->flags | F->flags) & b2_enlargedNode; + A->flags |= (C->flags | G->flags) & b2_enlargedNode; + } + } + else if ( C->height == 0 ) + { + // C is a leaf 
and B is internal + B2_ASSERT( B->height > 0 ); + + int iD = B->internal.child1; + int iE = B->internal.child2; + b2TreeNode* D = nodes + iD; + b2TreeNode* E = nodes + iE; + B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity ); + B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity ); + + // Base cost + float costBase = b2Perimeter( B->aabb ); + + // Cost of swapping C and D + b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb ); + float costCD = b2Perimeter( aabbCE ); + + // Cost of swapping C and E + b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb ); + float costCE = b2Perimeter( aabbCD ); + + if ( costBase < costCD && costBase < costCE ) + { + // Rotation does not improve cost + return; + } + + if ( costCD < costCE ) + { + // Swap C and D + A->internal.child2 = iD; + B->internal.child1 = iC; + + C->parent = iB; + D->parent = iA; + + B->aabb = aabbCE; + + B->height = 1 + b2MaxUInt16( C->height, E->height ); + A->height = 1 + b2MaxUInt16( B->height, D->height ); + B->flags |= (C->flags | E->flags) & b2_enlargedNode; + A->flags |= (B->flags | D->flags) & b2_enlargedNode; + } + else + { + // Swap C and E + A->internal.child2 = iE; + B->internal.child2 = iC; + + C->parent = iB; + E->parent = iA; + + B->aabb = aabbCD; + B->height = 1 + b2MaxUInt16( C->height, D->height ); + A->height = 1 + b2MaxUInt16( B->height, E->height ); + B->flags |= (C->flags | D->flags) & b2_enlargedNode; + A->flags |= (B->flags | E->flags) & b2_enlargedNode; + } + } + else + { + int iD = B->internal.child1; + int iE = B->internal.child2; + int iF = C->internal.child1; + int iG = C->internal.child2; + + b2TreeNode* D = nodes + iD; + b2TreeNode* E = nodes + iE; + b2TreeNode* F = nodes + iF; + b2TreeNode* G = nodes + iG; + + B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity ); + B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity ); + B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity ); + B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity ); + + // Base cost + float areaB = b2Perimeter( B->aabb ); + float areaC = b2Perimeter( 
C->aabb ); + float costBase = areaB + areaC; + enum b2RotateType bestRotation = b2_rotateNone; + float bestCost = costBase; + + // Cost of swapping B and F + b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb ); + float costBF = areaB + b2Perimeter( aabbBG ); + if ( costBF < bestCost ) + { + bestRotation = b2_rotateBF; + bestCost = costBF; + } + + // Cost of swapping B and G + b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb ); + float costBG = areaB + b2Perimeter( aabbBF ); + if ( costBG < bestCost ) + { + bestRotation = b2_rotateBG; + bestCost = costBG; + } + + // Cost of swapping C and D + b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb ); + float costCD = areaC + b2Perimeter( aabbCE ); + if ( costCD < bestCost ) + { + bestRotation = b2_rotateCD; + bestCost = costCD; + } + + // Cost of swapping C and E + b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb ); + float costCE = areaC + b2Perimeter( aabbCD ); + if ( costCE < bestCost ) + { + bestRotation = b2_rotateCE; + // bestCost = costCE; + } + + switch ( bestRotation ) + { + case b2_rotateNone: + break; + + case b2_rotateBF: + A->internal.child1 = iF; + C->internal.child1 = iB; + + B->parent = iC; + F->parent = iA; + + C->aabb = aabbBG; + C->height = 1 + b2MaxUInt16( B->height, G->height ); + A->height = 1 + b2MaxUInt16( C->height, F->height ); + C->flags |= (B->flags | G->flags) & b2_enlargedNode; + A->flags |= (C->flags | F->flags) & b2_enlargedNode; + break; + + case b2_rotateBG: + A->internal.child1 = iG; + C->internal.child2 = iB; + + B->parent = iC; + G->parent = iA; + + C->aabb = aabbBF; + C->height = 1 + b2MaxUInt16( B->height, F->height ); + A->height = 1 + b2MaxUInt16( C->height, G->height ); + C->flags |= (B->flags | F->flags) & b2_enlargedNode; + A->flags |= (C->flags | G->flags) & b2_enlargedNode; + break; + + case b2_rotateCD: + A->internal.child2 = iD; + B->internal.child1 = iC; + + C->parent = iB; + D->parent = iA; + + B->aabb = aabbCE; + B->height = 1 + b2MaxUInt16( C->height, E->height ); + A->height = 1 
+ b2MaxUInt16( B->height, D->height ); + B->flags |= (C->flags | E->flags) & b2_enlargedNode; + A->flags |= (B->flags | D->flags) & b2_enlargedNode; + break; + + case b2_rotateCE: + A->internal.child2 = iE; + B->internal.child2 = iC; + + C->parent = iB; + E->parent = iA; + + B->aabb = aabbCD; + B->height = 1 + b2MaxUInt16( C->height, D->height ); + A->height = 1 + b2MaxUInt16( B->height, E->height ); + B->flags |= (C->flags | D->flags) & b2_enlargedNode; + A->flags |= (B->flags | E->flags) & b2_enlargedNode; + break; + + default: + B2_ASSERT( false ); + break; + } + } +} + +static void b2InsertLeaf( b2DynamicTree* tree, int32_t leaf, bool shouldRotate ) +{ + if ( tree->root == B2_NULL_INDEX ) + { + tree->root = leaf; + tree->nodes[tree->root].parent = B2_NULL_INDEX; + return; + } + + // Stage 1: find the best sibling for this node + b2AABB leafAABB = tree->nodes[leaf].aabb; + int32_t sibling = b2FindBestSibling( tree, leafAABB ); + + // Stage 2: create a new parent for the leaf and sibling + int32_t oldParent = tree->nodes[sibling].parent; + int32_t newParent = b2AllocateNode( tree ); + + // warning: node pointer can change after allocation + b2TreeNode* nodes = tree->nodes; + nodes[newParent].parent = oldParent; + nodes[newParent].aabb = b2AABB_Union( leafAABB, nodes[sibling].aabb ); + nodes[newParent].height = nodes[sibling].height + 1; + + if ( oldParent != B2_NULL_INDEX ) + { + // The sibling was not the root. + if ( nodes[oldParent].internal.child1 == sibling ) + { + nodes[oldParent].internal.child1 = newParent; + } + else + { + nodes[oldParent].internal.child2 = newParent; + } + + nodes[newParent].internal.child1 = sibling; + nodes[newParent].internal.child2 = leaf; + nodes[sibling].parent = newParent; + nodes[leaf].parent = newParent; + } + else + { + // The sibling was the root. 
+ nodes[newParent].internal.child1 = sibling; + nodes[newParent].internal.child2 = leaf; + nodes[sibling].parent = newParent; + nodes[leaf].parent = newParent; + tree->root = newParent; + } + + // Stage 3: walk back up the tree fixing heights and AABBs + int32_t index = nodes[leaf].parent; + while ( index != B2_NULL_INDEX ) + { + int32_t child1 = nodes[index].internal.child1; + int32_t child2 = nodes[index].internal.child2; + + B2_ASSERT( child1 != B2_NULL_INDEX ); + B2_ASSERT( child2 != B2_NULL_INDEX ); + + nodes[index].aabb = b2AABB_Union( nodes[child1].aabb, nodes[child2].aabb ); + nodes[index].height = 1 + b2MaxUInt16( nodes[child1].height, nodes[child2].height ); + nodes[index].flags |= (nodes[child1].flags | nodes[child2].flags) & b2_enlargedNode; + + if ( shouldRotate ) + { + b2RotateNodes( tree, index ); + } + + index = nodes[index].parent; + } +} + +static void b2RemoveLeaf( b2DynamicTree* tree, int32_t leaf ) +{ + if ( leaf == tree->root ) + { + tree->root = B2_NULL_INDEX; + return; + } + + b2TreeNode* nodes = tree->nodes; + + int32_t parent = nodes[leaf].parent; + int32_t grandParent = nodes[parent].parent; + int32_t sibling; + if ( nodes[parent].internal.child1 == leaf ) + { + sibling = nodes[parent].internal.child2; + } + else + { + sibling = nodes[parent].internal.child1; + } + + if ( grandParent != B2_NULL_INDEX ) + { + // Destroy parent and connect sibling to grandParent. + if ( nodes[grandParent].internal.child1 == parent ) + { + nodes[grandParent].internal.child1 = sibling; + } + else + { + nodes[grandParent].internal.child2 = sibling; + } + nodes[sibling].parent = grandParent; + b2FreeNode( tree, parent ); + + // Adjust ancestor bounds. 
+ int32_t index = grandParent; + while ( index != B2_NULL_INDEX ) + { + b2TreeNode* node = nodes + index; + b2TreeNode* child1 = nodes + node->internal.child1; + b2TreeNode* child2 = nodes + node->internal.child2; + + // Fast union using SSE + //__m128 aabb1 = _mm_load_ps(&child1->aabb.lowerBound.x); + //__m128 aabb2 = _mm_load_ps(&child2->aabb.lowerBound.x); + //__m128 lower = _mm_min_ps(aabb1, aabb2); + //__m128 upper = _mm_max_ps(aabb1, aabb2); + //__m128 aabb = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(3, 2, 1, 0)); + //_mm_store_ps(&node->aabb.lowerBound.x, aabb); + + node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); + node->height = 1 + b2MaxUInt16( child1->height, child2->height ); + + index = node->parent; + } + } + else + { + tree->root = sibling; + tree->nodes[sibling].parent = B2_NULL_INDEX; + b2FreeNode( tree, parent ); + } +} + +// Create a proxy in the tree as a leaf node. We return the index of the node instead of a pointer so that we can grow +// the node pool. +int32_t b2DynamicTree_CreateProxy( b2DynamicTree* tree, b2AABB aabb, uint64_t categoryBits, int32_t userData ) +{ + B2_ASSERT( -b2_huge < aabb.lowerBound.x && aabb.lowerBound.x < b2_huge ); + B2_ASSERT( -b2_huge < aabb.lowerBound.y && aabb.lowerBound.y < b2_huge ); + B2_ASSERT( -b2_huge < aabb.upperBound.x && aabb.upperBound.x < b2_huge ); + B2_ASSERT( -b2_huge < aabb.upperBound.y && aabb.upperBound.y < b2_huge ); + + int32_t proxyId = b2AllocateNode( tree ); + b2TreeNode* node = tree->nodes + proxyId; + + node->aabb = aabb; + node->leaf.userData = userData; + node->leaf.categoryBits = (uint32_t)(categoryBits & UINT32_MAX); + node->height = 0; + node->flags |= b2_leafNode; + + bool shouldRotate = true; + b2InsertLeaf( tree, proxyId, shouldRotate ); + + tree->proxyCount += 1; + + // todo temp testing + //b2DynamicTree_Validate( tree ); + + return proxyId; +} + +void b2DynamicTree_DestroyProxy( b2DynamicTree* tree, int32_t proxyId ) +{ + B2_ASSERT( 0 <= proxyId && proxyId < 
tree->nodeCapacity ); + B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); + + b2RemoveLeaf( tree, proxyId ); + b2FreeNode( tree, proxyId ); + + B2_ASSERT( tree->proxyCount > 0 ); + tree->proxyCount -= 1; +} + +int32_t b2DynamicTree_GetProxyCount( const b2DynamicTree* tree ) +{ + return tree->proxyCount; +} + +void b2DynamicTree_MoveProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aabb ) +{ + B2_ASSERT( b2AABB_IsValid( aabb ) ); + B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < b2_huge ); + B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < b2_huge ); + B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity ); + B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); + + b2RemoveLeaf( tree, proxyId ); + + tree->nodes[proxyId].aabb = aabb; + + bool shouldRotate = false; + b2InsertLeaf( tree, proxyId, shouldRotate ); +} + +void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aabb ) +{ + b2TreeNode* nodes = tree->nodes; + + B2_ASSERT( b2AABB_IsValid( aabb ) ); + B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < b2_huge ); + B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < b2_huge ); + B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity ); + B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); + + // Caller must ensure this + B2_ASSERT( b2AABB_Contains( nodes[proxyId].aabb, aabb ) == false ); + + nodes[proxyId].aabb = aabb; + + int32_t parentIndex = nodes[proxyId].parent; + while ( parentIndex != B2_NULL_INDEX ) + { + bool changed = b2EnlargeAABB( &nodes[parentIndex].aabb, aabb ); + nodes[parentIndex].flags |= b2_enlargedNode; + parentIndex = nodes[parentIndex].parent; + + if ( changed == false ) + { + break; + } + } + + while ( parentIndex != B2_NULL_INDEX ) + { + if ( nodes[parentIndex].flags & b2_enlargedNode ) + { + // early out because this ancestor was previously ascended and marked as enlarged + break; + } + + nodes[parentIndex].flags |= b2_enlargedNode; + parentIndex = nodes[parentIndex].parent; + } +} + +int b2DynamicTree_GetHeight( 
const b2DynamicTree* tree ) +{ + if ( tree->root == B2_NULL_INDEX ) + { + return 0; + } + + return tree->nodes[tree->root].height; +} + +float b2DynamicTree_GetAreaRatio( const b2DynamicTree* tree ) +{ + if ( tree->root == B2_NULL_INDEX ) + { + return 0.0f; + } + + const b2TreeNode* root = tree->nodes + tree->root; + float rootArea = b2Perimeter( root->aabb ); + + float totalArea = 0.0f; + for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) + { + const b2TreeNode* node = tree->nodes + i; + if ( node->height < 0 || b2IsLeaf( node ) || i == tree->root ) + { + // Free node in pool + continue; + } + + totalArea += b2Perimeter( node->aabb ); + } + + return totalArea / rootArea; +} + +// Compute the height of a sub-tree. +static int b2ComputeHeight( const b2DynamicTree* tree, int32_t nodeId ) +{ + B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); + b2TreeNode* node = tree->nodes + nodeId; + + if ( b2IsLeaf( node ) ) + { + return 0; + } + + int32_t height1 = b2ComputeHeight( tree, node->internal.child1 ); + int32_t height2 = b2ComputeHeight( tree, node->internal.child2 ); + return 1 + b2MaxInt( height1, height2 ); +} + +int b2DynamicTree_ComputeHeight( const b2DynamicTree* tree ) +{ + int height = b2ComputeHeight( tree, tree->root ); + return height; +} + +#if B2_VALIDATE +static void b2ValidateStructure( const b2DynamicTree* tree, int32_t index ) +{ + if ( index == B2_NULL_INDEX ) + { + return; + } + + if ( index == tree->root ) + { + B2_ASSERT( tree->nodes[index].parent == B2_NULL_INDEX ); + } + + const b2TreeNode* node = tree->nodes + index; + + int32_t child1 = node->internal.child1; + int32_t child2 = node->internal.child2; + + if ( b2IsLeaf( node ) ) + { + B2_ASSERT( node->height == 0 ); + return; + } + + B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); + B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); + + B2_ASSERT( tree->nodes[child1].parent == index ); + B2_ASSERT( tree->nodes[child2].parent == index ); + + if ( (tree->nodes[child1].flags | 
tree->nodes[child2].flags) & b2_enlargedNode ) + { + B2_ASSERT( node->flags & b2_enlargedNode ); + } + + b2ValidateStructure( tree, child1 ); + b2ValidateStructure( tree, child2 ); +} + +static void b2ValidateMetrics( const b2DynamicTree* tree, int32_t index ) +{ + if ( index == B2_NULL_INDEX ) + { + return; + } + + const b2TreeNode* node = tree->nodes + index; + + B2_ASSERT( node->flags & b2_allocatedNode ); + + if ( b2IsLeaf( node ) ) + { + B2_ASSERT( node->height == 0 ); + return; + } + + int child1 = node->internal.child1; + int child2 = node->internal.child2; + + B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); + B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); + + int height1 = tree->nodes[child1].height; + int height2 = tree->nodes[child2].height; + int height = 1 + b2MaxInt( height1, height2 ); + B2_ASSERT( node->height == height ); + + // b2AABB aabb = b2AABB_Union(tree->nodes[child1].aabb, tree->nodes[child2].aabb); + + B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child1].aabb ) ); + B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child2].aabb ) ); + + // B2_ASSERT(aabb.lowerBound.x == node->aabb.lowerBound.x); + // B2_ASSERT(aabb.lowerBound.y == node->aabb.lowerBound.y); + // B2_ASSERT(aabb.upperBound.x == node->aabb.upperBound.x); + // B2_ASSERT(aabb.upperBound.y == node->aabb.upperBound.y); + + b2ValidateMetrics( tree, child1 ); + b2ValidateMetrics( tree, child2 ); +} +#endif + +void b2DynamicTree_Validate( const b2DynamicTree* tree ) +{ +#if B2_VALIDATE + if ( tree->root == B2_NULL_INDEX ) + { + return; + } + + b2ValidateStructure( tree, tree->root ); + b2ValidateMetrics( tree, tree->root ); + + int32_t freeCount = 0; + int32_t freeIndex = tree->freeList; + while ( freeIndex != B2_NULL_INDEX ) + { + B2_ASSERT( 0 <= freeIndex && freeIndex < tree->nodeCapacity ); + freeIndex = tree->nodes[freeIndex].next; + ++freeCount; + } + + int32_t height = b2DynamicTree_GetHeight( tree ); + int32_t computedHeight = 
b2DynamicTree_ComputeHeight( tree ); + B2_ASSERT( height == computedHeight ); + + B2_ASSERT( tree->nodeCount + freeCount == tree->nodeCapacity ); +#else + B2_MAYBE_UNUSED( tree ); +#endif +} + +int32_t b2DynamicTree_GetMaxBalance( const b2DynamicTree* tree ) +{ + int maxBalance = 0; + for ( int i = 0; i < tree->nodeCapacity; ++i ) + { + const b2TreeNode* node = tree->nodes + i; + if ( node->height <= 1 ) + { + continue; + } + + B2_ASSERT( b2IsLeaf( node ) == false ); + + int child1 = node->internal.child1; + int child2 = node->internal.child2; + int balance = b2AbsInt( tree->nodes[child2].height - tree->nodes[child1].height ); + maxBalance = b2MaxInt( maxBalance, balance ); + } + + return maxBalance; +} + +void b2DynamicTree_RebuildBottomUp( b2DynamicTree* tree ) +{ + int* nodes = b2Alloc( tree->nodeCount * sizeof( int ) ); + int count = 0; + + // Build array of leaves. Free the rest. + for ( int i = 0; i < tree->nodeCapacity; ++i ) + { + if ( (tree->nodes[i].flags & b2_allocatedNode) == 0 ) + { + // free node in pool + continue; + } + + if ( b2IsLeaf( tree->nodes + i ) ) + { + tree->nodes[i].parent = B2_NULL_INDEX; + nodes[count] = i; + ++count; + } + else + { + b2FreeNode( tree, i ); + } + } + + while ( count > 1 ) + { + float minCost = FLT_MAX; + int32_t iMin = -1, jMin = -1; + for ( int32_t i = 0; i < count; ++i ) + { + b2AABB aabbi = tree->nodes[nodes[i]].aabb; + + for ( int32_t j = i + 1; j < count; ++j ) + { + b2AABB aabbj = tree->nodes[nodes[j]].aabb; + b2AABB b = b2AABB_Union( aabbi, aabbj ); + float cost = b2Perimeter( b ); + if ( cost < minCost ) + { + iMin = i; + jMin = j; + minCost = cost; + } + } + } + + int32_t index1 = nodes[iMin]; + int32_t index2 = nodes[jMin]; + b2TreeNode* child1 = tree->nodes + index1; + b2TreeNode* child2 = tree->nodes + index2; + + int32_t parentIndex = b2AllocateNode( tree ); + b2TreeNode* parent = tree->nodes + parentIndex; + parent->internal.child1 = index1; + parent->internal.child2 = index2; + parent->aabb = 
b2AABB_Union( child1->aabb, child2->aabb ); + parent->height = 1 + b2MaxUInt16( child1->height, child2->height ); + parent->parent = B2_NULL_INDEX; + + child1->parent = parentIndex; + child2->parent = parentIndex; + + nodes[jMin] = nodes[count - 1]; + nodes[iMin] = parentIndex; + --count; + } + + tree->root = nodes[0]; + b2Free( nodes, tree->nodeCount * sizeof( b2TreeNode ) ); + + b2DynamicTree_Validate( tree ); +} + +void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin ) +{ + // shift all AABBs + for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) + { + b2TreeNode* n = tree->nodes + i; + n->aabb.lowerBound.x -= newOrigin.x; + n->aabb.lowerBound.y -= newOrigin.y; + n->aabb.upperBound.x -= newOrigin.x; + n->aabb.upperBound.y -= newOrigin.y; + } +} + +int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ) +{ + size_t size = sizeof( b2DynamicTree ) + sizeof( b2TreeNode ) * tree->nodeCapacity + + tree->rebuildCapacity * ( sizeof( int32_t ) + sizeof( b2AABB ) + sizeof( b2Vec2 ) + sizeof( int32_t ) ); + + return (int)size; +} + +void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, + void* context ) +{ + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + stack[stackCount++] = tree->root; + + uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); + + while ( stackCount > 0 ) + { + int32_t nodeId = stack[--stackCount]; + if ( nodeId == B2_NULL_INDEX ) + { + continue; + } + + const b2TreeNode* node = tree->nodes + nodeId; + + if ( b2AABB_Overlaps( node->aabb, aabb ) ) + { + if ( b2IsLeaf( node ) && ( node->leaf.categoryBits & maskBits32 ) != 0 ) + { + // callback to user code with proxy id + bool proceed = callback( nodeId, node->leaf.userData, context ); + if ( proceed == false ) + { + return; + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + stack[stackCount++] = node->internal.child1; + stack[stackCount++] 
= node->internal.child2; + } + } + } + } +} + +b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, + b2TreeRayCastCallbackFcn* callback, void* context ) +{ + b2TraversalResult result = { 0 }; + + b2Vec2 p1 = input->origin; + b2Vec2 d = input->translation; + + b2Vec2 r = b2Normalize( d ); + + // v is perpendicular to the segment. + b2Vec2 v = b2CrossSV( 1.0f, r ); + b2Vec2 abs_v = b2Abs( v ); + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + + float maxFraction = input->maxFraction; + + b2Vec2 p2 = b2MulAdd( p1, maxFraction, d ); + + // Build a bounding box for the segment. + b2AABB segmentAABB = { b2Min( p1, p2 ), b2Max( p1, p2 ) }; + + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + stack[stackCount++] = tree->root; + + b2RayCastInput subInput = *input; + uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); + + while ( stackCount > 0 ) + { + int32_t nodeId = stack[--stackCount]; + if ( nodeId == B2_NULL_INDEX ) + { + continue; + } + + const b2TreeNode* node = tree->nodes + nodeId; + result.nodeVisits += 1; + + b2AABB nodeAABB = node->aabb; + + if ( b2AABB_Overlaps( nodeAABB, segmentAABB ) == false ) + { + continue; + } + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + // radius extension is added to the node in this case + b2Vec2 c = b2AABB_Center( nodeAABB ); + b2Vec2 h = b2AABB_Extents( nodeAABB ); + float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); + float term2 = b2Dot( abs_v, h ); + if ( term2 < term1 ) + { + continue; + } + + if ( b2IsLeaf( node ) ) + { + if ( ( node->leaf.categoryBits & maskBits32 ) == 0 ) + { + continue; + } + + subInput.maxFraction = maxFraction; + + float value = callback( &subInput, nodeId, node->leaf.userData, context ); + result.leafVisits += 1; + + if ( value == 0.0f ) + { + // The client has terminated the ray cast. 
+ return result; + } + + if ( 0.0f < value && value < maxFraction ) + { + // Update segment bounding box. + maxFraction = value; + p2 = b2MulAdd( p1, maxFraction, d ); + segmentAABB.lowerBound = b2Min( p1, p2 ); + segmentAABB.upperBound = b2Max( p1, p2 ); + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + // TODO_ERIN just put one node on the stack, continue on a child node + // TODO_ERIN test ordering children by nearest to ray origin + stack[stackCount++] = node->internal.child1; + stack[stackCount++] = node->internal.child2; + } + } + } + + return result; +} + +void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, + b2TreeShapeCastCallbackFcn* callback, void* context ) +{ + if ( input->count == 0 ) + { + return; + } + + b2AABB originAABB = { input->points[0], input->points[0] }; + for ( int i = 1; i < input->count; ++i ) + { + originAABB.lowerBound = b2Min( originAABB.lowerBound, input->points[i] ); + originAABB.upperBound = b2Max( originAABB.upperBound, input->points[i] ); + } + + b2Vec2 radius = { input->radius, input->radius }; + + originAABB.lowerBound = b2Sub( originAABB.lowerBound, radius ); + originAABB.upperBound = b2Add( originAABB.upperBound, radius ); + + b2Vec2 p1 = b2AABB_Center( originAABB ); + b2Vec2 extension = b2AABB_Extents( originAABB ); + + // v is perpendicular to the segment. + b2Vec2 r = input->translation; + b2Vec2 v = b2CrossSV( 1.0f, r ); + b2Vec2 abs_v = b2Abs( v ); + + // Separating axis for segment (Gino, p80). 
+ // |dot(v, p1 - c)| > dot(|v|, h) + + float maxFraction = input->maxFraction; + + // Build total box for the shape cast + b2Vec2 t = b2MulSV( maxFraction, input->translation ); + b2AABB totalAABB = { + b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ), + b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ), + }; + + b2ShapeCastInput subInput = *input; + uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); + + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + stack[stackCount++] = tree->root; + + while ( stackCount > 0 ) + { + int32_t nodeId = stack[--stackCount]; + if ( nodeId == B2_NULL_INDEX ) + { + continue; + } + + const b2TreeNode* node = tree->nodes + nodeId; + if ( b2AABB_Overlaps( node->aabb, totalAABB ) == false ) + { + continue; + } + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + // radius extension is added to the node in this case + b2Vec2 c = b2AABB_Center( node->aabb ); + b2Vec2 h = b2Add( b2AABB_Extents( node->aabb ), extension ); + float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); + float term2 = b2Dot( abs_v, h ); + if ( term2 < term1 ) + { + continue; + } + + if ( b2IsLeaf( node ) && ( node->leaf.categoryBits & maskBits32 ) != 0 ) + { + subInput.maxFraction = maxFraction; + + float value = callback( &subInput, nodeId, node->leaf.userData, context ); + + if ( value == 0.0f ) + { + // The client has terminated the ray cast. + return; + } + + if ( 0.0f < value && value < maxFraction ) + { + // Update segment bounding box. 
+ maxFraction = value; + t = b2MulSV( maxFraction, input->translation ); + totalAABB.lowerBound = b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ); + totalAABB.upperBound = b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ); + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + // TODO_ERIN just put one node on the stack, continue on a child node + // TODO_ERIN test ordering children by nearest to ray origin + stack[stackCount++] = node->internal.child1; + stack[stackCount++] = node->internal.child2; + } + } + } +} + +// Median split heuristic +static int32_t b2PartitionMid( int32_t* indices, b2Vec2* centers, int32_t count ) +{ + // Handle trivial case + if ( count <= 2 ) + { + return count / 2; + } + + // todo SIMD? + b2Vec2 lowerBound = centers[0]; + b2Vec2 upperBound = centers[0]; + + for ( int32_t i = 1; i < count; ++i ) + { + lowerBound = b2Min( lowerBound, centers[i] ); + upperBound = b2Max( upperBound, centers[i] ); + } + + b2Vec2 d = b2Sub( upperBound, lowerBound ); + b2Vec2 c = { 0.5f * ( lowerBound.x + upperBound.x ), 0.5f * ( lowerBound.y + upperBound.y ) }; + + // Partition longest axis using the Hoare partition scheme + // https://en.wikipedia.org/wiki/Quicksort + // https://nicholasvadivelu.com/2021/01/11/array-partition/ + int32_t i1 = 0, i2 = count; + if ( d.x > d.y ) + { + float pivot = c.x; + + while ( i1 < i2 ) + { + while ( i1 < i2 && centers[i1].x < pivot ) + { + i1 += 1; + }; + + while ( i1 < i2 && centers[i2 - 1].x >= pivot ) + { + i2 -= 1; + }; + + if ( i1 < i2 ) + { + // Swap indices + { + int32_t temp = indices[i1]; + indices[i1] = indices[i2 - 1]; + indices[i2 - 1] = temp; + } + + // Swap centers + { + b2Vec2 temp = centers[i1]; + centers[i1] = centers[i2 - 1]; + centers[i2 - 1] = temp; + } + + i1 += 1; + i2 -= 1; + } + } + } + else + { + float pivot = c.y; + + while ( i1 < i2 ) + { + while ( i1 < i2 && centers[i1].y < pivot ) + { + i1 += 1; + 
}; + + while ( i1 < i2 && centers[i2 - 1].y >= pivot ) + { + i2 -= 1; + }; + + if ( i1 < i2 ) + { + // Swap indices + { + int32_t temp = indices[i1]; + indices[i1] = indices[i2 - 1]; + indices[i2 - 1] = temp; + } + + // Swap centers + { + b2Vec2 temp = centers[i1]; + centers[i1] = centers[i2 - 1]; + centers[i2 - 1] = temp; + } + + i1 += 1; + i2 -= 1; + } + } + } + B2_ASSERT( i1 == i2 ); + + if ( i1 > 0 && i1 < count ) + { + return i1; + } + else + { + return count / 2; + } +} + +// Temporary data used to track the rebuild of a tree node +struct b2RebuildItem +{ + int32_t nodeIndex; + int32_t childCount; + + // Leaf indices + int32_t startIndex; + int32_t splitIndex; + int32_t endIndex; +}; + +// Returns root node index +static int32_t b2BuildTree( b2DynamicTree* tree, int32_t leafCount ) +{ + b2TreeNode* nodes = tree->nodes; + int32_t* leafIndices = tree->leafIndices; + + if ( leafCount == 1 ) + { + nodes[leafIndices[0]].parent = B2_NULL_INDEX; + return leafIndices[0]; + } + + b2Vec2* leafCenters = tree->leafCenters; + + // todo large stack item + struct b2RebuildItem stack[b2_treeStackSize]; + int32_t top = 0; + + stack[0].nodeIndex = b2AllocateNode( tree ); + stack[0].childCount = -1; + stack[0].startIndex = 0; + stack[0].endIndex = leafCount; + stack[0].splitIndex = b2PartitionMid( leafIndices, leafCenters, leafCount ); + + while ( true ) + { + struct b2RebuildItem* item = stack + top; + + item->childCount += 1; + + if ( item->childCount == 2 ) + { + // This internal node has both children established + + if ( top == 0 ) + { + // all done + break; + } + + struct b2RebuildItem* parentItem = stack + ( top - 1 ); + b2TreeNode* parentNode = nodes + parentItem->nodeIndex; + + if ( parentItem->childCount == 0 ) + { + B2_ASSERT( parentNode->internal.child1 == B2_NULL_INDEX ); + parentNode->internal.child1 = item->nodeIndex; + } + else + { + B2_ASSERT( parentItem->childCount == 1 ); + B2_ASSERT( parentNode->internal.child2 == B2_NULL_INDEX ); + 
parentNode->internal.child2 = item->nodeIndex; + } + + b2TreeNode* node = nodes + item->nodeIndex; + + B2_ASSERT( node->parent == B2_NULL_INDEX ); + node->parent = parentItem->nodeIndex; + + B2_ASSERT( node->internal.child1 != B2_NULL_INDEX ); + B2_ASSERT( node->internal.child2 != B2_NULL_INDEX ); + b2TreeNode* child1 = nodes + node->internal.child1; + b2TreeNode* child2 = nodes + node->internal.child2; + + node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); + node->height = 1 + b2MaxUInt16( child1->height, child2->height ); + + // Pop stack + top -= 1; + } + else + { + int32_t startIndex, endIndex; + if ( item->childCount == 0 ) + { + startIndex = item->startIndex; + endIndex = item->splitIndex; + } + else + { + B2_ASSERT( item->childCount == 1 ); + startIndex = item->splitIndex; + endIndex = item->endIndex; + } + + int32_t count = endIndex - startIndex; + + if ( count == 1 ) + { + int32_t childIndex = leafIndices[startIndex]; + b2TreeNode* node = nodes + item->nodeIndex; + + if ( item->childCount == 0 ) + { + B2_ASSERT( node->internal.child1 == B2_NULL_INDEX ); + node->internal.child1 = childIndex; + } + else + { + B2_ASSERT( item->childCount == 1 ); + B2_ASSERT( node->internal.child2 == B2_NULL_INDEX ); + node->internal.child2 = childIndex; + } + + b2TreeNode* childNode = nodes + childIndex; + B2_ASSERT( childNode->parent == B2_NULL_INDEX ); + childNode->parent = item->nodeIndex; + } + else + { + B2_ASSERT( count > 0 ); + B2_ASSERT( top < b2_treeStackSize ); + + top += 1; + struct b2RebuildItem* newItem = stack + top; + newItem->nodeIndex = b2AllocateNode( tree ); + newItem->childCount = -1; + newItem->startIndex = startIndex; + newItem->endIndex = endIndex; +#if B2_TREE_HEURISTIC == 0 + newItem->splitIndex = b2PartitionMid( leafIndices + startIndex, leafCenters + startIndex, count ); +#else + newItem->splitIndex = + b2PartitionSAH( leafIndices + startIndex, binIndices + startIndex, leafBoxes + startIndex, count ); +#endif + newItem->splitIndex += 
startIndex; + } + } + } + + b2TreeNode* rootNode = nodes + stack[0].nodeIndex; + B2_ASSERT( rootNode->parent == B2_NULL_INDEX ); + B2_ASSERT( rootNode->internal.child1 != B2_NULL_INDEX ); + B2_ASSERT( rootNode->internal.child2 != B2_NULL_INDEX ); + + b2TreeNode* child1 = nodes + rootNode->internal.child1; + b2TreeNode* child2 = nodes + rootNode->internal.child2; + + rootNode->aabb = b2AABB_Union( child1->aabb, child2->aabb ); + rootNode->height = 1 + b2MaxUInt16( child1->height, child2->height ); + + return stack[0].nodeIndex; +} + +// Not safe to access tree during this operation because it may grow +int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) +{ + int32_t proxyCount = tree->proxyCount; + if ( proxyCount == 0 ) + { + return 0; + } + + // Ensure capacity for rebuild space + if ( proxyCount > tree->rebuildCapacity ) + { + int32_t newCapacity = proxyCount + proxyCount / 2; + + b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + tree->leafIndices = b2Alloc( newCapacity * sizeof( int32_t ) ); + +#if B2_TREE_HEURISTIC == 0 + b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) ); + tree->leafCenters = b2Alloc( newCapacity * sizeof( b2Vec2 ) ); +#else + b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) ); + tree->leafBoxes = b2Alloc( newCapacity * sizeof( b2AABB ) ); + b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int32_t ) ); + tree->binIndices = b2Alloc( newCapacity * sizeof( int32_t ) ); +#endif + tree->rebuildCapacity = newCapacity; + } + + int32_t leafCount = 0; + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + + int32_t nodeIndex = tree->root; + b2TreeNode* nodes = tree->nodes; + b2TreeNode* node = nodes + nodeIndex; + + // These are the nodes that get sorted to rebuild the tree. + // I'm using indices because the node pool may grow during the build. 
+ int32_t* leafIndices = tree->leafIndices; + +#if B2_TREE_HEURISTIC == 0 + b2Vec2* leafCenters = tree->leafCenters; +#else + b2AABB* leafBoxes = tree->leafBoxes; +#endif + + // Gather all proxy nodes that have grown and all internal nodes that haven't grown. Both are + // considered leaves in the tree rebuild. + // Free all internal nodes that have grown. + // todo use a node growth metric instead of simply enlarged to reduce rebuild size and frequency + // this should be weighed against b2_aabbMargin + while ( true ) + { + if ( node->height == 0 || ( ( node->flags & b2_enlargedNode) == 0 && fullBuild == false ) ) + { + leafIndices[leafCount] = nodeIndex; +#if B2_TREE_HEURISTIC == 0 + leafCenters[leafCount] = b2AABB_Center( node->aabb ); +#else + leafBoxes[leafCount] = node->aabb; +#endif + leafCount += 1; + + // Detach + node->parent = B2_NULL_INDEX; + } + else + { + int32_t doomedNodeIndex = nodeIndex; + + // Handle children + nodeIndex = node->internal.child1; + + B2_ASSERT( stackCount < b2_treeStackSize ); + if ( stackCount < b2_treeStackSize ) + { + stack[stackCount++] = node->internal.child2; + } + + node = nodes + nodeIndex; + + // Remove doomed node + b2FreeNode( tree, doomedNodeIndex ); + + continue; + } + + if ( stackCount == 0 ) + { + break; + } + + nodeIndex = stack[--stackCount]; + node = nodes + nodeIndex; + } + +#if B2_VALIDATE == 1 + int32_t capacity = tree->nodeCapacity; + for ( int32_t i = 0; i < capacity; ++i ) + { + if ( nodes[i].flags & b2_allocatedNode ) + { + B2_ASSERT( (nodes[i].flags & b2_enlargedNode) == 0 ); + } + } +#endif + + B2_ASSERT( leafCount <= proxyCount ); + + tree->root = b2BuildTree( tree, leafCount ); + + b2DynamicTree_Validate( tree ); + + return leafCount; +} + +#endif diff --git a/src/solver.c b/src/solver.c index bd1a81eef..0603f2a31 100644 --- a/src/solver.c +++ b/src/solver.c @@ -1515,8 +1515,7 @@ void b2Solve( b2World* world, b2StepContext* stepContext ) } } - ptrdiff_t blockDiff = baseGraphBlock - graphBlocks; - 
B2_ASSERT( blockDiff == graphBlockCount ); + B2_ASSERT( (ptrdiff_t)(baseGraphBlock - graphBlocks) == graphBlockCount ); b2SolverStage* stage = stages; diff --git a/src/world.c b/src/world.c index e9b7d718a..1a89e193f 100644 --- a/src/world.c +++ b/src/world.c @@ -2065,14 +2065,16 @@ static float RayCastCallback( const b2RayCastInput* input, int proxyId, int shap return input->maxFraction; } -void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, +b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ) { + b2TraversalResult traversalResult = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return traversalResult; } B2_ASSERT( b2Vec2_IsValid( origin ) ); @@ -2084,15 +2086,19 @@ void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2Qu for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + b2TraversalResult treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + traversalResult.nodeVisits += treeResult.nodeVisits; + traversalResult.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { - return; + return traversalResult; } input.maxFraction = worldContext.fraction; } + + return traversalResult; } // This callback finds the closest hit. This is the most common callback used in games. 
@@ -2126,7 +2132,9 @@ b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 tra for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + b2TraversalResult treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + result.nodeVisits += treeResult.nodeVisits; + result.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { From 2d0dd20660788e8bbf14367e3c786aa3c8f9e039 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 5 Oct 2024 22:22:25 -0700 Subject: [PATCH 04/11] 40 byte tree node --- include/box2d/collision.h | 72 +- src/broad_phase.c | 4 - src/core.h | 1 - src/dynamic_tree.c | 1894 ++----------------------------------- 4 files changed, 72 insertions(+), 1899 deletions(-) diff --git a/include/box2d/collision.h b/include/box2d/collision.h index 73e1ec250..4b8ef8880 100644 --- a/include/box2d/collision.h +++ b/include/box2d/collision.h @@ -605,9 +605,6 @@ B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segment /// Convenience mask bits to use when you don't need collision filtering and just want all results. #define b2_defaultMaskBits ( UINT64_MAX ) -#define B2_TREE_32 0 - -#if B2_TREE_32 == 0 /// A node in the dynamic tree. This is private data placed here for performance reasons. 
typedef struct b2TreeNode { @@ -619,72 +616,29 @@ typedef struct b2TreeNode union { - /// The node parent index + /// The node parent index (allocated node) int32_t parent; - /// The node freelist next index + /// The node freelist next index (free node) int32_t next; }; // 4 - /// Child 1 index + /// Child 1 index (internal node) int32_t child1; // 4 union { - /// Child 2 index + /// Child 2 index (internal node) int32_t child2; - /// User data + /// User data (leaf node) int32_t userData; }; // 4 - /// Leaf = 0, free node = -1 - uint16_t height; // 2 - uint16_t flags; -} b2TreeNode; - -#else - -struct b2InternalNode -{ - int32_t child1; - int32_t child2; -}; - -struct b2LeafNode -{ - // limited to 32 bits, see b2TreeNode32::e_category64 - uint32_t categoryBits; - int32_t userData; -}; - -typedef struct b2TreeNode -{ - /// The node bounding box - b2AABB aabb; // 16 - - union - { - struct b2InternalNode internal; - struct b2LeafNode leaf; - }; // 8 - - union - { - /// The node parent index - int32_t parent; - - /// The node freelist next index - int32_t next; - }; // 4 - uint16_t height; // 2 - uint16_t flags; // 2 - + uint16_t flags; // 2 } b2TreeNode; -#endif - /// The dynamic tree structure. This should be considered private data. /// It is placed here for performance reasons. 
typedef struct b2DynamicTree @@ -830,26 +784,12 @@ B2_API void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin ); /// Get the number of bytes used by this tree B2_API int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ); -#if B2_TREE_32 == 0 - /// Get proxy user data -/// @return the proxy user data or 0 if the id is invalid B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId ) { return tree->nodes[proxyId].userData; } -#else - -/// Get proxy user data -/// @return the proxy user data or 0 if the id is invalid -B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId ) -{ - return tree->nodes[proxyId].leaf.userData; -} - -#endif - /// Get the AABB of a proxy B2_INLINE b2AABB b2DynamicTree_GetAABB( const b2DynamicTree* tree, int32_t proxyId ) { diff --git a/src/broad_phase.c b/src/broad_phase.c index 1dcc1504f..23a3a4057 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -479,11 +479,7 @@ void b2ValidateNoEnlarged( const b2BroadPhase* bp ) continue; } -#if B2_TREE_32 == 0 - B2_ASSERT( node->enlarged == false ); -#else B2_ASSERT( (node->flags & b2_enlargedNode) == 0 ); -#endif } } #else diff --git a/src/core.h b/src/core.h index 1890c8b4c..f0e64e126 100644 --- a/src/core.h +++ b/src/core.h @@ -183,7 +183,6 @@ enum b2TreeNodeFlags b2_allocatedNode = 0x0001, b2_enlargedNode = 0x0002, b2_leafNode = 0x0004, - b2_category64 = 0x0008, }; void* b2Alloc( int size ); diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 4769dc30a..394af6e8f 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -12,29 +12,28 @@ #define b2_treeStackSize 1024 -// TODO_ERIN -// - try incrementally sorting internal nodes by height for better cache efficiency during depth first traversal. 
- -#if B2_TREE_32 == 0 - static b2TreeNode b2_defaultTreeNode = { .aabb = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, - 0, { B2_NULL_INDEX }, B2_NULL_INDEX, B2_NULL_INDEX, -1, -2, false, { 0, 0, 0, 0, 0 } }; + .categoryBits = b2_defaultCategoryBits, + .parent = B2_NULL_INDEX, + .child1 = B2_NULL_INDEX, + .child2 = B2_NULL_INDEX, + .height = 0, + .flags = b2_allocatedNode, +}; static inline bool b2IsLeaf( const b2TreeNode* node ) { - return node->height == 0; + return node->flags & b2_leafNode; } -static inline int16_t b2MaxInt16( int16_t a, int16_t b ) +static inline uint16_t b2MaxUInt16( uint16_t a, uint16_t b ) { return a > b ? a : b; } b2DynamicTree b2DynamicTree_Create( void ) { - _Static_assert( ( sizeof( b2TreeNode ) & 0xF ) == 0, "tree node size not a multiple of 16" ); - b2DynamicTree tree; tree.root = B2_NULL_INDEX; @@ -47,10 +46,9 @@ b2DynamicTree b2DynamicTree_Create( void ) for ( int32_t i = 0; i < tree.nodeCapacity - 1; ++i ) { tree.nodes[i].next = i + 1; - tree.nodes[i].height = -1; } + tree.nodes[tree.nodeCapacity - 1].next = B2_NULL_INDEX; - tree.nodes[tree.nodeCapacity - 1].height = -1; tree.freeList = 0; tree.proxyCount = 0; @@ -97,10 +95,9 @@ static int32_t b2AllocateNode( b2DynamicTree* tree ) for ( int32_t i = tree->nodeCount; i < tree->nodeCapacity - 1; ++i ) { tree->nodes[i].next = i + 1; - tree->nodes[i].height = -1; } + tree->nodes[tree->nodeCapacity - 1].next = B2_NULL_INDEX; - tree->nodes[tree->nodeCapacity - 1].height = -1; tree->freeList = tree->nodeCount; } @@ -119,7 +116,7 @@ static void b2FreeNode( b2DynamicTree* tree, int32_t nodeId ) B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); B2_ASSERT( 0 < tree->nodeCount ); tree->nodes[nodeId].next = tree->freeList; - tree->nodes[nodeId].height = -1; + tree->nodes[nodeId].flags = 0; tree->freeList = nodeId; --tree->nodeCount; } @@ -136,7 +133,7 @@ static void b2FreeNode( b2DynamicTree* tree, int32_t nodeId ) // All of these have a clear cost except when B or C is an internal node. 
Hence we need to be greedy. // The cost for cases 1, 2a, and 3a can be computed using the sibling cost formula. -// cost of sibling H = area(union(H, D)) + increased are of ancestors +// cost of sibling H = area(union(H, D)) + increased area of ancestors // Suppose B (or C) is an internal node, then the lowest cost would be one of two cases: // case1: D becomes a sibling of B @@ -355,12 +352,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) C->aabb = aabbBG; - C->height = 1 + b2MaxInt16( B->height, G->height ); - A->height = 1 + b2MaxInt16( C->height, F->height ); + C->height = 1 + b2MaxUInt16( B->height, G->height ); + A->height = 1 + b2MaxUInt16( C->height, F->height ); C->categoryBits = B->categoryBits | G->categoryBits; A->categoryBits = C->categoryBits | F->categoryBits; - C->enlarged = B->enlarged || G->enlarged; - A->enlarged = C->enlarged || F->enlarged; + C->flags |= (B->flags | G->flags) & b2_enlargedNode; + A->flags |= (C->flags | F->flags) & b2_enlargedNode; } else { @@ -373,12 +370,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) C->aabb = aabbBF; - C->height = 1 + b2MaxInt16( B->height, F->height ); - A->height = 1 + b2MaxInt16( C->height, G->height ); + C->height = 1 + b2MaxUInt16( B->height, F->height ); + A->height = 1 + b2MaxUInt16( C->height, G->height ); C->categoryBits = B->categoryBits | F->categoryBits; A->categoryBits = C->categoryBits | G->categoryBits; - C->enlarged = B->enlarged || F->enlarged; - A->enlarged = C->enlarged || G->enlarged; + C->flags |= (B->flags | F->flags) & b2_enlargedNode; + A->flags |= (C->flags | G->flags) & b2_enlargedNode; } } else if ( C->height == 0 ) @@ -421,12 +418,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) B->aabb = aabbCE; - B->height = 1 + b2MaxInt16( C->height, E->height ); - A->height = 1 + b2MaxInt16( B->height, D->height ); + B->height = 1 + b2MaxUInt16( C->height, E->height ); + A->height = 1 + b2MaxUInt16( B->height, D->height ); B->categoryBits 
= C->categoryBits | E->categoryBits; A->categoryBits = B->categoryBits | D->categoryBits; - B->enlarged = C->enlarged || E->enlarged; - A->enlarged = B->enlarged || D->enlarged; + B->flags |= (C->flags | E->flags) & b2_enlargedNode; + A->flags |= (B->flags | D->flags) & b2_enlargedNode; } else { @@ -438,12 +435,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) E->parent = iA; B->aabb = aabbCD; - B->height = 1 + b2MaxInt16( C->height, D->height ); - A->height = 1 + b2MaxInt16( B->height, E->height ); + B->height = 1 + b2MaxUInt16( C->height, D->height ); + A->height = 1 + b2MaxUInt16( B->height, E->height ); B->categoryBits = C->categoryBits | D->categoryBits; A->categoryBits = B->categoryBits | E->categoryBits; - B->enlarged = C->enlarged || D->enlarged; - A->enlarged = B->enlarged || E->enlarged; + B->flags |= (C->flags | D->flags) & b2_enlargedNode; + A->flags |= (B->flags | E->flags) & b2_enlargedNode; } } else @@ -519,12 +516,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) F->parent = iA; C->aabb = aabbBG; - C->height = 1 + b2MaxInt16( B->height, G->height ); - A->height = 1 + b2MaxInt16( C->height, F->height ); + C->height = 1 + b2MaxUInt16( B->height, G->height ); + A->height = 1 + b2MaxUInt16( C->height, F->height ); C->categoryBits = B->categoryBits | G->categoryBits; A->categoryBits = C->categoryBits | F->categoryBits; - C->enlarged = B->enlarged || G->enlarged; - A->enlarged = C->enlarged || F->enlarged; + C->flags |= (B->flags | G->flags) & b2_enlargedNode; + A->flags |= (C->flags | F->flags) & b2_enlargedNode; break; case b2_rotateBG: @@ -535,12 +532,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) G->parent = iA; C->aabb = aabbBF; - C->height = 1 + b2MaxInt16( B->height, F->height ); - A->height = 1 + b2MaxInt16( C->height, G->height ); + C->height = 1 + b2MaxUInt16( B->height, F->height ); + A->height = 1 + b2MaxUInt16( C->height, G->height ); C->categoryBits = B->categoryBits | F->categoryBits; 
A->categoryBits = C->categoryBits | G->categoryBits; - C->enlarged = B->enlarged || F->enlarged; - A->enlarged = C->enlarged || G->enlarged; + C->flags |= (B->flags | F->flags) & b2_enlargedNode; + A->flags |= (C->flags | G->flags) & b2_enlargedNode; break; case b2_rotateCD: @@ -551,12 +548,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) D->parent = iA; B->aabb = aabbCE; - B->height = 1 + b2MaxInt16( C->height, E->height ); - A->height = 1 + b2MaxInt16( B->height, D->height ); + B->height = 1 + b2MaxUInt16( C->height, E->height ); + A->height = 1 + b2MaxUInt16( B->height, D->height ); B->categoryBits = C->categoryBits | E->categoryBits; A->categoryBits = B->categoryBits | D->categoryBits; - B->enlarged = C->enlarged || E->enlarged; - A->enlarged = B->enlarged || D->enlarged; + B->flags |= (C->flags | E->flags) & b2_enlargedNode; + A->flags |= (B->flags | D->flags) & b2_enlargedNode; break; case b2_rotateCE: @@ -567,12 +564,12 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) E->parent = iA; B->aabb = aabbCD; - B->height = 1 + b2MaxInt16( C->height, D->height ); - A->height = 1 + b2MaxInt16( B->height, E->height ); + B->height = 1 + b2MaxUInt16( C->height, D->height ); + A->height = 1 + b2MaxUInt16( B->height, E->height ); B->categoryBits = C->categoryBits | D->categoryBits; A->categoryBits = B->categoryBits | E->categoryBits; - B->enlarged = C->enlarged || D->enlarged; - A->enlarged = B->enlarged || E->enlarged; + B->flags |= (C->flags | D->flags) & b2_enlargedNode; + A->flags |= (B->flags | E->flags) & b2_enlargedNode; break; default: @@ -646,8 +643,8 @@ static void b2InsertLeaf( b2DynamicTree* tree, int32_t leaf, bool shouldRotate ) nodes[index].aabb = b2AABB_Union( nodes[child1].aabb, nodes[child2].aabb ); nodes[index].categoryBits = nodes[child1].categoryBits | nodes[child2].categoryBits; - nodes[index].height = 1 + b2MaxInt16( nodes[child1].height, nodes[child2].height ); - nodes[index].enlarged = nodes[child1].enlarged || 
nodes[child2].enlarged; + nodes[index].height = 1 + b2MaxUInt16( nodes[child1].height, nodes[child2].height ); + nodes[index].flags |= (nodes[child1].flags | nodes[child2].flags) & b2_enlargedNode; if ( shouldRotate ) { @@ -712,7 +709,7 @@ static void b2RemoveLeaf( b2DynamicTree* tree, int32_t leaf ) node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); node->categoryBits = child1->categoryBits | child2->categoryBits; - node->height = 1 + b2MaxInt16( child1->height, child2->height ); + node->height = 1 + b2MaxUInt16( child1->height, child2->height ); index = node->parent; } @@ -741,6 +738,7 @@ int32_t b2DynamicTree_CreateProxy( b2DynamicTree* tree, b2AABB aabb, uint64_t ca node->userData = userData; node->categoryBits = categoryBits; node->height = 0; + node->flags = b2_leafNode; bool shouldRotate = true; b2InsertLeaf( tree, proxyId, shouldRotate ); @@ -802,7 +800,7 @@ void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aa while ( parentIndex != B2_NULL_INDEX ) { bool changed = b2EnlargeAABB( &nodes[parentIndex].aabb, aabb ); - nodes[parentIndex].enlarged = true; + nodes[parentIndex].flags |= b2_enlargedNode; parentIndex = nodes[parentIndex].parent; if ( changed == false ) @@ -813,13 +811,13 @@ void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aa while ( parentIndex != B2_NULL_INDEX ) { - if ( nodes[parentIndex].enlarged == true ) + if ( nodes[parentIndex].flags & b2_enlargedNode ) { // early out because this ancestor was previously ascended and marked as enlarged break; } - nodes[parentIndex].enlarged = true; + nodes[parentIndex].flags |= b2_enlargedNode; parentIndex = nodes[parentIndex].parent; } } @@ -897,26 +895,24 @@ static void b2ValidateStructure( const b2DynamicTree* tree, int32_t index ) const b2TreeNode* node = tree->nodes + index; - int32_t child1 = node->child1; - int32_t child2 = node->child2; - if ( b2IsLeaf( node ) ) { - B2_ASSERT( child1 == B2_NULL_INDEX ); - B2_ASSERT( child2 == B2_NULL_INDEX 
); B2_ASSERT( node->height == 0 ); return; } + int32_t child1 = node->child1; + int32_t child2 = node->child2; + B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); B2_ASSERT( tree->nodes[child1].parent == index ); B2_ASSERT( tree->nodes[child2].parent == index ); - if ( tree->nodes[child1].enlarged || tree->nodes[child2].enlarged ) + if ( (tree->nodes[child1].flags | tree->nodes[child2].flags) & b2_enlargedNode ) { - B2_ASSERT( node->enlarged == true ); + B2_ASSERT( node->flags & b2_enlargedNode ); } b2ValidateStructure( tree, child1 ); @@ -932,17 +928,15 @@ static void b2ValidateMetrics( const b2DynamicTree* tree, int32_t index ) const b2TreeNode* node = tree->nodes + index; - int child1 = node->child1; - int child2 = node->child2; - if ( b2IsLeaf( node ) ) { - B2_ASSERT( child1 == B2_NULL_INDEX ); - B2_ASSERT( child2 == B2_NULL_INDEX ); B2_ASSERT( node->height == 0 ); return; } + int child1 = node->child1; + int child2 = node->child2; + B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); @@ -1080,7 +1074,7 @@ void b2DynamicTree_RebuildBottomUp( b2DynamicTree* tree ) parent->child2 = index2; parent->aabb = b2AABB_Union( child1->aabb, child2->aabb ); parent->categoryBits = child1->categoryBits | child2->categoryBits; - parent->height = 1 + b2MaxInt16( child1->height, child2->height ); + parent->height = 1 + b2MaxUInt16( child1->height, child2->height ); parent->parent = B2_NULL_INDEX; child1->parent = parentIndex; @@ -1737,7 +1731,7 @@ static int32_t b2BuildTree( b2DynamicTree* tree, int32_t leafCount ) b2TreeNode* child2 = nodes + node->child2; node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); - node->height = 1 + b2MaxInt16( child1->height, child2->height ); + node->height = 1 + b2MaxUInt16( child1->height, child2->height ); node->categoryBits = child1->categoryBits | child2->categoryBits; // Pop stack @@ -1812,7 +1806,7 @@ static 
int32_t b2BuildTree( b2DynamicTree* tree, int32_t leafCount ) b2TreeNode* child2 = nodes + rootNode->child2; rootNode->aabb = b2AABB_Union( child1->aabb, child2->aabb ); - rootNode->height = 1 + b2MaxInt16( child1->height, child2->height ); + rootNode->height = 1 + b2MaxUInt16( child1->height, child2->height ); rootNode->categoryBits = child1->categoryBits | child2->categoryBits; return stack[0].nodeIndex; @@ -1872,7 +1866,7 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) // this should be weighed against b2_aabbMargin while ( true ) { - if ( node->height == 0 || ( node->enlarged == false && fullBuild == false ) ) + if ( node->height == 0 || (( node->flags & b2_enlargedNode) == 0 && fullBuild == false ) ) { leafIndices[leafCount] = nodeIndex; #if B2_TREE_HEURISTIC == 0 @@ -1919,9 +1913,9 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) int32_t capacity = tree->nodeCapacity; for ( int32_t i = 0; i < capacity; ++i ) { - if ( nodes[i].height >= 0 ) + if ( nodes[i].flags & b2_allocatedNode ) { - B2_ASSERT( nodes[i].enlarged == false ); + B2_ASSERT( (nodes[i].flags & b2_enlargedNode) == 0 ); } } #endif @@ -1935,1759 +1929,3 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) return leafCount; } -#else - -/* - typedef struct b2TreeNode -{ - enum - { - e_enlarged = 0x0001, - e_free = 0x0002, - e_leaf = 0x0004, - e_category64 = 0x0008, - }; - - /// The node bounding box - b2AABB aabb; // 16 - - union - { - struct b2InternalNode internal; - struct b2LeafNode leaf; - }; // 8 - - union - { - /// The node parent index - int32_t parent; - - /// The node freelist next index - int32_t next; - }; // 4 - - uint16_t height; // 2 - uint16_t flags; // 2 - -} b2TreeNode; - - */ - -static b2TreeNode b2_defaultTreeNode = { - .aabb = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }, - .internal = { B2_NULL_INDEX, B2_NULL_INDEX }, - .parent = B2_NULL_INDEX, - .height = 0, - .flags = b2_allocatedNode, -}; - -static inline bool b2IsLeaf( 
const b2TreeNode* node ) -{ - return (bool)(node->flags & b2_leafNode); -} - -static inline uint16_t b2MaxUInt16( uint16_t a, uint16_t b ) -{ - return a > b ? a : b; -} - -b2DynamicTree b2DynamicTree_Create( void ) -{ - _Static_assert( ( sizeof( b2TreeNode ) & 0xF ) == 0, "tree node size not a multiple of 16" ); - - b2DynamicTree tree; - tree.root = B2_NULL_INDEX; - tree.nodeCapacity = 16; - tree.nodeCount = 0; - tree.nodes = (b2TreeNode*)b2Alloc( tree.nodeCapacity * sizeof( b2TreeNode ) ); - memset( tree.nodes, 0, tree.nodeCapacity * sizeof( b2TreeNode ) ); - - // Build a linked list for the free list. - for ( int32_t i = 0; i < tree.nodeCapacity - 1; ++i ) - { - tree.nodes[i].next = i + 1; - } - - tree.nodes[tree.nodeCapacity - 1].next = B2_NULL_INDEX; - tree.freeList = 0; - tree.proxyCount = 0; - tree.leafIndices = NULL; - tree.leafBoxes = NULL; - tree.leafCenters = NULL; - tree.binIndices = NULL; - tree.rebuildCapacity = 0; - - return tree; -} - -void b2DynamicTree_Destroy( b2DynamicTree* tree ) -{ - b2Free( tree->nodes, tree->nodeCapacity * sizeof( b2TreeNode ) ); - b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int32_t ) ); - b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) ); - b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) ); - b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int32_t ) ); - - memset( tree, 0, sizeof( b2DynamicTree ) ); -} - -// Allocate a node from the pool. Grow the pool if necessary. -static int32_t b2AllocateNode( b2DynamicTree* tree ) -{ - // Expand the node pool as needed. - if ( tree->freeList == B2_NULL_INDEX ) - { - B2_ASSERT( tree->nodeCount == tree->nodeCapacity ); - - // The free list is empty. Rebuild a bigger pool. 
- b2TreeNode* oldNodes = tree->nodes; - int32_t oldCapcity = tree->nodeCapacity; - tree->nodeCapacity += oldCapcity >> 1; - tree->nodes = (b2TreeNode*)b2Alloc( tree->nodeCapacity * sizeof( b2TreeNode ) ); - B2_ASSERT( oldNodes != NULL ); - memcpy( tree->nodes, oldNodes, tree->nodeCount * sizeof( b2TreeNode ) ); - b2Free( oldNodes, oldCapcity * sizeof( b2TreeNode ) ); - - // Build a linked list for the free list - for ( int32_t i = tree->nodeCount; i < tree->nodeCapacity - 1; ++i ) - { - tree->nodes[i].next = i + 1; - } - tree->nodes[tree->nodeCapacity - 1].next = B2_NULL_INDEX; - tree->freeList = tree->nodeCount; - } - - // Peel a node off the free list. - int32_t nodeIndex = tree->freeList; - b2TreeNode* node = tree->nodes + nodeIndex; - tree->freeList = node->next; - *node = b2_defaultTreeNode; - ++tree->nodeCount; - return nodeIndex; -} - -// Return a node to the pool. -static void b2FreeNode( b2DynamicTree* tree, int32_t nodeId ) -{ - B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); - B2_ASSERT( 0 < tree->nodeCount ); - tree->nodes[nodeId].next = tree->freeList; - tree->nodes[nodeId].flags = 0; - tree->freeList = nodeId; - --tree->nodeCount; -} - -// Greedy algorithm for sibling selection using the SAH -// We have three nodes A-(B,C) and want to add a leaf D, there are three choices. -// 1: make a new parent for A and D : E-(A-(B,C), D) -// 2: associate D with B -// a: B is a leaf : A-(E-(B,D), C) -// b: B is an internal node: A-(B{D},C) -// 3: associate D with C -// a: C is a leaf : A-(B, E-(C,D)) -// b: C is an internal node: A-(B, C{D}) -// All of these have a clear cost except when B or C is an internal node. Hence we need to be greedy. - -// The cost for cases 1, 2a, and 3a can be computed using the sibling cost formula. 
-// cost of sibling H = area(union(H, D)) + increased are of ancestors - -// Suppose B (or C) is an internal node, then the lowest cost would be one of two cases: -// case1: D becomes a sibling of B -// case2: D becomes a descendant of B along with a new internal node of area(D). -static int32_t b2FindBestSibling( const b2DynamicTree* tree, b2AABB boxD ) -{ - b2Vec2 centerD = b2AABB_Center( boxD ); - float areaD = b2Perimeter( boxD ); - - const b2TreeNode* nodes = tree->nodes; - int32_t rootIndex = tree->root; - - b2AABB rootBox = nodes[rootIndex].aabb; - - // Area of current node - float areaBase = b2Perimeter( rootBox ); - - // Area of inflated node - float directCost = b2Perimeter( b2AABB_Union( rootBox, boxD ) ); - float inheritedCost = 0.0f; - - int32_t bestSibling = rootIndex; - float bestCost = directCost; - - // Descend the tree from root, following a single greedy path. - int32_t index = rootIndex; - while ( nodes[index].height > 0 ) - { - int32_t child1 = nodes[index].internal.child1; - int32_t child2 = nodes[index].internal.child2; - - // Cost of creating a new parent for this node and the new leaf - float cost = directCost + inheritedCost; - - // Sometimes there are multiple identical costs within tolerance. - // This breaks the ties using the centroid distance. 
- if ( cost < bestCost ) - { - bestSibling = index; - bestCost = cost; - } - - // Inheritance cost seen by children - inheritedCost += directCost - areaBase; - - bool leaf1 = nodes[child1].height == 0; - bool leaf2 = nodes[child2].height == 0; - - // Cost of descending into child 1 - float lowerCost1 = FLT_MAX; - b2AABB box1 = nodes[child1].aabb; - float directCost1 = b2Perimeter( b2AABB_Union( box1, boxD ) ); - float area1 = 0.0f; - if ( leaf1 ) - { - // Child 1 is a leaf - // Cost of creating new node and increasing area of node P - float cost1 = directCost1 + inheritedCost; - - // Need this here due to while condition above - if ( cost1 < bestCost ) - { - bestSibling = child1; - bestCost = cost1; - } - } - else - { - // Child 1 is an internal node - area1 = b2Perimeter( box1 ); - - // Lower bound cost of inserting under child 1. - lowerCost1 = inheritedCost + directCost1 + b2MinFloat( areaD - area1, 0.0f ); - } - - // Cost of descending into child 2 - float lowerCost2 = FLT_MAX; - b2AABB box2 = nodes[child2].aabb; - float directCost2 = b2Perimeter( b2AABB_Union( box2, boxD ) ); - float area2 = 0.0f; - if ( leaf2 ) - { - // Child 2 is a leaf - // Cost of creating new node and increasing area of node P - float cost2 = directCost2 + inheritedCost; - - // Need this here due to while condition above - if ( cost2 < bestCost ) - { - bestSibling = child2; - bestCost = cost2; - } - } - else - { - // Child 2 is an internal node - area2 = b2Perimeter( box2 ); - - // Lower bound cost of inserting under child 2. This is not the cost - // of child 2, it is the best we can hope for under child 2. - lowerCost2 = inheritedCost + directCost2 + b2MinFloat( areaD - area2, 0.0f ); - } - - if ( leaf1 && leaf2 ) - { - break; - } - - // Can the cost possibly be decreased? 
- if ( bestCost <= lowerCost1 && bestCost <= lowerCost2 ) - { - break; - } - - if ( lowerCost1 == lowerCost2 && leaf1 == false ) - { - B2_ASSERT( lowerCost1 < FLT_MAX ); - B2_ASSERT( lowerCost2 < FLT_MAX ); - - // No clear choice based on lower bound surface area. This can happen when both - // children fully contain D. Fall back to node distance. - b2Vec2 d1 = b2Sub( b2AABB_Center( box1 ), centerD ); - b2Vec2 d2 = b2Sub( b2AABB_Center( box2 ), centerD ); - lowerCost1 = b2LengthSquared( d1 ); - lowerCost2 = b2LengthSquared( d2 ); - } - - // Descend - if ( lowerCost1 < lowerCost2 && leaf1 == false ) - { - index = child1; - areaBase = area1; - directCost = directCost1; - } - else - { - index = child2; - areaBase = area2; - directCost = directCost2; - } - - B2_ASSERT( nodes[index].height > 0 ); - } - - return bestSibling; -} - -enum b2RotateType -{ - b2_rotateNone, - b2_rotateBF, - b2_rotateBG, - b2_rotateCD, - b2_rotateCE -}; - -// Perform a left or right rotation if node A is imbalanced. -// Returns the new root index. 
-static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) -{ - B2_ASSERT( iA != B2_NULL_INDEX ); - - b2TreeNode* nodes = tree->nodes; - - b2TreeNode* A = nodes + iA; - if ( A->height < 2 ) - { - return; - } - - int32_t iB = A->internal.child1; - int32_t iC = A->internal.child2; - B2_ASSERT( 0 <= iB && iB < tree->nodeCapacity ); - B2_ASSERT( 0 <= iC && iC < tree->nodeCapacity ); - - b2TreeNode* B = nodes + iB; - b2TreeNode* C = nodes + iC; - - if ( B->height == 0 ) - { - // B is a leaf and C is internal - B2_ASSERT( C->height > 0 ); - - int32_t iF = C->internal.child1; - int32_t iG = C->internal.child2; - b2TreeNode* F = nodes + iF; - b2TreeNode* G = nodes + iG; - B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity ); - B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity ); - - // Base cost - float costBase = b2Perimeter( C->aabb ); - - // Cost of swapping B and F - b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb ); - float costBF = b2Perimeter( aabbBG ); - - // Cost of swapping B and G - b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb ); - float costBG = b2Perimeter( aabbBF ); - - if ( costBase < costBF && costBase < costBG ) - { - // Rotation does not improve cost - return; - } - - if ( costBF < costBG ) - { - // Swap B and F - A->internal.child1 = iF; - C->internal.child1 = iB; - - B->parent = iC; - F->parent = iA; - - C->aabb = aabbBG; - - C->height = 1 + b2MaxUInt16( B->height, G->height ); - A->height = 1 + b2MaxUInt16( C->height, F->height ); - C->flags |= (B->flags | G->flags) & b2_enlargedNode; - A->flags |= (C->flags | F->flags) & b2_enlargedNode; - } - else - { - // Swap B and G - A->internal.child1 = iG; - C->internal.child2 = iB; - - B->parent = iC; - G->parent = iA; - - C->aabb = aabbBF; - - C->height = 1 + b2MaxUInt16( B->height, F->height ); - A->height = 1 + b2MaxUInt16( C->height, G->height ); - C->flags |= (B->flags | F->flags) & b2_enlargedNode; - A->flags |= (C->flags | G->flags) & b2_enlargedNode; - } - } - else if ( C->height == 0 ) - { - // C is a leaf 
and B is internal - B2_ASSERT( B->height > 0 ); - - int iD = B->internal.child1; - int iE = B->internal.child2; - b2TreeNode* D = nodes + iD; - b2TreeNode* E = nodes + iE; - B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity ); - B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity ); - - // Base cost - float costBase = b2Perimeter( B->aabb ); - - // Cost of swapping C and D - b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb ); - float costCD = b2Perimeter( aabbCE ); - - // Cost of swapping C and E - b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb ); - float costCE = b2Perimeter( aabbCD ); - - if ( costBase < costCD && costBase < costCE ) - { - // Rotation does not improve cost - return; - } - - if ( costCD < costCE ) - { - // Swap C and D - A->internal.child2 = iD; - B->internal.child1 = iC; - - C->parent = iB; - D->parent = iA; - - B->aabb = aabbCE; - - B->height = 1 + b2MaxUInt16( C->height, E->height ); - A->height = 1 + b2MaxUInt16( B->height, D->height ); - B->flags |= (C->flags | E->flags) & b2_enlargedNode; - A->flags |= (B->flags | D->flags) & b2_enlargedNode; - } - else - { - // Swap C and E - A->internal.child2 = iE; - B->internal.child2 = iC; - - C->parent = iB; - E->parent = iA; - - B->aabb = aabbCD; - B->height = 1 + b2MaxUInt16( C->height, D->height ); - A->height = 1 + b2MaxUInt16( B->height, E->height ); - B->flags |= (C->flags | D->flags) & b2_enlargedNode; - A->flags |= (B->flags | E->flags) & b2_enlargedNode; - } - } - else - { - int iD = B->internal.child1; - int iE = B->internal.child2; - int iF = C->internal.child1; - int iG = C->internal.child2; - - b2TreeNode* D = nodes + iD; - b2TreeNode* E = nodes + iE; - b2TreeNode* F = nodes + iF; - b2TreeNode* G = nodes + iG; - - B2_ASSERT( 0 <= iD && iD < tree->nodeCapacity ); - B2_ASSERT( 0 <= iE && iE < tree->nodeCapacity ); - B2_ASSERT( 0 <= iF && iF < tree->nodeCapacity ); - B2_ASSERT( 0 <= iG && iG < tree->nodeCapacity ); - - // Base cost - float areaB = b2Perimeter( B->aabb ); - float areaC = b2Perimeter( 
C->aabb ); - float costBase = areaB + areaC; - enum b2RotateType bestRotation = b2_rotateNone; - float bestCost = costBase; - - // Cost of swapping B and F - b2AABB aabbBG = b2AABB_Union( B->aabb, G->aabb ); - float costBF = areaB + b2Perimeter( aabbBG ); - if ( costBF < bestCost ) - { - bestRotation = b2_rotateBF; - bestCost = costBF; - } - - // Cost of swapping B and G - b2AABB aabbBF = b2AABB_Union( B->aabb, F->aabb ); - float costBG = areaB + b2Perimeter( aabbBF ); - if ( costBG < bestCost ) - { - bestRotation = b2_rotateBG; - bestCost = costBG; - } - - // Cost of swapping C and D - b2AABB aabbCE = b2AABB_Union( C->aabb, E->aabb ); - float costCD = areaC + b2Perimeter( aabbCE ); - if ( costCD < bestCost ) - { - bestRotation = b2_rotateCD; - bestCost = costCD; - } - - // Cost of swapping C and E - b2AABB aabbCD = b2AABB_Union( C->aabb, D->aabb ); - float costCE = areaC + b2Perimeter( aabbCD ); - if ( costCE < bestCost ) - { - bestRotation = b2_rotateCE; - // bestCost = costCE; - } - - switch ( bestRotation ) - { - case b2_rotateNone: - break; - - case b2_rotateBF: - A->internal.child1 = iF; - C->internal.child1 = iB; - - B->parent = iC; - F->parent = iA; - - C->aabb = aabbBG; - C->height = 1 + b2MaxUInt16( B->height, G->height ); - A->height = 1 + b2MaxUInt16( C->height, F->height ); - C->flags |= (B->flags | G->flags) & b2_enlargedNode; - A->flags |= (C->flags | F->flags) & b2_enlargedNode; - break; - - case b2_rotateBG: - A->internal.child1 = iG; - C->internal.child2 = iB; - - B->parent = iC; - G->parent = iA; - - C->aabb = aabbBF; - C->height = 1 + b2MaxUInt16( B->height, F->height ); - A->height = 1 + b2MaxUInt16( C->height, G->height ); - C->flags |= (B->flags | F->flags) & b2_enlargedNode; - A->flags |= (C->flags | G->flags) & b2_enlargedNode; - break; - - case b2_rotateCD: - A->internal.child2 = iD; - B->internal.child1 = iC; - - C->parent = iB; - D->parent = iA; - - B->aabb = aabbCE; - B->height = 1 + b2MaxUInt16( C->height, E->height ); - A->height = 1 
+ b2MaxUInt16( B->height, D->height ); - B->flags |= (C->flags | E->flags) & b2_enlargedNode; - A->flags |= (B->flags | D->flags) & b2_enlargedNode; - break; - - case b2_rotateCE: - A->internal.child2 = iE; - B->internal.child2 = iC; - - C->parent = iB; - E->parent = iA; - - B->aabb = aabbCD; - B->height = 1 + b2MaxUInt16( C->height, D->height ); - A->height = 1 + b2MaxUInt16( B->height, E->height ); - B->flags |= (C->flags | D->flags) & b2_enlargedNode; - A->flags |= (B->flags | E->flags) & b2_enlargedNode; - break; - - default: - B2_ASSERT( false ); - break; - } - } -} - -static void b2InsertLeaf( b2DynamicTree* tree, int32_t leaf, bool shouldRotate ) -{ - if ( tree->root == B2_NULL_INDEX ) - { - tree->root = leaf; - tree->nodes[tree->root].parent = B2_NULL_INDEX; - return; - } - - // Stage 1: find the best sibling for this node - b2AABB leafAABB = tree->nodes[leaf].aabb; - int32_t sibling = b2FindBestSibling( tree, leafAABB ); - - // Stage 2: create a new parent for the leaf and sibling - int32_t oldParent = tree->nodes[sibling].parent; - int32_t newParent = b2AllocateNode( tree ); - - // warning: node pointer can change after allocation - b2TreeNode* nodes = tree->nodes; - nodes[newParent].parent = oldParent; - nodes[newParent].aabb = b2AABB_Union( leafAABB, nodes[sibling].aabb ); - nodes[newParent].height = nodes[sibling].height + 1; - - if ( oldParent != B2_NULL_INDEX ) - { - // The sibling was not the root. - if ( nodes[oldParent].internal.child1 == sibling ) - { - nodes[oldParent].internal.child1 = newParent; - } - else - { - nodes[oldParent].internal.child2 = newParent; - } - - nodes[newParent].internal.child1 = sibling; - nodes[newParent].internal.child2 = leaf; - nodes[sibling].parent = newParent; - nodes[leaf].parent = newParent; - } - else - { - // The sibling was the root. 
- nodes[newParent].internal.child1 = sibling; - nodes[newParent].internal.child2 = leaf; - nodes[sibling].parent = newParent; - nodes[leaf].parent = newParent; - tree->root = newParent; - } - - // Stage 3: walk back up the tree fixing heights and AABBs - int32_t index = nodes[leaf].parent; - while ( index != B2_NULL_INDEX ) - { - int32_t child1 = nodes[index].internal.child1; - int32_t child2 = nodes[index].internal.child2; - - B2_ASSERT( child1 != B2_NULL_INDEX ); - B2_ASSERT( child2 != B2_NULL_INDEX ); - - nodes[index].aabb = b2AABB_Union( nodes[child1].aabb, nodes[child2].aabb ); - nodes[index].height = 1 + b2MaxUInt16( nodes[child1].height, nodes[child2].height ); - nodes[index].flags |= (nodes[child1].flags | nodes[child2].flags) & b2_enlargedNode; - - if ( shouldRotate ) - { - b2RotateNodes( tree, index ); - } - - index = nodes[index].parent; - } -} - -static void b2RemoveLeaf( b2DynamicTree* tree, int32_t leaf ) -{ - if ( leaf == tree->root ) - { - tree->root = B2_NULL_INDEX; - return; - } - - b2TreeNode* nodes = tree->nodes; - - int32_t parent = nodes[leaf].parent; - int32_t grandParent = nodes[parent].parent; - int32_t sibling; - if ( nodes[parent].internal.child1 == leaf ) - { - sibling = nodes[parent].internal.child2; - } - else - { - sibling = nodes[parent].internal.child1; - } - - if ( grandParent != B2_NULL_INDEX ) - { - // Destroy parent and connect sibling to grandParent. - if ( nodes[grandParent].internal.child1 == parent ) - { - nodes[grandParent].internal.child1 = sibling; - } - else - { - nodes[grandParent].internal.child2 = sibling; - } - nodes[sibling].parent = grandParent; - b2FreeNode( tree, parent ); - - // Adjust ancestor bounds. 
- int32_t index = grandParent; - while ( index != B2_NULL_INDEX ) - { - b2TreeNode* node = nodes + index; - b2TreeNode* child1 = nodes + node->internal.child1; - b2TreeNode* child2 = nodes + node->internal.child2; - - // Fast union using SSE - //__m128 aabb1 = _mm_load_ps(&child1->aabb.lowerBound.x); - //__m128 aabb2 = _mm_load_ps(&child2->aabb.lowerBound.x); - //__m128 lower = _mm_min_ps(aabb1, aabb2); - //__m128 upper = _mm_max_ps(aabb1, aabb2); - //__m128 aabb = _mm_shuffle_ps(lower, upper, _MM_SHUFFLE(3, 2, 1, 0)); - //_mm_store_ps(&node->aabb.lowerBound.x, aabb); - - node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); - node->height = 1 + b2MaxUInt16( child1->height, child2->height ); - - index = node->parent; - } - } - else - { - tree->root = sibling; - tree->nodes[sibling].parent = B2_NULL_INDEX; - b2FreeNode( tree, parent ); - } -} - -// Create a proxy in the tree as a leaf node. We return the index of the node instead of a pointer so that we can grow -// the node pool. -int32_t b2DynamicTree_CreateProxy( b2DynamicTree* tree, b2AABB aabb, uint64_t categoryBits, int32_t userData ) -{ - B2_ASSERT( -b2_huge < aabb.lowerBound.x && aabb.lowerBound.x < b2_huge ); - B2_ASSERT( -b2_huge < aabb.lowerBound.y && aabb.lowerBound.y < b2_huge ); - B2_ASSERT( -b2_huge < aabb.upperBound.x && aabb.upperBound.x < b2_huge ); - B2_ASSERT( -b2_huge < aabb.upperBound.y && aabb.upperBound.y < b2_huge ); - - int32_t proxyId = b2AllocateNode( tree ); - b2TreeNode* node = tree->nodes + proxyId; - - node->aabb = aabb; - node->leaf.userData = userData; - node->leaf.categoryBits = (uint32_t)(categoryBits & UINT32_MAX); - node->height = 0; - node->flags |= b2_leafNode; - - bool shouldRotate = true; - b2InsertLeaf( tree, proxyId, shouldRotate ); - - tree->proxyCount += 1; - - // todo temp testing - //b2DynamicTree_Validate( tree ); - - return proxyId; -} - -void b2DynamicTree_DestroyProxy( b2DynamicTree* tree, int32_t proxyId ) -{ - B2_ASSERT( 0 <= proxyId && proxyId < 
tree->nodeCapacity ); - B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); - - b2RemoveLeaf( tree, proxyId ); - b2FreeNode( tree, proxyId ); - - B2_ASSERT( tree->proxyCount > 0 ); - tree->proxyCount -= 1; -} - -int32_t b2DynamicTree_GetProxyCount( const b2DynamicTree* tree ) -{ - return tree->proxyCount; -} - -void b2DynamicTree_MoveProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aabb ) -{ - B2_ASSERT( b2AABB_IsValid( aabb ) ); - B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < b2_huge ); - B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < b2_huge ); - B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity ); - B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); - - b2RemoveLeaf( tree, proxyId ); - - tree->nodes[proxyId].aabb = aabb; - - bool shouldRotate = false; - b2InsertLeaf( tree, proxyId, shouldRotate ); -} - -void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2AABB aabb ) -{ - b2TreeNode* nodes = tree->nodes; - - B2_ASSERT( b2AABB_IsValid( aabb ) ); - B2_ASSERT( aabb.upperBound.x - aabb.lowerBound.x < b2_huge ); - B2_ASSERT( aabb.upperBound.y - aabb.lowerBound.y < b2_huge ); - B2_ASSERT( 0 <= proxyId && proxyId < tree->nodeCapacity ); - B2_ASSERT( b2IsLeaf( tree->nodes + proxyId ) ); - - // Caller must ensure this - B2_ASSERT( b2AABB_Contains( nodes[proxyId].aabb, aabb ) == false ); - - nodes[proxyId].aabb = aabb; - - int32_t parentIndex = nodes[proxyId].parent; - while ( parentIndex != B2_NULL_INDEX ) - { - bool changed = b2EnlargeAABB( &nodes[parentIndex].aabb, aabb ); - nodes[parentIndex].flags |= b2_enlargedNode; - parentIndex = nodes[parentIndex].parent; - - if ( changed == false ) - { - break; - } - } - - while ( parentIndex != B2_NULL_INDEX ) - { - if ( nodes[parentIndex].flags & b2_enlargedNode ) - { - // early out because this ancestor was previously ascended and marked as enlarged - break; - } - - nodes[parentIndex].flags |= b2_enlargedNode; - parentIndex = nodes[parentIndex].parent; - } -} - -int b2DynamicTree_GetHeight( 
const b2DynamicTree* tree ) -{ - if ( tree->root == B2_NULL_INDEX ) - { - return 0; - } - - return tree->nodes[tree->root].height; -} - -float b2DynamicTree_GetAreaRatio( const b2DynamicTree* tree ) -{ - if ( tree->root == B2_NULL_INDEX ) - { - return 0.0f; - } - - const b2TreeNode* root = tree->nodes + tree->root; - float rootArea = b2Perimeter( root->aabb ); - - float totalArea = 0.0f; - for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) - { - const b2TreeNode* node = tree->nodes + i; - if ( node->height < 0 || b2IsLeaf( node ) || i == tree->root ) - { - // Free node in pool - continue; - } - - totalArea += b2Perimeter( node->aabb ); - } - - return totalArea / rootArea; -} - -// Compute the height of a sub-tree. -static int b2ComputeHeight( const b2DynamicTree* tree, int32_t nodeId ) -{ - B2_ASSERT( 0 <= nodeId && nodeId < tree->nodeCapacity ); - b2TreeNode* node = tree->nodes + nodeId; - - if ( b2IsLeaf( node ) ) - { - return 0; - } - - int32_t height1 = b2ComputeHeight( tree, node->internal.child1 ); - int32_t height2 = b2ComputeHeight( tree, node->internal.child2 ); - return 1 + b2MaxInt( height1, height2 ); -} - -int b2DynamicTree_ComputeHeight( const b2DynamicTree* tree ) -{ - int height = b2ComputeHeight( tree, tree->root ); - return height; -} - -#if B2_VALIDATE -static void b2ValidateStructure( const b2DynamicTree* tree, int32_t index ) -{ - if ( index == B2_NULL_INDEX ) - { - return; - } - - if ( index == tree->root ) - { - B2_ASSERT( tree->nodes[index].parent == B2_NULL_INDEX ); - } - - const b2TreeNode* node = tree->nodes + index; - - int32_t child1 = node->internal.child1; - int32_t child2 = node->internal.child2; - - if ( b2IsLeaf( node ) ) - { - B2_ASSERT( node->height == 0 ); - return; - } - - B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); - B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); - - B2_ASSERT( tree->nodes[child1].parent == index ); - B2_ASSERT( tree->nodes[child2].parent == index ); - - if ( (tree->nodes[child1].flags | 
tree->nodes[child2].flags) & b2_enlargedNode ) - { - B2_ASSERT( node->flags & b2_enlargedNode ); - } - - b2ValidateStructure( tree, child1 ); - b2ValidateStructure( tree, child2 ); -} - -static void b2ValidateMetrics( const b2DynamicTree* tree, int32_t index ) -{ - if ( index == B2_NULL_INDEX ) - { - return; - } - - const b2TreeNode* node = tree->nodes + index; - - B2_ASSERT( node->flags & b2_allocatedNode ); - - if ( b2IsLeaf( node ) ) - { - B2_ASSERT( node->height == 0 ); - return; - } - - int child1 = node->internal.child1; - int child2 = node->internal.child2; - - B2_ASSERT( 0 <= child1 && child1 < tree->nodeCapacity ); - B2_ASSERT( 0 <= child2 && child2 < tree->nodeCapacity ); - - int height1 = tree->nodes[child1].height; - int height2 = tree->nodes[child2].height; - int height = 1 + b2MaxInt( height1, height2 ); - B2_ASSERT( node->height == height ); - - // b2AABB aabb = b2AABB_Union(tree->nodes[child1].aabb, tree->nodes[child2].aabb); - - B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child1].aabb ) ); - B2_ASSERT( b2AABB_Contains( node->aabb, tree->nodes[child2].aabb ) ); - - // B2_ASSERT(aabb.lowerBound.x == node->aabb.lowerBound.x); - // B2_ASSERT(aabb.lowerBound.y == node->aabb.lowerBound.y); - // B2_ASSERT(aabb.upperBound.x == node->aabb.upperBound.x); - // B2_ASSERT(aabb.upperBound.y == node->aabb.upperBound.y); - - b2ValidateMetrics( tree, child1 ); - b2ValidateMetrics( tree, child2 ); -} -#endif - -void b2DynamicTree_Validate( const b2DynamicTree* tree ) -{ -#if B2_VALIDATE - if ( tree->root == B2_NULL_INDEX ) - { - return; - } - - b2ValidateStructure( tree, tree->root ); - b2ValidateMetrics( tree, tree->root ); - - int32_t freeCount = 0; - int32_t freeIndex = tree->freeList; - while ( freeIndex != B2_NULL_INDEX ) - { - B2_ASSERT( 0 <= freeIndex && freeIndex < tree->nodeCapacity ); - freeIndex = tree->nodes[freeIndex].next; - ++freeCount; - } - - int32_t height = b2DynamicTree_GetHeight( tree ); - int32_t computedHeight = 
b2DynamicTree_ComputeHeight( tree ); - B2_ASSERT( height == computedHeight ); - - B2_ASSERT( tree->nodeCount + freeCount == tree->nodeCapacity ); -#else - B2_MAYBE_UNUSED( tree ); -#endif -} - -int32_t b2DynamicTree_GetMaxBalance( const b2DynamicTree* tree ) -{ - int maxBalance = 0; - for ( int i = 0; i < tree->nodeCapacity; ++i ) - { - const b2TreeNode* node = tree->nodes + i; - if ( node->height <= 1 ) - { - continue; - } - - B2_ASSERT( b2IsLeaf( node ) == false ); - - int child1 = node->internal.child1; - int child2 = node->internal.child2; - int balance = b2AbsInt( tree->nodes[child2].height - tree->nodes[child1].height ); - maxBalance = b2MaxInt( maxBalance, balance ); - } - - return maxBalance; -} - -void b2DynamicTree_RebuildBottomUp( b2DynamicTree* tree ) -{ - int* nodes = b2Alloc( tree->nodeCount * sizeof( int ) ); - int count = 0; - - // Build array of leaves. Free the rest. - for ( int i = 0; i < tree->nodeCapacity; ++i ) - { - if ( (tree->nodes[i].flags & b2_allocatedNode) == 0 ) - { - // free node in pool - continue; - } - - if ( b2IsLeaf( tree->nodes + i ) ) - { - tree->nodes[i].parent = B2_NULL_INDEX; - nodes[count] = i; - ++count; - } - else - { - b2FreeNode( tree, i ); - } - } - - while ( count > 1 ) - { - float minCost = FLT_MAX; - int32_t iMin = -1, jMin = -1; - for ( int32_t i = 0; i < count; ++i ) - { - b2AABB aabbi = tree->nodes[nodes[i]].aabb; - - for ( int32_t j = i + 1; j < count; ++j ) - { - b2AABB aabbj = tree->nodes[nodes[j]].aabb; - b2AABB b = b2AABB_Union( aabbi, aabbj ); - float cost = b2Perimeter( b ); - if ( cost < minCost ) - { - iMin = i; - jMin = j; - minCost = cost; - } - } - } - - int32_t index1 = nodes[iMin]; - int32_t index2 = nodes[jMin]; - b2TreeNode* child1 = tree->nodes + index1; - b2TreeNode* child2 = tree->nodes + index2; - - int32_t parentIndex = b2AllocateNode( tree ); - b2TreeNode* parent = tree->nodes + parentIndex; - parent->internal.child1 = index1; - parent->internal.child2 = index2; - parent->aabb = 
b2AABB_Union( child1->aabb, child2->aabb ); - parent->height = 1 + b2MaxUInt16( child1->height, child2->height ); - parent->parent = B2_NULL_INDEX; - - child1->parent = parentIndex; - child2->parent = parentIndex; - - nodes[jMin] = nodes[count - 1]; - nodes[iMin] = parentIndex; - --count; - } - - tree->root = nodes[0]; - b2Free( nodes, tree->nodeCount * sizeof( b2TreeNode ) ); - - b2DynamicTree_Validate( tree ); -} - -void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin ) -{ - // shift all AABBs - for ( int32_t i = 0; i < tree->nodeCapacity; ++i ) - { - b2TreeNode* n = tree->nodes + i; - n->aabb.lowerBound.x -= newOrigin.x; - n->aabb.lowerBound.y -= newOrigin.y; - n->aabb.upperBound.x -= newOrigin.x; - n->aabb.upperBound.y -= newOrigin.y; - } -} - -int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ) -{ - size_t size = sizeof( b2DynamicTree ) + sizeof( b2TreeNode ) * tree->nodeCapacity + - tree->rebuildCapacity * ( sizeof( int32_t ) + sizeof( b2AABB ) + sizeof( b2Vec2 ) + sizeof( int32_t ) ); - - return (int)size; -} - -void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, - void* context ) -{ - int32_t stack[b2_treeStackSize]; - int32_t stackCount = 0; - stack[stackCount++] = tree->root; - - uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); - - while ( stackCount > 0 ) - { - int32_t nodeId = stack[--stackCount]; - if ( nodeId == B2_NULL_INDEX ) - { - continue; - } - - const b2TreeNode* node = tree->nodes + nodeId; - - if ( b2AABB_Overlaps( node->aabb, aabb ) ) - { - if ( b2IsLeaf( node ) && ( node->leaf.categoryBits & maskBits32 ) != 0 ) - { - // callback to user code with proxy id - bool proceed = callback( nodeId, node->leaf.userData, context ); - if ( proceed == false ) - { - return; - } - } - else - { - B2_ASSERT( stackCount < b2_treeStackSize - 1 ); - if ( stackCount < b2_treeStackSize - 1 ) - { - stack[stackCount++] = node->internal.child1; - stack[stackCount++] 
= node->internal.child2; - } - } - } - } -} - -b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, - b2TreeRayCastCallbackFcn* callback, void* context ) -{ - b2TraversalResult result = { 0 }; - - b2Vec2 p1 = input->origin; - b2Vec2 d = input->translation; - - b2Vec2 r = b2Normalize( d ); - - // v is perpendicular to the segment. - b2Vec2 v = b2CrossSV( 1.0f, r ); - b2Vec2 abs_v = b2Abs( v ); - - // Separating axis for segment (Gino, p80). - // |dot(v, p1 - c)| > dot(|v|, h) - - float maxFraction = input->maxFraction; - - b2Vec2 p2 = b2MulAdd( p1, maxFraction, d ); - - // Build a bounding box for the segment. - b2AABB segmentAABB = { b2Min( p1, p2 ), b2Max( p1, p2 ) }; - - int32_t stack[b2_treeStackSize]; - int32_t stackCount = 0; - stack[stackCount++] = tree->root; - - b2RayCastInput subInput = *input; - uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); - - while ( stackCount > 0 ) - { - int32_t nodeId = stack[--stackCount]; - if ( nodeId == B2_NULL_INDEX ) - { - continue; - } - - const b2TreeNode* node = tree->nodes + nodeId; - result.nodeVisits += 1; - - b2AABB nodeAABB = node->aabb; - - if ( b2AABB_Overlaps( nodeAABB, segmentAABB ) == false ) - { - continue; - } - - // Separating axis for segment (Gino, p80). - // |dot(v, p1 - c)| > dot(|v|, h) - // radius extension is added to the node in this case - b2Vec2 c = b2AABB_Center( nodeAABB ); - b2Vec2 h = b2AABB_Extents( nodeAABB ); - float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); - float term2 = b2Dot( abs_v, h ); - if ( term2 < term1 ) - { - continue; - } - - if ( b2IsLeaf( node ) ) - { - if ( ( node->leaf.categoryBits & maskBits32 ) == 0 ) - { - continue; - } - - subInput.maxFraction = maxFraction; - - float value = callback( &subInput, nodeId, node->leaf.userData, context ); - result.leafVisits += 1; - - if ( value == 0.0f ) - { - // The client has terminated the ray cast. 
- return result; - } - - if ( 0.0f < value && value < maxFraction ) - { - // Update segment bounding box. - maxFraction = value; - p2 = b2MulAdd( p1, maxFraction, d ); - segmentAABB.lowerBound = b2Min( p1, p2 ); - segmentAABB.upperBound = b2Max( p1, p2 ); - } - } - else - { - B2_ASSERT( stackCount < b2_treeStackSize - 1 ); - if ( stackCount < b2_treeStackSize - 1 ) - { - // TODO_ERIN just put one node on the stack, continue on a child node - // TODO_ERIN test ordering children by nearest to ray origin - stack[stackCount++] = node->internal.child1; - stack[stackCount++] = node->internal.child2; - } - } - } - - return result; -} - -void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, - b2TreeShapeCastCallbackFcn* callback, void* context ) -{ - if ( input->count == 0 ) - { - return; - } - - b2AABB originAABB = { input->points[0], input->points[0] }; - for ( int i = 1; i < input->count; ++i ) - { - originAABB.lowerBound = b2Min( originAABB.lowerBound, input->points[i] ); - originAABB.upperBound = b2Max( originAABB.upperBound, input->points[i] ); - } - - b2Vec2 radius = { input->radius, input->radius }; - - originAABB.lowerBound = b2Sub( originAABB.lowerBound, radius ); - originAABB.upperBound = b2Add( originAABB.upperBound, radius ); - - b2Vec2 p1 = b2AABB_Center( originAABB ); - b2Vec2 extension = b2AABB_Extents( originAABB ); - - // v is perpendicular to the segment. - b2Vec2 r = input->translation; - b2Vec2 v = b2CrossSV( 1.0f, r ); - b2Vec2 abs_v = b2Abs( v ); - - // Separating axis for segment (Gino, p80). 
- // |dot(v, p1 - c)| > dot(|v|, h) - - float maxFraction = input->maxFraction; - - // Build total box for the shape cast - b2Vec2 t = b2MulSV( maxFraction, input->translation ); - b2AABB totalAABB = { - b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ), - b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ), - }; - - b2ShapeCastInput subInput = *input; - uint32_t maskBits32 = (uint32_t)( maskBits & UINT32_MAX ); - - int32_t stack[b2_treeStackSize]; - int32_t stackCount = 0; - stack[stackCount++] = tree->root; - - while ( stackCount > 0 ) - { - int32_t nodeId = stack[--stackCount]; - if ( nodeId == B2_NULL_INDEX ) - { - continue; - } - - const b2TreeNode* node = tree->nodes + nodeId; - if ( b2AABB_Overlaps( node->aabb, totalAABB ) == false ) - { - continue; - } - - // Separating axis for segment (Gino, p80). - // |dot(v, p1 - c)| > dot(|v|, h) - // radius extension is added to the node in this case - b2Vec2 c = b2AABB_Center( node->aabb ); - b2Vec2 h = b2Add( b2AABB_Extents( node->aabb ), extension ); - float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); - float term2 = b2Dot( abs_v, h ); - if ( term2 < term1 ) - { - continue; - } - - if ( b2IsLeaf( node ) && ( node->leaf.categoryBits & maskBits32 ) != 0 ) - { - subInput.maxFraction = maxFraction; - - float value = callback( &subInput, nodeId, node->leaf.userData, context ); - - if ( value == 0.0f ) - { - // The client has terminated the ray cast. - return; - } - - if ( 0.0f < value && value < maxFraction ) - { - // Update segment bounding box. 
- maxFraction = value; - t = b2MulSV( maxFraction, input->translation ); - totalAABB.lowerBound = b2Min( originAABB.lowerBound, b2Add( originAABB.lowerBound, t ) ); - totalAABB.upperBound = b2Max( originAABB.upperBound, b2Add( originAABB.upperBound, t ) ); - } - } - else - { - B2_ASSERT( stackCount < b2_treeStackSize - 1 ); - if ( stackCount < b2_treeStackSize - 1 ) - { - // TODO_ERIN just put one node on the stack, continue on a child node - // TODO_ERIN test ordering children by nearest to ray origin - stack[stackCount++] = node->internal.child1; - stack[stackCount++] = node->internal.child2; - } - } - } -} - -// Median split heuristic -static int32_t b2PartitionMid( int32_t* indices, b2Vec2* centers, int32_t count ) -{ - // Handle trivial case - if ( count <= 2 ) - { - return count / 2; - } - - // todo SIMD? - b2Vec2 lowerBound = centers[0]; - b2Vec2 upperBound = centers[0]; - - for ( int32_t i = 1; i < count; ++i ) - { - lowerBound = b2Min( lowerBound, centers[i] ); - upperBound = b2Max( upperBound, centers[i] ); - } - - b2Vec2 d = b2Sub( upperBound, lowerBound ); - b2Vec2 c = { 0.5f * ( lowerBound.x + upperBound.x ), 0.5f * ( lowerBound.y + upperBound.y ) }; - - // Partition longest axis using the Hoare partition scheme - // https://en.wikipedia.org/wiki/Quicksort - // https://nicholasvadivelu.com/2021/01/11/array-partition/ - int32_t i1 = 0, i2 = count; - if ( d.x > d.y ) - { - float pivot = c.x; - - while ( i1 < i2 ) - { - while ( i1 < i2 && centers[i1].x < pivot ) - { - i1 += 1; - }; - - while ( i1 < i2 && centers[i2 - 1].x >= pivot ) - { - i2 -= 1; - }; - - if ( i1 < i2 ) - { - // Swap indices - { - int32_t temp = indices[i1]; - indices[i1] = indices[i2 - 1]; - indices[i2 - 1] = temp; - } - - // Swap centers - { - b2Vec2 temp = centers[i1]; - centers[i1] = centers[i2 - 1]; - centers[i2 - 1] = temp; - } - - i1 += 1; - i2 -= 1; - } - } - } - else - { - float pivot = c.y; - - while ( i1 < i2 ) - { - while ( i1 < i2 && centers[i1].y < pivot ) - { - i1 += 1; - 
}; - - while ( i1 < i2 && centers[i2 - 1].y >= pivot ) - { - i2 -= 1; - }; - - if ( i1 < i2 ) - { - // Swap indices - { - int32_t temp = indices[i1]; - indices[i1] = indices[i2 - 1]; - indices[i2 - 1] = temp; - } - - // Swap centers - { - b2Vec2 temp = centers[i1]; - centers[i1] = centers[i2 - 1]; - centers[i2 - 1] = temp; - } - - i1 += 1; - i2 -= 1; - } - } - } - B2_ASSERT( i1 == i2 ); - - if ( i1 > 0 && i1 < count ) - { - return i1; - } - else - { - return count / 2; - } -} - -// Temporary data used to track the rebuild of a tree node -struct b2RebuildItem -{ - int32_t nodeIndex; - int32_t childCount; - - // Leaf indices - int32_t startIndex; - int32_t splitIndex; - int32_t endIndex; -}; - -// Returns root node index -static int32_t b2BuildTree( b2DynamicTree* tree, int32_t leafCount ) -{ - b2TreeNode* nodes = tree->nodes; - int32_t* leafIndices = tree->leafIndices; - - if ( leafCount == 1 ) - { - nodes[leafIndices[0]].parent = B2_NULL_INDEX; - return leafIndices[0]; - } - - b2Vec2* leafCenters = tree->leafCenters; - - // todo large stack item - struct b2RebuildItem stack[b2_treeStackSize]; - int32_t top = 0; - - stack[0].nodeIndex = b2AllocateNode( tree ); - stack[0].childCount = -1; - stack[0].startIndex = 0; - stack[0].endIndex = leafCount; - stack[0].splitIndex = b2PartitionMid( leafIndices, leafCenters, leafCount ); - - while ( true ) - { - struct b2RebuildItem* item = stack + top; - - item->childCount += 1; - - if ( item->childCount == 2 ) - { - // This internal node has both children established - - if ( top == 0 ) - { - // all done - break; - } - - struct b2RebuildItem* parentItem = stack + ( top - 1 ); - b2TreeNode* parentNode = nodes + parentItem->nodeIndex; - - if ( parentItem->childCount == 0 ) - { - B2_ASSERT( parentNode->internal.child1 == B2_NULL_INDEX ); - parentNode->internal.child1 = item->nodeIndex; - } - else - { - B2_ASSERT( parentItem->childCount == 1 ); - B2_ASSERT( parentNode->internal.child2 == B2_NULL_INDEX ); - 
parentNode->internal.child2 = item->nodeIndex; - } - - b2TreeNode* node = nodes + item->nodeIndex; - - B2_ASSERT( node->parent == B2_NULL_INDEX ); - node->parent = parentItem->nodeIndex; - - B2_ASSERT( node->internal.child1 != B2_NULL_INDEX ); - B2_ASSERT( node->internal.child2 != B2_NULL_INDEX ); - b2TreeNode* child1 = nodes + node->internal.child1; - b2TreeNode* child2 = nodes + node->internal.child2; - - node->aabb = b2AABB_Union( child1->aabb, child2->aabb ); - node->height = 1 + b2MaxUInt16( child1->height, child2->height ); - - // Pop stack - top -= 1; - } - else - { - int32_t startIndex, endIndex; - if ( item->childCount == 0 ) - { - startIndex = item->startIndex; - endIndex = item->splitIndex; - } - else - { - B2_ASSERT( item->childCount == 1 ); - startIndex = item->splitIndex; - endIndex = item->endIndex; - } - - int32_t count = endIndex - startIndex; - - if ( count == 1 ) - { - int32_t childIndex = leafIndices[startIndex]; - b2TreeNode* node = nodes + item->nodeIndex; - - if ( item->childCount == 0 ) - { - B2_ASSERT( node->internal.child1 == B2_NULL_INDEX ); - node->internal.child1 = childIndex; - } - else - { - B2_ASSERT( item->childCount == 1 ); - B2_ASSERT( node->internal.child2 == B2_NULL_INDEX ); - node->internal.child2 = childIndex; - } - - b2TreeNode* childNode = nodes + childIndex; - B2_ASSERT( childNode->parent == B2_NULL_INDEX ); - childNode->parent = item->nodeIndex; - } - else - { - B2_ASSERT( count > 0 ); - B2_ASSERT( top < b2_treeStackSize ); - - top += 1; - struct b2RebuildItem* newItem = stack + top; - newItem->nodeIndex = b2AllocateNode( tree ); - newItem->childCount = -1; - newItem->startIndex = startIndex; - newItem->endIndex = endIndex; -#if B2_TREE_HEURISTIC == 0 - newItem->splitIndex = b2PartitionMid( leafIndices + startIndex, leafCenters + startIndex, count ); -#else - newItem->splitIndex = - b2PartitionSAH( leafIndices + startIndex, binIndices + startIndex, leafBoxes + startIndex, count ); -#endif - newItem->splitIndex += 
startIndex; - } - } - } - - b2TreeNode* rootNode = nodes + stack[0].nodeIndex; - B2_ASSERT( rootNode->parent == B2_NULL_INDEX ); - B2_ASSERT( rootNode->internal.child1 != B2_NULL_INDEX ); - B2_ASSERT( rootNode->internal.child2 != B2_NULL_INDEX ); - - b2TreeNode* child1 = nodes + rootNode->internal.child1; - b2TreeNode* child2 = nodes + rootNode->internal.child2; - - rootNode->aabb = b2AABB_Union( child1->aabb, child2->aabb ); - rootNode->height = 1 + b2MaxUInt16( child1->height, child2->height ); - - return stack[0].nodeIndex; -} - -// Not safe to access tree during this operation because it may grow -int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) -{ - int32_t proxyCount = tree->proxyCount; - if ( proxyCount == 0 ) - { - return 0; - } - - // Ensure capacity for rebuild space - if ( proxyCount > tree->rebuildCapacity ) - { - int32_t newCapacity = proxyCount + proxyCount / 2; - - b2Free( tree->leafIndices, tree->rebuildCapacity * sizeof( int32_t ) ); - tree->leafIndices = b2Alloc( newCapacity * sizeof( int32_t ) ); - -#if B2_TREE_HEURISTIC == 0 - b2Free( tree->leafCenters, tree->rebuildCapacity * sizeof( b2Vec2 ) ); - tree->leafCenters = b2Alloc( newCapacity * sizeof( b2Vec2 ) ); -#else - b2Free( tree->leafBoxes, tree->rebuildCapacity * sizeof( b2AABB ) ); - tree->leafBoxes = b2Alloc( newCapacity * sizeof( b2AABB ) ); - b2Free( tree->binIndices, tree->rebuildCapacity * sizeof( int32_t ) ); - tree->binIndices = b2Alloc( newCapacity * sizeof( int32_t ) ); -#endif - tree->rebuildCapacity = newCapacity; - } - - int32_t leafCount = 0; - int32_t stack[b2_treeStackSize]; - int32_t stackCount = 0; - - int32_t nodeIndex = tree->root; - b2TreeNode* nodes = tree->nodes; - b2TreeNode* node = nodes + nodeIndex; - - // These are the nodes that get sorted to rebuild the tree. - // I'm using indices because the node pool may grow during the build. 
- int32_t* leafIndices = tree->leafIndices; - -#if B2_TREE_HEURISTIC == 0 - b2Vec2* leafCenters = tree->leafCenters; -#else - b2AABB* leafBoxes = tree->leafBoxes; -#endif - - // Gather all proxy nodes that have grown and all internal nodes that haven't grown. Both are - // considered leaves in the tree rebuild. - // Free all internal nodes that have grown. - // todo use a node growth metric instead of simply enlarged to reduce rebuild size and frequency - // this should be weighed against b2_aabbMargin - while ( true ) - { - if ( node->height == 0 || ( ( node->flags & b2_enlargedNode) == 0 && fullBuild == false ) ) - { - leafIndices[leafCount] = nodeIndex; -#if B2_TREE_HEURISTIC == 0 - leafCenters[leafCount] = b2AABB_Center( node->aabb ); -#else - leafBoxes[leafCount] = node->aabb; -#endif - leafCount += 1; - - // Detach - node->parent = B2_NULL_INDEX; - } - else - { - int32_t doomedNodeIndex = nodeIndex; - - // Handle children - nodeIndex = node->internal.child1; - - B2_ASSERT( stackCount < b2_treeStackSize ); - if ( stackCount < b2_treeStackSize ) - { - stack[stackCount++] = node->internal.child2; - } - - node = nodes + nodeIndex; - - // Remove doomed node - b2FreeNode( tree, doomedNodeIndex ); - - continue; - } - - if ( stackCount == 0 ) - { - break; - } - - nodeIndex = stack[--stackCount]; - node = nodes + nodeIndex; - } - -#if B2_VALIDATE == 1 - int32_t capacity = tree->nodeCapacity; - for ( int32_t i = 0; i < capacity; ++i ) - { - if ( nodes[i].flags & b2_allocatedNode ) - { - B2_ASSERT( (nodes[i].flags & b2_enlargedNode) == 0 ); - } - } -#endif - - B2_ASSERT( leafCount <= proxyCount ); - - tree->root = b2BuildTree( tree, leafCount ); - - b2DynamicTree_Validate( tree ); - - return leafCount; -} - -#endif From 337788d3801b2a57b4dd777039030a4e3c6abf04 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 5 Oct 2024 22:45:13 -0700 Subject: [PATCH 05/11] tried Dirk's recursion --- src/dynamic_tree.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ 1 file 
changed, 105 insertions(+) diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 394af6e8f..28bf972a6 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -1153,6 +1153,108 @@ void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskB } } +#define B2_DIRK_RECURSE 0 + +#if B2_DIRK_RECURSE == 1 +b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, + b2TreeRayCastCallbackFcn* callback, void* context ) +{ + b2TraversalResult result = { 0 }; + + b2Vec2 p1 = input->origin; + b2Vec2 d = input->translation; + + b2Vec2 r = b2Normalize( d ); + + // v is perpendicular to the segment. + b2Vec2 v = b2CrossSV( 1.0f, r ); + b2Vec2 abs_v = b2Abs( v ); + + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + + float maxFraction = input->maxFraction; + + b2Vec2 p2 = b2MulAdd( p1, maxFraction, d ); + + // Build a bounding box for the segment. + b2AABB segmentAABB = { b2Min( p1, p2 ), b2Max( p1, p2 ) }; + + int32_t stack[b2_treeStackSize]; + int32_t stackCount = 0; + int32_t nodeId = tree->root; + + b2RayCastInput subInput = *input; + + while ( true ) + { + const b2TreeNode* node = tree->nodes + nodeId; + result.nodeVisits += 1; + + b2AABB nodeAABB = node->aabb; + + if ( ( node->categoryBits & maskBits ) != 0 && + b2AABB_Overlaps( nodeAABB, segmentAABB ) ) + { + // Separating axis for segment (Gino, p80). + // |dot(v, p1 - c)| > dot(|v|, h) + // radius extension is added to the node in this case + b2Vec2 c = b2AABB_Center( nodeAABB ); + b2Vec2 h = b2AABB_Extents( nodeAABB ); + float term1 = b2AbsFloat( b2Dot( v, b2Sub( p1, c ) ) ); + float term2 = b2Dot( abs_v, h ); + if ( term2 >= term1 ) + { + if ( b2IsLeaf( node ) ) + { + subInput.maxFraction = maxFraction; + + float value = callback( &subInput, nodeId, node->userData, context ); + result.leafVisits += 1; + + if ( value == 0.0f ) + { + // The client has terminated the ray cast. 
+ return result; + } + + if ( 0.0f < value && value < maxFraction ) + { + // Update segment bounding box. + maxFraction = value; + p2 = b2MulAdd( p1, maxFraction, d ); + segmentAABB.lowerBound = b2Min( p1, p2 ); + segmentAABB.upperBound = b2Max( p1, p2 ); + } + } + else + { + B2_ASSERT( stackCount < b2_treeStackSize - 1 ); + if ( stackCount < b2_treeStackSize - 1 ) + { + stack[stackCount++] = node->child2; + } + + nodeId = node->child1; + + continue; + } + } + } + + if (stackCount == 0) + { + break; + } + + nodeId = stack[--stackCount]; + } + + return result; +} + +#else + b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, b2TreeRayCastCallbackFcn* callback, void* context ) { @@ -1251,6 +1353,9 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC return result; } +#endif + + void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, b2TreeShapeCastCallbackFcn* callback, void* context ) { From ec8d53888677502e3052b27d603bc8d6fce21311 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 5 Oct 2024 23:01:34 -0700 Subject: [PATCH 06/11] tried rebuilding static tree --- include/box2d/box2d.h | 3 +++ src/dynamic_tree.c | 9 +++------ src/world.c | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index af3eec88a..2d2b688c2 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -184,6 +184,9 @@ B2_API b2Counters b2World_GetCounters( b2WorldId worldId ); /// Dump memory stats to box2d_memory.txt B2_API void b2World_DumpMemoryStats( b2WorldId worldId ); +/// todo testing +B2_API void b2World_RebuildStaticTree( b2WorldId worldId ); + /** @} */ /** diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 28bf972a6..7f188aec8 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -1477,7 +1477,6 @@ static int32_t b2PartitionMid( int32_t* 
indices, b2Vec2* centers, int32_t count return count / 2; } - // todo SIMD? b2Vec2 lowerBound = centers[0]; b2Vec2 upperBound = centers[0]; @@ -1574,15 +1573,13 @@ static int32_t b2PartitionMid( int32_t* indices, b2Vec2* centers, int32_t count { return i1; } - else - { - return count / 2; - } + + return count / 2; } #else -#define B2_BIN_COUNT 8 +#define B2_BIN_COUNT 64 typedef struct b2TreeBin { diff --git a/src/world.c b/src/world.c index 1a89e193f..a6426ecc1 100644 --- a/src/world.c +++ b/src/world.c @@ -2524,6 +2524,20 @@ void b2World_Explode( b2WorldId worldId, const b2ExplosionDef* explosionDef ) &explosionContext ); } +void b2World_RebuildStaticTree(b2WorldId worldId) +{ + b2World* world = b2GetWorldFromId( worldId ); + B2_ASSERT( world->locked == false ); + if ( world->locked ) + { + return; + } + + b2DynamicTree* staticTree = world->broadPhase.trees + b2_staticBody; + b2DynamicTree_Rebuild( staticTree, true ); +} + + #if B2_VALIDATE // When validating islands ids I have to compare the root island // ids because islands are not merged until the next time step. 
From b5622e8d109f0a1c690b4f2e078c2d870c204f3b Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 6 Oct 2024 13:44:44 -0700 Subject: [PATCH 07/11] doubled cast performance by visiting closer nodes first added circle cast and overlap aabb benchmarks --- include/box2d/box2d.h | 34 ++++--- include/box2d/collision.h | 37 ++++---- samples/sample_benchmark.cpp | 179 +++++++++++++++++++++++++++++++---- samples/sample_collision.cpp | 2 +- src/dynamic_tree.c | 138 ++++++++++++++++++--------- src/world.c | 123 +++++++++++++++++------- 6 files changed, 385 insertions(+), 128 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index 2d2b688c2..328129250 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -51,19 +51,20 @@ B2_API b2SensorEvents b2World_GetSensorEvents( b2WorldId worldId ); B2_API b2ContactEvents b2World_GetContactEvents( b2WorldId worldId ); /// Overlap test for all shapes that *potentially* overlap the provided AABB -B2_API void b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ); +B2_API b2TreeStats b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, + void* context ); /// Overlap test for for all shapes that overlap the provided circle -B2_API void b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform, b2QueryFilter filter, - b2OverlapResultFcn* fcn, void* context ); +B2_API b2TreeStats b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform, + b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ); /// Overlap test for all shapes that overlap the provided capsule -B2_API void b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform, b2QueryFilter filter, - b2OverlapResultFcn* fcn, void* context ); +B2_API b2TreeStats b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform, 
+ b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ); /// Overlap test for all shapes that overlap the provided polygon -B2_API void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform, b2QueryFilter filter, - b2OverlapResultFcn* fcn, void* context ); +B2_API b2TreeStats b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform, + b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ); /// Cast a ray into the world to collect shapes in the path of the ray. /// Your callback function controls whether you get the closest point, any point, or n-points. @@ -76,24 +77,27 @@ B2_API void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, /// @param fcn A user implemented callback function /// @param context A user context that is passed along to the callback function /// @return traversal performance counters -B2_API b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, - void* context ); +B2_API b2TreeStats b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, + b2CastResultFcn* fcn, void* context ); /// Cast a ray into the world to collect the closest hit. This is a convenience function. /// This is less general than b2World_CastRay() and does not allow for custom filtering. B2_API b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter ); /// Cast a circle through the world. Similar to a cast ray except that a circle is cast instead of a point. 
-B2_API void b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform, b2Vec2 translation, - b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); +/// @see b2World_CastRay +B2_API b2TreeStats b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform, + b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); /// Cast a capsule through the world. Similar to a cast ray except that a capsule is cast instead of a point. -B2_API void b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform, b2Vec2 translation, - b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); +/// @see b2World_CastRay +B2_API b2TreeStats b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform, + b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); /// Cast a polygon through the world. Similar to a cast ray except that a polygon is cast instead of a point. -B2_API void b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform, b2Vec2 translation, - b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); +/// @see b2World_CastRay +B2_API b2TreeStats b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform, + b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ); /// Enable/disable sleep. If your application does not need sleeping, you can gain some performance /// by disabling sleep completely at the world level. 
diff --git a/include/box2d/collision.h b/include/box2d/collision.h index 4b8ef8880..a9e39b80d 100644 --- a/include/box2d/collision.h +++ b/include/box2d/collision.h @@ -566,16 +566,16 @@ B2_API b2Manifold b2CollideSegmentAndPolygon( const b2Segment* segmentA, b2Trans b2Transform xfB ); /// Compute the contact manifold between a chain segment and a circle -B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA, - const b2Circle* circleB, b2Transform xfB ); +B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA, const b2Circle* circleB, + b2Transform xfB ); /// Compute the contact manifold between a chain segment and a capsule -B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA, - const b2Capsule* capsuleB, b2Transform xfB, b2DistanceCache* cache ); +B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA, const b2Capsule* capsuleB, + b2Transform xfB, b2DistanceCache* cache ); /// Compute the contact manifold between a chain segment and a rounded polygon -B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA, - const b2Polygon* polygonB, b2Transform xfB, b2DistanceCache* cache ); +B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA, const b2Polygon* polygonB, + b2Transform xfB, b2DistanceCache* cache ); /**@}*/ @@ -636,7 +636,7 @@ typedef struct b2TreeNode }; // 4 uint16_t height; // 2 - uint16_t flags; // 2 + uint16_t flags; // 2 } b2TreeNode; /// The dynamic tree structure. This should be considered private data. @@ -677,12 +677,15 @@ typedef struct b2DynamicTree int32_t rebuildCapacity; } b2DynamicTree; -/// These are performance results returned by BVH queries. -typedef struct b2TraversalResult +/// These are performance results returned by dynamic tree queries. 
+typedef struct b2TreeStats { + /// Number of internal nodes visited during the query int32_t nodeVisits; + + /// Number of leaf nodes visited during the query int32_t leafVisits; -} b2TraversalResult; +} b2TreeStats; /// Constructing the tree initializes the node pool. B2_API b2DynamicTree b2DynamicTree_Create( void ); @@ -707,8 +710,9 @@ B2_API void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2 typedef bool b2TreeQueryCallbackFcn( int32_t proxyId, int32_t userData, void* context ); /// Query an AABB for overlapping proxies. The callback class is called for each proxy that overlaps the supplied AABB. -B2_API void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, - void* context ); +/// @return performance data +B2_API b2TreeStats b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, + b2TreeQueryCallbackFcn* callback, void* context ); /// This function receives clipped ray cast input for a proxy. The function /// returns the new ray fraction. @@ -730,8 +734,8 @@ typedef float b2TreeRayCastCallbackFcn( const b2RayCastInput* input, int32_t pro /// @param callback a callback class that is called for each proxy that is hit by the ray /// @param context user context that is passed to the callback /// @return performance data -B2_API b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, - b2TreeRayCastCallbackFcn* callback, void* context ); +B2_API b2TreeStats b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, + b2TreeRayCastCallbackFcn* callback, void* context ); /// This function receives clipped ray cast input for a proxy. The function /// returns the new ray fraction. 
@@ -750,8 +754,9 @@ typedef float b2TreeShapeCastCallbackFcn( const b2ShapeCastInput* input, int32_t /// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;` /// @param callback a callback class that is called for each proxy that is hit by the shape /// @param context user context that is passed to the callback -B2_API void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, - b2TreeShapeCastCallbackFcn* callback, void* context ); +/// @return performance data +B2_API b2TreeStats b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, + b2TreeShapeCastCallbackFcn* callback, void* context ); /// Validate this tree. For testing. B2_API void b2DynamicTree_Validate( const b2DynamicTree* tree ); diff --git a/samples/sample_benchmark.cpp b/samples/sample_benchmark.cpp index 158153f66..f349d3dc8 100644 --- a/samples/sample_benchmark.cpp +++ b/samples/sample_benchmark.cpp @@ -1541,12 +1541,10 @@ class BenchmarkKinematic : public Sample static int sampleKinematic = RegisterSample( "Benchmark", "Kinematic", BenchmarkKinematic::Create ); -#if 1 - enum QueryType { e_rayCast, - e_shapeCast, + e_circleCast, e_overlap, }; @@ -1564,7 +1562,7 @@ class BenchmarkCast : public Sample settings.drawShapes = g_sampleDebug; } - m_queryType = e_rayCast; + m_queryType = e_circleCast; m_ratio = 5.0f; m_grid = 1.0f; m_fill = 0.1f; @@ -1572,6 +1570,9 @@ class BenchmarkCast : public Sample m_columnCount = g_sampleDebug ? 100 : 1000; m_minTime = 1e6f; m_drawIndex = 0; + m_topDown = false; + m_buildTime = 0.0f; + m_radius = 0.1f; g_seed = 1234; int sampleCount = g_sampleDebug ? 
100 : 10000; @@ -1599,6 +1600,8 @@ class BenchmarkCast : public Sample b2WorldDef worldDef = b2DefaultWorldDef(); m_worldId = b2CreateWorld( &worldDef ); + b2Timer timer = b2CreateTimer(); + b2BodyDef bodyDef = b2DefaultBodyDef(); b2ShapeDef shapeDef = b2DefaultShapeDef(); @@ -1652,11 +1655,19 @@ class BenchmarkCast : public Sample y += m_grid; } + + if (m_topDown) + { + b2World_RebuildStaticTree( m_worldId ); + } + + m_buildTime = b2GetMilliseconds( &timer ); + m_minTime = 1e6f; } void UpdateUI() override { - float height = 220.0f; + float height = 240.0f; ImGui::SetNextWindowPos( ImVec2( 10.0f, g_camera.m_height - height - 50.0f ), ImGuiCond_Once ); ImGui::SetNextWindowSize( ImVec2( 200.0f, height ) ); @@ -1665,6 +1676,24 @@ class BenchmarkCast : public Sample ImGui::PushItemWidth( 100.0f ); bool changed = false; + + const char* queryTypes[] = { "Ray", "Circle", "Overlap" }; + int queryType = int( m_queryType ); + if (ImGui::Combo( "Query", &queryType, queryTypes, IM_ARRAYSIZE( queryTypes ) )) + { + m_queryType = QueryType( queryType ); + if ( m_queryType == e_overlap ) + { + m_radius = 5.0f; + } + else + { + m_radius = 0.1f; + } + + changed = true; + } + if ( ImGui::SliderInt( "rows", &m_rowCount, 0, 1000, "%d" ) ) { changed = true; @@ -1690,10 +1719,10 @@ class BenchmarkCast : public Sample changed = true; } - const char* queryTypes[] = { "Ray Cast", "Circle Cast", "Overlap" }; - int queryType = int( m_queryType ); - changed = changed || ImGui::Combo( "Query", &queryType, queryTypes, IM_ARRAYSIZE( queryTypes ) ); - m_queryType = QueryType( queryType ); + if ( ImGui::Checkbox( "top down", &m_topDown ) ) + { + changed = true; + } if ( ImGui::Button( "Draw Next" ) ) { @@ -1709,6 +1738,41 @@ class BenchmarkCast : public Sample } } + struct CastResult + { + b2Vec2 point; + float fraction; + bool hit; + }; + + static float CastCallback( b2ShapeId shapeId, b2Vec2 point, b2Vec2 normal, float fraction, void* context ) + { + CastResult* result = (CastResult*)context; + 
result->point = point; + result->fraction = fraction; + result->hit = true; + return fraction; + } + + struct OverlapResult + { + b2Vec2 points[32]; + int count; + }; + + static bool OverlapCallback( b2ShapeId shapeId, void* context ) + { + OverlapResult* result = (OverlapResult*)context; + if (result->count < 32) + { + b2AABB aabb = b2Shape_GetAABB( shapeId ); + result->points[result->count] = b2AABB_Center( aabb ); + result->count += 1; + } + + return true; + } + void Step( Settings& settings ) override { Sample::Step( settings ); @@ -1732,12 +1796,6 @@ class BenchmarkCast : public Sample b2Vec2 origin = m_origins[i]; b2Vec2 translation = m_translations[i]; - // todo for breakpoint - if (i == 2) - { - i += 0; - } - b2RayResult result = b2World_CastRayClosest( m_worldId, origin, translation, filter ); if (i == m_drawIndex) @@ -1764,6 +1822,91 @@ class BenchmarkCast : public Sample g_draw.DrawPoint( drawResult.point, 5.0f, b2_colorWhite ); } } + else if ( m_queryType == e_circleCast ) + { + b2Timer timer = b2CreateTimer(); + + b2Circle circle = { { 0.0f, 0.0f }, m_radius }; + CastResult drawResult = {}; + + for ( int i = 0; i < sampleCount; ++i ) + { + b2Transform origin = { m_origins[i], { 1.0f, 0.0f } }; + b2Vec2 translation = m_translations[i]; + + CastResult result; + b2TreeStats traversalResult = + b2World_CastCircle( m_worldId, &circle, origin, translation, filter, CastCallback, &result ); + + if (i == m_drawIndex) + { + drawResult = result; + } + + nodeVisits += traversalResult.nodeVisits; + leafVisits += traversalResult.leafVisits; + hitCount += result.hit ? 
1 : 0; + } + + ms = b2GetMilliseconds( &timer ); + + m_minTime = b2MinFloat( m_minTime, ms ); + + b2Vec2 p1 = m_origins[m_drawIndex]; + b2Vec2 p2 = p1 + m_translations[m_drawIndex]; + g_draw.DrawSegment( p1, p2, b2_colorWhite ); + g_draw.DrawPoint( p1, 5.0f, b2_colorGreen ); + g_draw.DrawPoint( p2, 5.0f, b2_colorRed ); + if (drawResult.hit) + { + b2Vec2 t = b2Lerp( p1, p2, drawResult.fraction ); + g_draw.DrawCircle( t, m_radius, b2_colorWhite ); + g_draw.DrawPoint( drawResult.point, 5.0f, b2_colorWhite ); + } + } + else if ( m_queryType == e_overlap ) + { + b2Timer timer = b2CreateTimer(); + + OverlapResult drawResult = {}; + b2Vec2 extent = { m_radius, m_radius }; + OverlapResult result = {}; + + for ( int i = 0; i < sampleCount; ++i ) + { + b2Vec2 origin = m_origins[i]; + b2AABB aabb = { origin - extent, origin + extent }; + + result.count = 0; + b2TreeStats traversalResult = b2World_OverlapAABB( m_worldId, aabb, filter, OverlapCallback, &result ); + + if (i == m_drawIndex) + { + drawResult = result; + } + + nodeVisits += traversalResult.nodeVisits; + leafVisits += traversalResult.leafVisits; + hitCount += result.count; + } + + ms = b2GetMilliseconds( &timer ); + + m_minTime = b2MinFloat( m_minTime, ms ); + + b2Vec2 origin = m_origins[m_drawIndex]; + b2AABB aabb = { origin - extent, origin + extent }; + + g_draw.DrawAABB( aabb, b2_colorWhite ); + + for (int i = 0; i < drawResult.count; ++i) + { + g_draw.DrawPoint( drawResult.points[i], 5.0f, b2_colorHotPink ); + } + } + + g_draw.DrawString( 5, m_textLine, "build time ms = %g", m_buildTime ); + m_textLine += m_textIncrement; g_draw.DrawString( 5, m_textLine, "hit count = %d, node visits = %d, leaf visits = %d", hitCount, nodeVisits, leafVisits ); m_textLine += m_textIncrement; @@ -1775,7 +1918,7 @@ class BenchmarkCast : public Sample m_textLine += m_textIncrement; float aveRayCost = 1000.0f * m_minTime / float( sampleCount ); - g_draw.DrawString( 5, m_textLine, "average ray us = %.2f", aveRayCost ); + 
g_draw.DrawString( 5, m_textLine, "average us = %.2f", aveRayCost ); m_textLine += m_textIncrement; } @@ -1789,14 +1932,16 @@ class BenchmarkCast : public Sample std::vector m_origins; std::vector m_translations; float m_minTime; + float m_buildTime; int m_rowCount, m_columnCount; int m_updateType; int m_drawIndex; + float m_radius; float m_fill; float m_ratio; float m_grid; + bool m_topDown; }; static int sampleCast = RegisterSample( "Benchmark", "Cast", BenchmarkCast::Create ); -#endif diff --git a/samples/sample_collision.cpp b/samples/sample_collision.cpp index 74760010b..8128f185b 100644 --- a/samples/sample_collision.cpp +++ b/samples/sample_collision.cpp @@ -691,7 +691,7 @@ class DynamicTree : public Sample if ( m_rayDrag ) { b2RayCastInput input = { m_startPoint, b2Sub( m_endPoint, m_startPoint ), 1.0f }; - b2TraversalResult result = b2DynamicTree_RayCast( &m_tree, &input, b2_defaultMaskBits, RayCallback, this ); + b2TreeStats result = b2DynamicTree_RayCast( &m_tree, &input, b2_defaultMaskBits, RayCallback, this ); g_draw.DrawSegment( m_startPoint, m_endPoint, b2_colorWhite ); g_draw.DrawPoint( m_startPoint, 5.0f, b2_colorGreen ); diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index 7f188aec8..05ba86272 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -356,8 +356,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( C->height, F->height ); C->categoryBits = B->categoryBits | G->categoryBits; A->categoryBits = C->categoryBits | F->categoryBits; - C->flags |= (B->flags | G->flags) & b2_enlargedNode; - A->flags |= (C->flags | F->flags) & b2_enlargedNode; + C->flags |= ( B->flags | G->flags ) & b2_enlargedNode; + A->flags |= ( C->flags | F->flags ) & b2_enlargedNode; } else { @@ -374,8 +374,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( C->height, G->height ); C->categoryBits = B->categoryBits | F->categoryBits; A->categoryBits = C->categoryBits | 
G->categoryBits; - C->flags |= (B->flags | F->flags) & b2_enlargedNode; - A->flags |= (C->flags | G->flags) & b2_enlargedNode; + C->flags |= ( B->flags | F->flags ) & b2_enlargedNode; + A->flags |= ( C->flags | G->flags ) & b2_enlargedNode; } } else if ( C->height == 0 ) @@ -422,8 +422,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( B->height, D->height ); B->categoryBits = C->categoryBits | E->categoryBits; A->categoryBits = B->categoryBits | D->categoryBits; - B->flags |= (C->flags | E->flags) & b2_enlargedNode; - A->flags |= (B->flags | D->flags) & b2_enlargedNode; + B->flags |= ( C->flags | E->flags ) & b2_enlargedNode; + A->flags |= ( B->flags | D->flags ) & b2_enlargedNode; } else { @@ -439,8 +439,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( B->height, E->height ); B->categoryBits = C->categoryBits | D->categoryBits; A->categoryBits = B->categoryBits | E->categoryBits; - B->flags |= (C->flags | D->flags) & b2_enlargedNode; - A->flags |= (B->flags | E->flags) & b2_enlargedNode; + B->flags |= ( C->flags | D->flags ) & b2_enlargedNode; + A->flags |= ( B->flags | E->flags ) & b2_enlargedNode; } } else @@ -520,8 +520,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( C->height, F->height ); C->categoryBits = B->categoryBits | G->categoryBits; A->categoryBits = C->categoryBits | F->categoryBits; - C->flags |= (B->flags | G->flags) & b2_enlargedNode; - A->flags |= (C->flags | F->flags) & b2_enlargedNode; + C->flags |= ( B->flags | G->flags ) & b2_enlargedNode; + A->flags |= ( C->flags | F->flags ) & b2_enlargedNode; break; case b2_rotateBG: @@ -536,8 +536,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( C->height, G->height ); C->categoryBits = B->categoryBits | F->categoryBits; A->categoryBits = C->categoryBits | G->categoryBits; - C->flags |= (B->flags | F->flags) & 
b2_enlargedNode; - A->flags |= (C->flags | G->flags) & b2_enlargedNode; + C->flags |= ( B->flags | F->flags ) & b2_enlargedNode; + A->flags |= ( C->flags | G->flags ) & b2_enlargedNode; break; case b2_rotateCD: @@ -552,8 +552,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( B->height, D->height ); B->categoryBits = C->categoryBits | E->categoryBits; A->categoryBits = B->categoryBits | D->categoryBits; - B->flags |= (C->flags | E->flags) & b2_enlargedNode; - A->flags |= (B->flags | D->flags) & b2_enlargedNode; + B->flags |= ( C->flags | E->flags ) & b2_enlargedNode; + A->flags |= ( B->flags | D->flags ) & b2_enlargedNode; break; case b2_rotateCE: @@ -568,8 +568,8 @@ static void b2RotateNodes( b2DynamicTree* tree, int32_t iA ) A->height = 1 + b2MaxUInt16( B->height, E->height ); B->categoryBits = C->categoryBits | D->categoryBits; A->categoryBits = B->categoryBits | E->categoryBits; - B->flags |= (C->flags | D->flags) & b2_enlargedNode; - A->flags |= (B->flags | E->flags) & b2_enlargedNode; + B->flags |= ( C->flags | D->flags ) & b2_enlargedNode; + A->flags |= ( B->flags | E->flags ) & b2_enlargedNode; break; default: @@ -644,7 +644,7 @@ static void b2InsertLeaf( b2DynamicTree* tree, int32_t leaf, bool shouldRotate ) nodes[index].aabb = b2AABB_Union( nodes[child1].aabb, nodes[child2].aabb ); nodes[index].categoryBits = nodes[child1].categoryBits | nodes[child2].categoryBits; nodes[index].height = 1 + b2MaxUInt16( nodes[child1].height, nodes[child2].height ); - nodes[index].flags |= (nodes[child1].flags | nodes[child2].flags) & b2_enlargedNode; + nodes[index].flags |= ( nodes[child1].flags | nodes[child2].flags ) & b2_enlargedNode; if ( shouldRotate ) { @@ -910,7 +910,7 @@ static void b2ValidateStructure( const b2DynamicTree* tree, int32_t index ) B2_ASSERT( tree->nodes[child1].parent == index ); B2_ASSERT( tree->nodes[child2].parent == index ); - if ( (tree->nodes[child1].flags | tree->nodes[child2].flags) & 
b2_enlargedNode ) + if ( ( tree->nodes[child1].flags | tree->nodes[child2].flags ) & b2_enlargedNode ) { B2_ASSERT( node->flags & b2_enlargedNode ); } @@ -1112,9 +1112,16 @@ int b2DynamicTree_GetByteCount( const b2DynamicTree* tree ) return (int)size; } -void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback, - void* context ) +b2TreeStats b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, + b2TreeQueryCallbackFcn* callback, void* context ) { + b2TreeStats result = { 0 }; + + if (tree->nodeCount == 0) + { + return result; + } + int32_t stack[b2_treeStackSize]; int32_t stackCount = 0; stack[stackCount++] = tree->root; @@ -1124,10 +1131,13 @@ void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskB int32_t nodeId = stack[--stackCount]; if ( nodeId == B2_NULL_INDEX ) { + // todo huh? + B2_ASSERT( false ); continue; } const b2TreeNode* node = tree->nodes + nodeId; + result.nodeVisits += 1; if ( b2AABB_Overlaps( node->aabb, aabb ) && ( node->categoryBits & maskBits ) != 0 ) { @@ -1135,9 +1145,11 @@ void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskB { // callback to user code with proxy id bool proceed = callback( nodeId, node->userData, context ); + result.leafVisits += 1; + if ( proceed == false ) { - return; + return result; } } else @@ -1151,6 +1163,8 @@ void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskB } } } + + return result; } #define B2_DIRK_RECURSE 0 @@ -1193,8 +1207,7 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC b2AABB nodeAABB = node->aabb; - if ( ( node->categoryBits & maskBits ) != 0 && - b2AABB_Overlaps( nodeAABB, segmentAABB ) ) + if ( ( node->categoryBits & maskBits ) != 0 && b2AABB_Overlaps( nodeAABB, segmentAABB ) ) { // Separating axis for segment (Gino, p80). 
// |dot(v, p1 - c)| > dot(|v|, h) @@ -1242,7 +1255,7 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC } } - if (stackCount == 0) + if ( stackCount == 0 ) { break; } @@ -1255,10 +1268,15 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC #else -b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, +b2TreeStats b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits, b2TreeRayCastCallbackFcn* callback, void* context ) { - b2TraversalResult result = { 0 }; + b2TreeStats result = { 0 }; + + if (tree->nodeCount == 0) + { + return result; + } b2Vec2 p1 = input->origin; b2Vec2 d = input->translation; @@ -1283,6 +1301,8 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC int32_t stackCount = 0; stack[stackCount++] = tree->root; + const b2TreeNode* nodes = tree->nodes; + b2RayCastInput subInput = *input; while ( stackCount > 0 ) @@ -1290,10 +1310,12 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC int32_t nodeId = stack[--stackCount]; if ( nodeId == B2_NULL_INDEX ) { + // todo is this possible? 
+ B2_ASSERT( false ); continue; } - const b2TreeNode* node = tree->nodes + nodeId; + const b2TreeNode* node = nodes + nodeId; result.nodeVisits += 1; b2AABB nodeAABB = node->aabb; @@ -1342,10 +1364,18 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC B2_ASSERT( stackCount < b2_treeStackSize - 1 ); if ( stackCount < b2_treeStackSize - 1 ) { - // TODO_ERIN just put one node on the stack, continue on a child node - // TODO_ERIN test ordering children by nearest to ray origin - stack[stackCount++] = node->child1; - stack[stackCount++] = node->child2; + b2Vec2 c1 = b2AABB_Center( nodes[node->child1].aabb ); + b2Vec2 c2 = b2AABB_Center( nodes[node->child2].aabb ); + if ( b2DistanceSquared( c1, p1 ) < b2DistanceSquared( c2, p1 ) ) + { + stack[stackCount++] = node->child2; + stack[stackCount++] = node->child1; + } + else + { + stack[stackCount++] = node->child1; + stack[stackCount++] = node->child2; + } } } } @@ -1355,13 +1385,14 @@ b2TraversalResult b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayC #endif - -void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, - b2TreeShapeCastCallbackFcn* callback, void* context ) +b2TreeStats b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits, + b2TreeShapeCastCallbackFcn* callback, void* context ) { - if ( input->count == 0 ) + b2TreeStats stats = { 0 }; + + if ( tree->nodeCount == 0 || input->count == 0 ) { - return; + return stats; } b2AABB originAABB = { input->points[0], input->points[0] }; @@ -1397,6 +1428,7 @@ void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* }; b2ShapeCastInput subInput = *input; + const b2TreeNode* nodes = tree->nodes; int32_t stack[b2_treeStackSize]; int32_t stackCount = 0; @@ -1407,11 +1439,15 @@ void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* int32_t nodeId = stack[--stackCount]; if ( nodeId == 
B2_NULL_INDEX ) { + // todo is this possible? + B2_ASSERT( false ); continue; } - const b2TreeNode* node = tree->nodes + nodeId; - if ( b2AABB_Overlaps( node->aabb, totalAABB ) == false || ( node->categoryBits & maskBits ) == 0 ) + const b2TreeNode* node = nodes + nodeId; + stats.nodeVisits += 1; + + if ( ( node->categoryBits & maskBits ) == 0 || b2AABB_Overlaps( node->aabb, totalAABB ) == false ) { continue; } @@ -1433,11 +1469,12 @@ void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* subInput.maxFraction = maxFraction; float value = callback( &subInput, nodeId, node->userData, context ); + stats.leafVisits += 1; if ( value == 0.0f ) { // The client has terminated the ray cast. - return; + return stats; } if ( 0.0f < value && value < maxFraction ) @@ -1454,13 +1491,23 @@ void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* B2_ASSERT( stackCount < b2_treeStackSize - 1 ); if ( stackCount < b2_treeStackSize - 1 ) { - // TODO_ERIN just put one node on the stack, continue on a child node - // TODO_ERIN test ordering children by nearest to ray origin - stack[stackCount++] = node->child1; - stack[stackCount++] = node->child2; + b2Vec2 c1 = b2AABB_Center( nodes[node->child1].aabb ); + b2Vec2 c2 = b2AABB_Center( nodes[node->child2].aabb ); + if ( b2DistanceSquared( c1, p1 ) < b2DistanceSquared( c2, p1 ) ) + { + stack[stackCount++] = node->child2; + stack[stackCount++] = node->child1; + } + else + { + stack[stackCount++] = node->child1; + stack[stackCount++] = node->child2; + } } } } + + return stats; } // Median split == 0, Surface area heuristic == 1 @@ -1968,7 +2015,7 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) // this should be weighed against b2_aabbMargin while ( true ) { - if ( node->height == 0 || (( node->flags & b2_enlargedNode) == 0 && fullBuild == false ) ) + if ( node->height == 0 || ( ( node->flags & b2_enlargedNode ) == 0 && fullBuild == false ) ) { leafIndices[leafCount] = 
nodeIndex; #if B2_TREE_HEURISTIC == 0 @@ -2017,7 +2064,7 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) { if ( nodes[i].flags & b2_allocatedNode ) { - B2_ASSERT( (nodes[i].flags & b2_enlargedNode) == 0 ); + B2_ASSERT( ( nodes[i].flags & b2_enlargedNode ) == 0 ); } } #endif @@ -2030,4 +2077,3 @@ int32_t b2DynamicTree_Rebuild( b2DynamicTree* tree, bool fullBuild ) return leafCount; } - diff --git a/src/world.c b/src/world.c index a6426ecc1..f9226810c 100644 --- a/src/world.c +++ b/src/world.c @@ -1884,13 +1884,16 @@ static bool TreeQueryCallback( int proxyId, int shapeId, void* context ) return result; } -void b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ) +b2TreeStats b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, + void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return treeStats; } B2_ASSERT( b2AABB_IsValid( aabb ) ); @@ -1899,8 +1902,14 @@ void b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeQueryCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeQueryCallback, &worldContext ); + + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; } + + return treeStats; } typedef struct WorldOverlapContext @@ -1953,14 +1962,16 @@ static bool TreeOverlapCallback( int proxyId, int shapeId, void* context ) return result; } -void b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform, b2QueryFilter filter, +b2TreeStats b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform, 
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return treeStats; } B2_ASSERT( b2Vec2_IsValid( transform.p ) ); @@ -1973,18 +1984,26 @@ void b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transfo for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeOverlapCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeOverlapCallback, &worldContext ); + + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; } + + return treeStats; } -void b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform, b2QueryFilter filter, +b2TreeStats b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return treeStats; } B2_ASSERT( b2Vec2_IsValid( transform.p ) ); @@ -1997,18 +2016,26 @@ void b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Tran for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeOverlapCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeOverlapCallback, &worldContext ); + + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; } + + return treeStats; } -void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform, b2QueryFilter filter, +b2TreeStats 
b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return treeStats; } B2_ASSERT( b2Vec2_IsValid( transform.p ) ); @@ -2021,8 +2048,14 @@ void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Tran for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeOverlapCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_Query( world->broadPhase.trees + i, aabb, filter.maskBits, TreeOverlapCallback, &worldContext ); + + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; } + + return treeStats; } typedef struct WorldRayCastContext @@ -2065,16 +2098,16 @@ static float RayCastCallback( const b2RayCastInput* input, int proxyId, int shap return input->maxFraction; } -b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, +b2TreeStats b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ) { - b2TraversalResult traversalResult = { 0 }; + b2TreeStats treeStats = { 0 }; b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return traversalResult; + return treeStats; } B2_ASSERT( b2Vec2_IsValid( origin ) ); @@ -2086,19 +2119,19 @@ b2TraversalResult b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 tran for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2TraversalResult treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); - traversalResult.nodeVisits += treeResult.nodeVisits; - 
traversalResult.leafVisits += treeResult.leafVisits; + b2TreeStats treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { - return traversalResult; + return treeStats; } input.maxFraction = worldContext.fraction; } - return traversalResult; + return treeStats; } // This callback finds the closest hit. This is the most common callback used in games. @@ -2132,7 +2165,7 @@ b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 tra for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2TraversalResult treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); + b2TreeStats treeResult = b2DynamicTree_RayCast( world->broadPhase.trees + i, &input, filter.maskBits, RayCastCallback, &worldContext ); result.nodeVisits += treeResult.nodeVisits; result.leafVisits += treeResult.leafVisits; @@ -2165,6 +2198,7 @@ static float ShapeCastCallback( const b2ShapeCastInput* input, int proxyId, int b2Body* body = b2BodyArray_Get( &world->bodies, shape->bodyId ); b2Transform transform = b2GetBodyTransformQuick( world, body ); + b2CastOutput output = b2ShapeCastShape( input, shape, transform ); if ( output.hit ) @@ -2178,14 +2212,16 @@ static float ShapeCastCallback( const b2ShapeCastInput* input, int proxyId, int return input->maxFraction; } -void b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform, b2Vec2 translation, +b2TreeStats b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return 
treeStats; } B2_ASSERT( b2Vec2_IsValid( originTransform.p ) ); @@ -2203,25 +2239,33 @@ void b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_ShapeCast( world->broadPhase.trees + i, &input, filter.maskBits, ShapeCastCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_ShapeCast( world->broadPhase.trees + i, &input, filter.maskBits, ShapeCastCallback, &worldContext ); + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { - return; + return treeStats; } input.maxFraction = worldContext.fraction; } + + return treeStats; } -void b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform, b2Vec2 translation, +b2TreeStats b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform, + b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return treeStats; } B2_ASSERT( b2Vec2_IsValid( originTransform.p ) ); @@ -2240,25 +2284,33 @@ void b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transfo for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_ShapeCast( world->broadPhase.trees + i, &input, filter.maskBits, ShapeCastCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_ShapeCast( world->broadPhase.trees + i, &input, filter.maskBits, ShapeCastCallback, &worldContext ); + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { - return; + return treeStats; } input.maxFraction = worldContext.fraction; } + + return treeStats; } -void b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform 
originTransform, b2Vec2 translation, +b2TreeStats b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform, + b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context ) { + b2TreeStats treeStats = { 0 }; + b2World* world = b2GetWorldFromId( worldId ); B2_ASSERT( world->locked == false ); if ( world->locked ) { - return; + return treeStats; } B2_ASSERT( b2Vec2_IsValid( originTransform.p ) ); @@ -2279,15 +2331,20 @@ void b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transfo for ( int i = 0; i < b2_bodyTypeCount; ++i ) { - b2DynamicTree_ShapeCast( world->broadPhase.trees + i, &input, filter.maskBits, ShapeCastCallback, &worldContext ); + b2TreeStats treeResult = + b2DynamicTree_ShapeCast( world->broadPhase.trees + i, &input, filter.maskBits, ShapeCastCallback, &worldContext ); + treeStats.nodeVisits += treeResult.nodeVisits; + treeStats.leafVisits += treeResult.leafVisits; if ( worldContext.fraction == 0.0f ) { - return; + return treeStats; } input.maxFraction = worldContext.fraction; } + + return treeStats; } #if 0 From 170a3fdeb5cb2da9b7fe006a86664f68d6cf6ca1 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 6 Oct 2024 13:48:15 -0700 Subject: [PATCH 08/11] fix for 32-bit Neon from #814 --- src/contact_solver.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/contact_solver.c b/src/contact_solver.c index 9c9d547fb..b89bca967 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -667,12 +667,26 @@ static inline void b2StoreW( float32_t* data, b2FloatW a ) static inline b2FloatW b2UnpackLoW( b2FloatW a, b2FloatW b ) { +#if defined( __aarch64__ ) return vzip1q_f32( a, b ); +#else + float32x2_t a1 = vget_low_f32( a ); + float32x2_t b1 = vget_low_f32( b ); + float32x2x2_t result = vzip_f32( a1, b1 ); + return vcombine_f32( result.val[0], result.val[1] ); +#endif } static inline b2FloatW b2UnpackHiW( b2FloatW a, b2FloatW b ) { +#if defined( __aarch64__ ) return 
vzip2q_f32( a, b ); +#else + float32x2_t a1 = vget_high_f32( a ); + float32x2_t b1 = vget_high_f32( b ); + float32x2x2_t result = vzip_f32( a1, b1 ); + return vcombine_f32( result.val[0], result.val[1] ); +#endif } #elif defined( B2_SIMD_SSE2 ) From 30096e51fef6f1f248a9c62c57a5394a4094e29f Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 6 Oct 2024 14:09:16 -0700 Subject: [PATCH 09/11] Fixes for 32-bit Windows #815 --- samples/main.cpp | 4 ++-- src/core.h | 2 +- src/timer.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/main.cpp b/samples/main.cpp index d967f5664..9e4ad4133 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -68,7 +68,7 @@ void* AllocFcn( uint32_t size, int32_t alignment ) size_t sizeAligned = ( ( size - 1 ) | ( alignment - 1 ) ) + 1; assert( ( sizeAligned & ( alignment - 1 ) ) == 0 ); -#if defined( _WIN64 ) +#if defined( _WIN64 ) || defined( _WIN32 ) void* ptr = _aligned_malloc( sizeAligned, alignment ); #else void* ptr = aligned_alloc( alignment, sizeAligned ); @@ -79,7 +79,7 @@ void* AllocFcn( uint32_t size, int32_t alignment ) void FreeFcn( void* mem ) { -#if defined( _WIN64 ) +#if defined( _WIN64 ) || defined( _WIN32 ) _aligned_free( mem ); #else free( mem ); diff --git a/src/core.h b/src/core.h index f0e64e126..c7d755e9e 100644 --- a/src/core.h +++ b/src/core.h @@ -25,7 +25,7 @@ #endif // Define platform -#if defined( _WIN64 ) +#if defined(_WIN32) || defined(_WIN64) #define B2_PLATFORM_WINDOWS #elif defined( __ANDROID__ ) #define B2_PLATFORM_ANDROID diff --git a/src/timer.c b/src/timer.c index 0aecd57ba..fe2c53d6f 100644 --- a/src/timer.c +++ b/src/timer.c @@ -193,7 +193,7 @@ void b2Yield() uint32_t b2Hash( uint32_t hash, const uint8_t* data, int count ) { uint32_t result = hash; - for ( size_t i = 0; i < count; i++ ) + for ( int i = 0; i < count; i++ ) { result = ( result << 5 ) + result + data[i]; } From bb0fa3b6166cc9872e378045117ffa1825cf6e21 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 6 Oct 
2024 14:39:11 -0700 Subject: [PATCH 10/11] mac fix --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 8e8701d8f..030fb7282 100644 --- a/src/core.c +++ b/src/core.c @@ -78,7 +78,7 @@ void b2SetAllocator( b2AllocFcn* allocFcn, b2FreeFcn* freeFcn ) } // Use 32 byte alignment for everything. Works with 256bit SIMD. -#define B2_ALIGNMENT 64 +#define B2_ALIGNMENT 32 void* b2Alloc( int size ) { From b0cb7264da1af8534a44a965bbbc4a642061058d Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 6 Oct 2024 14:44:24 -0700 Subject: [PATCH 11/11] build samples on windows --- .github/workflows/build.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 621087b62..a77b7564e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -75,7 +75,6 @@ jobs: arch: x64 - name: Configure CMake - # enkiTS is failing ASAN on windows run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF # run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF @@ -85,4 +84,22 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build run: ./bin/${{env.BUILD_TYPE}}/test + + samples-windows: + name: windows + runs-on: windows-latest + steps: + + - uses: actions/checkout@v4 + + - name: Setup MSVC dev command prompt + uses: TheMrMilchmann/setup-msvc-dev@v3 + with: + arch: x64 + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=Release -DBOX2D_SAMPLES=ON -DBUILD_SHARED_LIBS=OFF -DBOX2D_UNIT_TESTS=OFF + + - name: Build + run: cmake --build ${{github.workspace}}/build --config Release \ No newline at end of file