-
-
Notifications
You must be signed in to change notification settings - Fork 472
Bunch of microoptimizations #1600
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 5 commits
23e6928
ff3fac7
b846569
39c3f31
d9da42f
525d793
6f1c24d
95534b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,37 +74,42 @@ void CCoverManager::compute_static_cover() | |
{ | ||
clear(); | ||
xr_delete(m_covers); | ||
m_covers = xr_new<CPointQuadTree>( | ||
ai().level_graph().header().box(), ai().level_graph().header().cell_size() * .5f, 8 * 65536, 4 * 65536); | ||
m_temp.resize(ai().level_graph().header().vertex_count()); | ||
|
||
|
||
const CLevelGraph& graph = ai().level_graph(); | ||
const u32 levelVertexCount = ai().level_graph().header().vertex_count(); | ||
const LevelGraph::CHeader& levelGraphHeader = graph.header(); | ||
const u32 levelVertexCount = levelGraphHeader.vertex_count(); | ||
|
||
m_covers = xr_new<CPointQuadTree>(levelGraphHeader.box(), levelGraphHeader.cell_size() * .5f, 8 * 65536, 4 * 65536); | ||
|
||
// avoiding extra allocations with a static storage for m_covers | ||
static xr_vector<std::optional<CCoverPoint>> quadTreeStaticStorage; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if this is a good trade-off between allocs/RSS. It would be nice to hear an opinion on this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not recommended to use global/static objects which allocate memory dynamically. |
||
quadTreeStaticStorage.resize(levelVertexCount); | ||
m_temp.resize(levelVertexCount); | ||
|
||
xr_parallel_for(TaskRange<u32>(0, levelVertexCount), [&](const TaskRange<u32>& range) | ||
{ | ||
for (u32 i = range.begin(); i != range.end(); ++i) | ||
{ | ||
const CLevelGraph::CLevelVertex& vertex = *graph.vertex(i); | ||
if (vertex.high_cover(0) + vertex.high_cover(1) + vertex.high_cover(2) + vertex.high_cover(3)) | ||
{ | ||
m_temp[i] = edge_vertex(i); | ||
continue; | ||
} | ||
m_temp[i] = false; | ||
quadTreeStaticStorage[i] = std::nullopt; | ||
|
||
if (vertex.low_cover(0) + vertex.low_cover(1) + vertex.low_cover(2) + vertex.low_cover(3)) | ||
const CLevelGraph::CLevelVertex& vertex = *graph.vertex(i); | ||
const int highCover = vertex.high_cover(0) + vertex.high_cover(1) + vertex.high_cover(2) + vertex.high_cover(3); | ||
const int lowCover = vertex.low_cover(0) + vertex.low_cover(1) + vertex.low_cover(2) + vertex.low_cover(3); | ||
|
||
if (highCover || lowCover) | ||
{ | ||
m_temp[i] = edge_vertex(i); | ||
continue; | ||
if (m_temp[i] && critical_cover(i)) | ||
{ | ||
quadTreeStaticStorage[i] = CCoverPoint(graph.vertex_position(graph.vertex(i)), i); | ||
} | ||
} | ||
|
||
m_temp[i] = false; | ||
} | ||
}); | ||
|
||
for (u32 i = 0; i < levelVertexCount; ++i) | ||
if (m_temp[i] && critical_cover(i)) | ||
m_covers->insert(xr_new<CCoverPoint>(ai().level_graph().vertex_position(ai().level_graph().vertex(i)), i)); | ||
for (auto& p : quadTreeStaticStorage) | ||
if (p.has_value()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
m_covers->insert(&p.value()); | ||
|
||
VERIFY(!m_smart_covers_storage); | ||
m_smart_covers_storage = xr_new<smart_cover::storage>(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,9 @@ class CCoverManager | |
|
||
protected: | ||
CPointQuadTree* m_covers; | ||
xr_vector<bool> m_temp; | ||
// vector<bool> is not applicable for `m_temp` | ||
// since it is filled in parallel_for (https://timsong-cpp.github.io/cppwp/container.requirements.dataraces). | ||
xr_vector<int> m_temp; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose it wasn't thread-safe before |
||
mutable PointVector m_nearest; | ||
|
||
private: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
#include "xrAICore/Navigation/level_graph.h" | ||
#include "space_restrictor.h" | ||
#include "xrAICore/Navigation/graph_engine.h" | ||
#include "xrCore/Threading/ParallelFor.hpp" | ||
|
||
struct CBorderMergePredicate | ||
{ | ||
|
@@ -79,10 +80,45 @@ void CSpaceRestrictionShape::fill_shape(const CCF_Shape::shape_def& shape) | |
} | ||
default: NODEFAULT; | ||
} | ||
ai().level_graph().iterate_vertices(start, dest, CBorderMergePredicate(this)); | ||
|
||
CLevelGraph& graph = ai().level_graph(); | ||
|
||
std::mutex mergeMutex; | ||
|
||
auto [begin, end] = graph.get_range(start, dest); | ||
xr_parallel_for(TaskRange(begin, end), [this, &mergeMutex, &graph] (const auto& range) | ||
{ | ||
xr_vector<u32> m_border_chunk; | ||
m_border_chunk.reserve(range.size()); | ||
for (const auto& vertex : range) | ||
{ | ||
if (inside(graph.vertex_id(&vertex), true) && | ||
!inside(graph.vertex_id(&vertex), false)) | ||
m_border_chunk.push_back(graph.vertex_id(&vertex)); | ||
} | ||
std::lock_guard lock(mergeMutex); | ||
if (m_border.capacity() < m_border.size() + m_border_chunk.size()) | ||
m_border.reserve(m_border.size() + m_border_chunk.size()); | ||
for (auto x : m_border_chunk) | ||
m_border.push_back(x); | ||
Comment on lines
+94
to
+103
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, it might be irrelevant since I accidentally used
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @olefirenque, what profiler did you use? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To me this strongly looks like tracy but I might be wrong there |
||
}); | ||
|
||
#ifdef DEBUG | ||
ai().level_graph().iterate_vertices(start, dest, CShapeTestPredicate(this)); | ||
xr_parallel_for(TaskRange(begin, end), [this, &mergeMutex, &graph] (const auto& range) | ||
{ | ||
xr_vector<u32> m_test_storage_chunk; | ||
m_test_storage_chunk.reserve(range.size()); | ||
for (const auto& vertex : range) | ||
{ | ||
if (inside(graph.vertex_id(&vertex), false)) | ||
m_test_storage_chunk.push_back(graph.vertex_id(&vertex)); | ||
} | ||
std::lock_guard lock(mergeMutex); | ||
if (m_test_storage.capacity() < m_test_storage.size() + m_test_storage_chunk.size()) | ||
m_test_storage.reserve(m_test_storage.size() + m_test_storage_chunk.size()); | ||
for (auto x : m_test_storage_chunk) | ||
m_test_storage.push_back(x); | ||
}); | ||
#endif | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,7 +100,14 @@ void CSpaceRestrictor::net_Destroy() | |
bool CSpaceRestrictor::inside(const Fsphere& sphere) const | ||
{ | ||
if (!actual()) | ||
prepare(); | ||
{ | ||
static std::mutex prepareMutex; | ||
std::lock_guard lock(prepareMutex); | ||
|
||
// Double-checked locking | ||
if (!actual()) | ||
prepare(); | ||
} | ||
Comment on lines
102
to
+110
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe it was only non-concurrent part for |
||
|
||
if (!m_selfbounds.intersect(sphere)) | ||
return (false); | ||
|
Uh oh!
There was an error while loading. Please reload this page.