Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions common/eventconsumption/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ An optional identifier that must be `index-events` when present.

storage/

Required container for storage plane and page cache configuration settings.
Optional container for storage plane and page cache configuration settings.

storage/
@cacheReadTime
Expand All @@ -171,7 +171,7 @@ Optional page cache controls. Setting a positive read time enables the page cach
storage/
plane/

Required and repeatable element container for a single storage plane. The first occurrence becomes the default plane for any file that is not explicitly assigned a plane.
Optional and repeatable element container for a single storage plane. The first occurrence becomes the default plane for any file that is not explicitly assigned a plane.

storage/
plane/
Expand All @@ -188,7 +188,7 @@ Required number of nanoseconds needed to load a single 8KB page from the storage
storage/
file/

Optional repeatable element enabling one or all files to be explicitly associated with a storage plane.
Optional repeatable element enabling one or all files to be explicitly associated with a storage plane. Must be omitted when `storage/plane` is omitted.

storage/
file/
Expand Down
67 changes: 62 additions & 5 deletions common/eventconsumption/eventindexmodel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,8 @@ class CIndexEventModel : public CInterfaceOf<IEventModel>
virtual void configure(const IPropertyTree& config) override
{
const IPropertyTree* node = config.queryBranch("storage");
if (!node)
throw makeStringException(-1, "index event model configuration missing required <storage> element");
storage.configure(*node);
if (node)
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The change makes storage optional, but there's no corresponding update to handle the case where storage is present but empty (e.g., <storage/>). While the code may handle this through planes.empty() checks in Storage, it would be clearer to explicitly document this behavior or add a validation that an empty storage element is equivalent to omission.

Suggested change
if (node)
// Treat an empty <storage/> element as equivalent to omission.
if (node && (node->hasChildren() || node->getNumProps() > 0))

Copilot uses AI. Check for mistakes.
storage.configure(*node);
node = config.queryBranch("memory");
if (node)
memory.configure(*node);
Expand Down Expand Up @@ -142,7 +141,7 @@ class CIndexEventModel : public CInterfaceOf<IEventModel>
memory.describePage(event, page);
if (!page.cacheHit)
{
storage.useAndDescribePage(event.queryNumericValue(EvAttrFileId), event.queryNumericValue(EvAttrFileOffset), event.queryNumericValue(EvAttrNodeKind), page);
storage.useAndDescribePage(event, page);
CEvent simulated = event;
event.changeEventType(EventIndexCacheMiss);
simulated.changeEventType(EventIndexLoad);
Expand Down Expand Up @@ -233,7 +232,7 @@ class CIndexEventModel : public CInterfaceOf<IEventModel>
return; // discard unobserved page
}
ModeledPage page;
storage.useAndDescribePage(event.queryNumericValue(EvAttrFileId), event.queryNumericValue(EvAttrFileOffset), event.queryNumericValue(EvAttrNodeKind), page);
storage.useAndDescribePage(event, page);
event.setValue(EvAttrReadTime, page.readTime);
memory.describePage(event, page);
if (ExpansionMode::OnLoad == page.expansionMode)
Expand Down Expand Up @@ -354,6 +353,9 @@ class IndexEventModelTests : public CppUnit::TestFixture
CPPUNIT_TEST(testExplicitNoPageCache);
CPPUNIT_TEST(testPageCacheEviction);
CPPUNIT_TEST(testNoPageCacheEviction);
CPPUNIT_TEST(testNoStorage);
CPPUNIT_TEST(testNoStoragePlanes);
CPPUNIT_TEST(testStoragePageCacheOnly);
CPPUNIT_TEST(testInMemoryExpansionEstimation);
CPPUNIT_TEST(testOnLoadExpansionEstimation);
CPPUNIT_TEST(testOnLoadToOneDemandExpansionEstimation);
Expand Down Expand Up @@ -588,6 +590,61 @@ class IndexEventModelTests : public CppUnit::TestFixture
testEventVisitationLinks(testData);
}

void testNoStorage()
{
constexpr const char *testData = R"!!!(
<test>
<link kind="index-events">
</link>
<input>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
</input>
<expect>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
</expect>
</test>
)!!!";
testEventVisitationLinks(testData);
}

void testNoStoragePlanes()
{
constexpr const char *testData = R"!!!(
<test>
<link kind="index-events">
<storage/>
</link>
<input>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
</input>
<expect>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
</expect>
</test>
)!!!";
testEventVisitationLinks(testData);
}

void testStoragePageCacheOnly()
{
constexpr const char *testData = R"!!!(
<test>
<link kind="index-events">
<storage cacheReadTime="100" cacheCapacity="16384"/>
</link>
<input>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
</input>
<expect>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="1234"/>
<event type="IndexLoad" FileId="1" FileOffset="0" NodeKind="0" InMemorySize="0" ExpandTime="0" ReadTime="100"/>
</expect>
</test>
)!!!";
testEventVisitationLinks(testData);
}

void testInMemoryExpansionEstimation()
{
constexpr const char *testData = R"!!!(
Expand Down
69 changes: 2 additions & 67 deletions common/eventconsumption/eventindexmodel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,71 +45,6 @@ enum class ExpansionMode
OnDemand
};

// The index event model configuration conforms to this structure:
// kind: index-event
// storage:
// cacheReadTime: nanosecond time to read one page
// cacheCapacity: maximum bytes to use for the cache
// plane:
// - name: unique, non-empty, identifier
// readTime: nanosecond time to read one page
// file:
// - path: empty or unique index file path
// plane: name of place in which the file resides, if different from the default
// branchPlane: name of place in which index branches reside, if different from the default
// leafPlane: name of place in which index leaves reside, if different from the default
// memory:
// node:
// - kind: node kind
// sizeFactor: floating point multiplier for estimated node size
// sizeToTimeFactor: floating point multiplier for estimated node expansionMode time
// expansionMode: choice of `ll`, `ld`, or `dd`
// cacheCapacity: maximum bytes to use for the cache
// observed:
// - FileId: unique index file identifier
// FileOffset: index page offset
// NodeKind: node kind
// InMemorySize: size of the node in bytes
// ExpandTime: time in nanoseconds to expand the node
//
// - `kind` is optional; as the first model the value is implied.
// - `cacheReadTime` is optional; if zero, empty, or omitted, the cache is disabled.
// - `cacheCapacity` is optional; if zero, empty, or omitted, the cache is unlimited.
// - The first `plane` declared is assumed to be the default plane for files not explicitly assigned
// an alternate choice.
// - `file` is optional; omission implies all files exist in the default plane.
// - `file/path` is optional; omission, or empty, assigns the storage plane for all files not
// explicitly configured.
// - `file/plane` is optional; omission, or empty, implies the file resides in the default storage
// plane.
// - `file/branchPlane` is optional; omission, or empty, implies the file's index branches reside in
// the file's default storage plane.
// - `file/leafPlane` is optional; omission, or empty, implies the file's index leaves reside in the
// file's default storage plane.
// - `memory` is optional; omission prevents any memory modeling.
// - `memory/node` is optional; omission prevents estimating algorithm performance. If present,
// one instance for each type of index node is required.
// - `memory/node/kind` is a required choice of `branch` or `leaf`.
// - `memory/node/sizeFactor` is a required positive floating point multiplier for algorithm
// performance estimation. The on disk page size is multiplied by this value.
// - `memory/node/sizeToTimeFactor` is a required positive floating point multiplier for
// algorithm performance estimation. The estimated size is multipled by this factor to estimate
// the expansion time.
// - `expansion/node/mode` is optional; omission and empty are equivalent to `ll`. The value is
// ignored for branch nodes, which are always treated as `ll`. Accepted options are:
// - `ll`: model input and output are both on-load
// - `ld`: model input is on-load and output is on-demand
// - `dd`: model input and output are both on-demand
// - `memory/observed` is an optional mechanism to pre-populate the memory caches. It enables
// unit test case scenarios to be set up without requiring explicit events to be visited.
// - `memory/observed/NodeKind` is expected and corresponds to input event NodeKind values.
// - `memory/observed/FileId` is expected only when history is used for the node kind.
// - `memory/observed/FileOffset` is expected only when history is used for the node kind.
// - `memory/observed/InMemorySize` is expected only when the node cache for the node kind is
// enabled, which implies that history is also used for the node kind.
// - `memory/observed/ExpandTime` is expected only when the node cache for the node kind is
// enabled, which implies that history is also used for the node kind.

// Encapsulation of all modeled information about given file page. Note that the file path is not
// included as the model does not retain that information.
struct ModeledPage
Expand Down Expand Up @@ -278,10 +213,10 @@ class Storage
// file specification, is not supported.
void observeFile(__uint64 fileId, const char* path);

// Fill in the page data with storage information known about the file and offset. An enabled
// Fill in the page data with storage information known about the event's page. An enabled
// page cache will be updated when the page was not the most recently used. A file ID not
// previously obseved will use information from the default file specification.
void useAndDescribePage(__uint64 fileId, __uint64 offset, __uint64 nodeKind, ModeledPage& page);
void useAndDescribePage(const CEvent& event, ModeledPage& page);

private:
void configurePlanes(const IPropertyTree& config);
Expand Down
35 changes: 23 additions & 12 deletions common/eventconsumption/eventindexmodelstorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ void Storage::configure(const IPropertyTree& config)

void Storage::observeFile(__uint64 fileId, const char* path)
{
// don't observe files when no modeling is configured
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is misleading. The check planes.empty() only verifies that no storage planes are configured, not that modeling is entirely disabled. Memory modeling could still be active. Consider: 'don't observe files when no storage planes are configured'.

Suggested change
// don't observe files when no modeling is configured
// don't observe files when no storage planes are configured

Copilot uses AI. Check for mistakes.
if (planes.empty())
return;
const File& mappedFile = lookupFile(path);
const File*& observation = observedFiles[fileId];
if (!observation)
Expand All @@ -87,21 +90,29 @@ void Storage::observeFile(__uint64 fileId, const char* path)
assertex(&mappedFile == observation);
}

void Storage::useAndDescribePage(__uint64 fileId, __uint64 offset, __uint64 nodeKind, ModeledPage& page)
void Storage::useAndDescribePage(const CEvent& event, ModeledPage& page)
{
const File& file = lookupFile(fileId);
page.fileId = fileId;
page.offset = offset;
page.nodeKind = nodeKind;
page.readTime = cache.getReadTimeIfExists(fileId, offset);
page.fileId = event.queryNumericValue(EvAttrFileId);
page.offset = event.queryNumericValue(EvAttrFileOffset);
page.nodeKind = event.queryNumericValue(EvAttrNodeKind);
page.readTime = cache.getReadTimeIfExists(page.fileId, page.offset);
if (!page.readTime)
{
const Plane& plane = file.lookupPlane(nodeKind);
page.readTime = plane.readTime;
if (planes.empty())
{
if (event.hasAttribute(EvAttrReadTime))
page.readTime = event.queryNumericValue(EvAttrReadTime);
}
else
{
const File& file = lookupFile(page.fileId);
const Plane& plane = file.lookupPlane(page.nodeKind);
page.readTime = plane.readTime;
}
// MORE: if multiple pages should be loaded, add all to the cache, with the requested page
// added last.
IndexMRUNullCacheReporter nullreporter;
(void)cache.insert(fileId, offset, nullreporter);
(void)cache.insert(page.fileId, page.offset, nullreporter);
}
}

Expand All @@ -121,8 +132,6 @@ void Storage::configurePlanes(const IPropertyTree& config)
else if (!defaultPlane)
defaultPlane = &*planeIt;
}
if (planes.empty())
throw makeStringException(-1, "missing storage plane configurations");
}

const Storage::Plane& Storage::lookupPlane(const char* name, const char* forFile) const
Expand All @@ -144,6 +153,8 @@ void Storage::configureFiles(const IPropertyTree& config)
{
bool haveDefault = false;
Owned<IPropertyTreeIterator> it = config.getElements("file");
if (it->first() && planes.empty())
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The call to it->first() consumes the iterator position. This means the subsequent ForEach(*it) at line 158 will skip the first file element. The check should use it->isValid() after calling first() and then reset the iterator, or restructure to avoid consuming the iterator.

Suggested change
if (it->first() && planes.empty())
if (it->isValid() && planes.empty())

Copilot uses AI. Check for mistakes.
throw makeStringException(-1, "storage file configuration requires at least one plane configuration");
ForEach(*it)
{
File file;
Expand Down Expand Up @@ -177,7 +188,7 @@ void Storage::configureFiles(const IPropertyTree& config)
if (isEmptyString(file.path.get()))
haveDefault = true;
}
if (!haveDefault)
if (!haveDefault && !planes.empty())
configuredFiles.insert(File(nullptr, lookupPlane(nullptr, nullptr)));
}

Expand Down
Loading