From 5dd05b8bc55af6973d14dce971b2b3537ad5c76b Mon Sep 17 00:00:00 2001 From: Tim Klemm Date: Fri, 17 Oct 2025 09:20:24 -0400 Subject: [PATCH] HPCC-35200: Make modeling of index event read times optional Update the index event model configuration: - `storage` is an optional element; and - `storage/plane` is an optional element; and - `storage/file`, already optional, is allowed only when `storage/plane` is defined. --- common/eventconsumption/README.md | 6 +- common/eventconsumption/eventindexmodel.cpp | 67 ++++++++++++++++-- common/eventconsumption/eventindexmodel.hpp | 69 +------------------ .../eventindexmodelstorage.cpp | 35 ++++++---- 4 files changed, 90 insertions(+), 87 deletions(-) diff --git a/common/eventconsumption/README.md b/common/eventconsumption/README.md index 428c7be9f57..2a1f926ecbf 100644 --- a/common/eventconsumption/README.md +++ b/common/eventconsumption/README.md @@ -158,7 +158,7 @@ An optional identifier that must be `index-events` when present. storage/ -Required container for storage plane and page cache configuration settings. +Optional container for storage plane and page cache configuration settings. storage/ @cacheReadTime @@ -171,7 +171,7 @@ Optional page cache controls. Setting a positive read time enables the page cach storage/ plane/ -Required and repeatable element container for a single storage plane. The first occurrence becomes the default plane for any file that is not explicitly assigned a plane. +Optional and repeatable element container for a single storage plane. The first occurrence becomes the default plane for any file that is not explicitly assigned a plane. storage/ plane/ @@ -188,7 +188,7 @@ Required number of nanoseconds needed to load a single 8KB page from the storage storage/ file/ -Optional repeatable element enabling one or all files to be explicitly associated with a storage plane. +Optional repeatable element enabling one or all files to be explicitly associated with a storage plane. Must be omitted when `storage/plane` is omitted. storage/ file/ diff --git a/common/eventconsumption/eventindexmodel.cpp b/common/eventconsumption/eventindexmodel.cpp index 6adb38209a8..6a91cd57483 100644 --- a/common/eventconsumption/eventindexmodel.cpp +++ b/common/eventconsumption/eventindexmodel.cpp @@ -76,9 +76,8 @@ class CIndexEventModel : public CInterfaceOf virtual void configure(const IPropertyTree& config) override { const IPropertyTree* node = config.queryBranch("storage"); - if (!node) - throw makeStringException(-1, "index event model configuration missing required element"); - storage.configure(*node); + if (node) + storage.configure(*node); node = config.queryBranch("memory"); if (node) memory.configure(*node); @@ -142,7 +141,7 @@ class CIndexEventModel : public CInterfaceOf memory.describePage(event, page); if (!page.cacheHit) { - storage.useAndDescribePage(event.queryNumericValue(EvAttrFileId), event.queryNumericValue(EvAttrFileOffset), event.queryNumericValue(EvAttrNodeKind), page); + storage.useAndDescribePage(event, page); CEvent simulated = event; event.changeEventType(EventIndexCacheMiss); simulated.changeEventType(EventIndexLoad); @@ -233,7 +232,7 @@ class CIndexEventModel : public CInterfaceOf return; // discard unobserved page } ModeledPage page; - storage.useAndDescribePage(event.queryNumericValue(EvAttrFileId), event.queryNumericValue(EvAttrFileOffset), event.queryNumericValue(EvAttrNodeKind), page); + storage.useAndDescribePage(event, page); event.setValue(EvAttrReadTime, page.readTime); memory.describePage(event, page); if (ExpansionMode::OnLoad == page.expansionMode) @@ -354,6 +353,9 @@ class IndexEventModelTests : public CppUnit::TestFixture CPPUNIT_TEST(testExplicitNoPageCache); CPPUNIT_TEST(testPageCacheEviction); CPPUNIT_TEST(testNoPageCacheEviction); + CPPUNIT_TEST(testNoStorage); + CPPUNIT_TEST(testNoStoragePlanes); + CPPUNIT_TEST(testStoragePageCacheOnly); CPPUNIT_TEST(testInMemoryExpansionEstimation); CPPUNIT_TEST(testOnLoadExpansionEstimation); CPPUNIT_TEST(testOnLoadToOneDemandExpansionEstimation); @@ -588,6 +590,61 @@ class IndexEventModelTests : public CppUnit::TestFixture testEventVisitationLinks(testData); } + void testNoStorage() + { + constexpr const char *testData = R"!!!( + + + + + + + + + + + )!!!"; + testEventVisitationLinks(testData); + } + + void testNoStoragePlanes() + { + constexpr const char *testData = R"!!!( + + + + + + + + + + + + )!!!"; + testEventVisitationLinks(testData); + } + + void testStoragePageCacheOnly() + { + constexpr const char *testData = R"!!!( + + + + + + + + + + + + + + )!!!"; + testEventVisitationLinks(testData); + } + void testInMemoryExpansionEstimation() { constexpr const char *testData = R"!!!( diff --git a/common/eventconsumption/eventindexmodel.hpp b/common/eventconsumption/eventindexmodel.hpp index e2b40253cfa..a0cc560f684 100644 --- a/common/eventconsumption/eventindexmodel.hpp +++ b/common/eventconsumption/eventindexmodel.hpp @@ -45,71 +45,6 @@ enum class ExpansionMode OnDemand }; -// The index event model configuration conforms to this structure: -// kind: index-event -// storage: -// cacheReadTime: nanosecond time to read one page -// cacheCapacity: maximum bytes to use for the cache -// plane: -// - name: unique, non-empty, identifier -// readTime: nanosecond time to read one page -// file: -// - path: empty or unique index file path -// plane: name of place in which the file resides, if different from the default -// branchPlane: name of place in which index branches reside, if different from the default -// leafPlane: name of place in which index leaves reside, if different from the default -// memory: -// node: -// - kind: node kind -// sizeFactor: floating point multiplier for estimated node size -// sizeToTimeFactor: floating point multiplier for estimated node expansionMode time -// expansionMode: choice of `ll`, `ld`, or `dd` -// cacheCapacity: maximum bytes to use for the cache -// observed: -// - FileId: unique index file identifier -// FileOffset: index page offset -// NodeKind: node kind -// InMemorySize: size of the node in bytes -// ExpandTime: time in nanoseconds to expand the node -// -// - `kind` is optional; as the first model the value is implied. -// - `cacheReadTime` is optional; if zero, empty, or omitted, the cache is disabled. -// - `cacheCapacity` is optional; if zero, empty, or omitted, the cache is unlimited. -// - The first `plane` declared is assumed to be the default plane for files not explicitly assigned -// an alternate choice. -// - `file` is optional; omission implies all files exist in the default plane. -// - `file/path` is optional; omission, or empty, assigns the storage plane for all files not -// explicitly configured. -// - `file/plane` is optional; omission, or empty, implies the file resides in the default storage -// plane. -// - `file/branchPlane` is optional; omission, or empty, implies the file's index branches reside in -// the file's default storage plane. -// - `file/leafPlane` is optional; omission, or empty, implies the file's index leaves reside in the -// file's default storage plane. -// - `memory` is optional; omission prevents any memory modeling. -// - `memory/node` is optional; omission prevents estimating algorithm performance. If present, -// one instance for each type of index node is required. -// - `memory/node/kind` is a required choice of `branch` or `leaf`. -// - `memory/node/sizeFactor` is a required positive floating point multiplier for algorithm -// performance estimation. The on disk page size is multiplied by this value. -// - `memory/node/sizeToTimeFactor` is a required positive floating point multiplier for -// algorithm performance estimation. The estimated size is multipled by this factor to estimate -// the expansion time. -// - `expansion/node/mode` is optional; omission and empty are equivalent to `ll`. The value is -// ignored for branch nodes, which are always treated as `ll`. Accepted options are: -// - `ll`: model input and output are both on-load -// - `ld`: model input is on-load and output is on-demand -// - `dd`: model input and output are both on-demand -// - `memory/observed` is an optional mechanism to pre-populate the memory caches. It enables -// unit test case scenarios to be set up without requiring explicit events to be visited. -// - `memory/observed/NodeKind` is expected and corresponds to input event NodeKind values. -// - `memory/observed/FileId` is expected only when history is used for the node kind. -// - `memory/observed/FileOffset` is expected only when history is used for the node kind. -// - `memory/observed/InMemorySize` is expected only when the node cache for the node kind is -// enabled, which implies that history is also used for the node kind. -// - `memory/observed/ExpandTime` is expected only when the node cache for the node kind is -// enabled, which implies that history is also used for the node kind. - // Encapsulation of all modeled information about given file page. Note that the file path is not // included as the model does not retain that information. struct ModeledPage @@ -278,10 +213,10 @@ class Storage // file specification, is not supported. void observeFile(__uint64 fileId, const char* path); - // Fill in the page data with storage information known about the file and offset. An enabled + // Fill in the page data with storage information known about the event's page. An enabled // page cache will be updated when the page was not the most recently used. A file ID not // previously obseved will use information from the default file specification. - void useAndDescribePage(__uint64 fileId, __uint64 offset, __uint64 nodeKind, ModeledPage& page); + void useAndDescribePage(const CEvent& event, ModeledPage& page); private: void configurePlanes(const IPropertyTree& config); diff --git a/common/eventconsumption/eventindexmodelstorage.cpp b/common/eventconsumption/eventindexmodelstorage.cpp index 9c751c5129c..8530b1bf3c5 100644 --- a/common/eventconsumption/eventindexmodelstorage.cpp +++ b/common/eventconsumption/eventindexmodelstorage.cpp @@ -76,6 +76,9 @@ void Storage::configure(const IPropertyTree& config) void Storage::observeFile(__uint64 fileId, const char* path) { + // don't observe files when no modeling is configured + if (planes.empty()) + return; const File& mappedFile = lookupFile(path); const File*& observation = observedFiles[fileId]; if (!observation) @@ -87,21 +90,29 @@ void Storage::observeFile(__uint64 fileId, const char* path) assertex(&mappedFile == observation); } -void Storage::useAndDescribePage(__uint64 fileId, __uint64 offset, __uint64 nodeKind, ModeledPage& page) +void Storage::useAndDescribePage(const CEvent& event, ModeledPage& page) { - const File& file = lookupFile(fileId); - page.fileId = fileId; - page.offset = offset; - page.nodeKind = nodeKind; - page.readTime = cache.getReadTimeIfExists(fileId, offset); + page.fileId = event.queryNumericValue(EvAttrFileId); + page.offset = event.queryNumericValue(EvAttrFileOffset); + page.nodeKind = event.queryNumericValue(EvAttrNodeKind); + page.readTime = cache.getReadTimeIfExists(page.fileId, page.offset); if (!page.readTime) { - const Plane& plane = file.lookupPlane(nodeKind); - page.readTime = plane.readTime; + if (planes.empty()) + { + if (event.hasAttribute(EvAttrReadTime)) + page.readTime = event.queryNumericValue(EvAttrReadTime); + } + else + { + const File& file = lookupFile(page.fileId); + const Plane& plane = file.lookupPlane(page.nodeKind); + page.readTime = plane.readTime; + } // MORE: if multiple pages should be loaded, add all to the cache, with the requested page // added last. IndexMRUNullCacheReporter nullreporter; - (void)cache.insert(fileId, offset, nullreporter); + (void)cache.insert(page.fileId, page.offset, nullreporter); } } @@ -121,8 +132,6 @@ void Storage::configurePlanes(const IPropertyTree& config) else if (!defaultPlane) defaultPlane = &*planeIt; } - if (planes.empty()) - throw makeStringException(-1, "missing storage plane configurations"); } const Storage::Plane& Storage::lookupPlane(const char* name, const char* forFile) const @@ -144,6 +153,8 @@ void Storage::configureFiles(const IPropertyTree& config) { bool haveDefault = false; Owned it = config.getElements("file"); + if (it->first() && planes.empty()) + throw makeStringException(-1, "storage file configuration requires at least one plane configuration"); ForEach(*it) { File file; @@ -177,7 +188,7 @@ void Storage::configureFiles(const IPropertyTree& config) if (isEmptyString(file.path.get())) haveDefault = true; } - if (!haveDefault) + if (!haveDefault && !planes.empty()) configuredFiles.insert(File(nullptr, lookupPlane(nullptr, nullptr))); }