From cdc86b16eb8154ceea634d6bacfe8e9111c80535 Mon Sep 17 00:00:00 2001 From: Dave Streeter Date: Thu, 30 Oct 2025 17:02:10 +0000 Subject: [PATCH 1/5] HPCC-35246 New function for deserializing all attributes 1. setAttribute now processes a vector with 1 allocation call, no critical section, no existing attribute processing 2. Unit tests amend accordingly Signed-off-by: Dave Streeter --- system/jlib/jptree.cpp | 334 +++++++++- system/jlib/jptree.ipp | 3 + system/jlib/jstream.cpp | 67 ++ system/jlib/jstream.hpp | 4 + testing/unittests/CMakeLists.txt | 2 + testing/unittests/jlibtests.cpp | 1043 +++++++++++++++++++++++++++--- testing/unittests/unittests.cpp | 39 +- 7 files changed, 1373 insertions(+), 119 deletions(-) diff --git a/system/jlib/jptree.cpp b/system/jlib/jptree.cpp index dc3d37dee7b..306360a5073 100644 --- a/system/jlib/jptree.cpp +++ b/system/jlib/jptree.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "platform.h" #include "jarray.hpp" @@ -3070,18 +3071,37 @@ void PTree::deserializeFromStream(IBufferedSerialInputStream &src) } } + +static std::vector attrStringOffsets = []() +{ + std::vector vec; + // Reserve capacity to reduce allocations + vec.reserve(50); + return vec; +}(); + void PTree::deserializeSelf(IBufferedSerialInputStream &src) { - size32_t len{0}; - const char *name = queryZeroTerminatedString(src, len); - if (len == 0) + size32_t skipLen{0}; + const char *name = queryZeroTerminatedString(src, skipLen); + if (skipLen == 0) throwUnexpectedX("PTree deserialization error: end of stream, expected name"); setName(name); - src.skip(len + 1); // Skip over name and null terminator + src.skip(skipLen + 1); // Skip over name and null terminator read(src, flags); // Read attributes until we encounter a zero byte (attribute list terminator) + attrStringOffsets.clear(); + const char * base = peekAttributePairList(attrStringOffsets, src, skipLen); + if (base) // there is at least one attribute to process + { + setAttribute(base, attrStringOffsets); + src.skip(skipLen); // Skip over all attributes and the terminator + } + else + throwUnexpectedX("PTree deserialization error: end of stream, expected attribute name"); +/* DJPS for (;;) { NextByteStatus status = isNextByteZero(src); @@ -3094,16 +3114,16 @@ void PTree::deserializeSelf(IBufferedSerialInputStream &src) throwUnexpectedX("PTree deserialization error: end of stream, expected attribute name"); // NextByteStatus::nextByteIsNonZero - read the attribute key-value pair - std::pair attrPair = peekKeyValuePair(src, len); + std::pair attrPair = peekKeyValuePair(src, skipLen); if (attrPair.second == nullptr) throwUnexpectedX("PTree deserialization error: end of stream, expected attribute value"); constexpr bool attributeNameNotEncoded = false; // Deserialized attribute name is in its original unencoded form setAttribute(attrPair.first, attrPair.second, attributeNameNotEncoded); - src.skip(len + 1); // +1 to skip over second null terminator. + src.skip(skipLen + 1); // +1 to skip over second null terminator. } - +*/ if (value) delete value; size32_t size{0}; @@ -3881,6 +3901,306 @@ void LocalPTree::setAttribute(const char *inputkey, const char *val, bool encode AttrStr::destroy(goer); } +void LocalPTree::setAttribute(const char *base, const std::vector &offsets) +{ + size_t numStringOffsets = offsets.size(); + if (numStringOffsets % 2 != 0) + throwUnexpectedX("setAttribute error: offsets vector must contain pairs of key/value offsets"); + + for (size_t i = 0; i < numStringOffsets; i += 2) + { + const char *inputkey = base + offsets[i]; + const char *val = base + offsets[i + 1]; + + if (!inputkey) + continue; + const char *key = inputkey; + // Note: encoded is always false for deserialized attributes + if (!validateXMLTag(key+1)) + throw MakeIPTException(-1, "Invalid xml attribute: %s", key); + if (!val) + val = ""; // cannot have NULL value + AttrValue *v = findAttribute(key); + AttrStr *goer = nullptr; + if (v) + { + if (streq(v->value.get(), val)) + continue; + goer = v->value.getPtr(); + } + else + { + attrs = (AttrValue *)realloc(attrs, (numAttrs+1)*sizeof(AttrValue)); + v = new(&attrs[numAttrs++]) AttrValue; // Initialize new AttrValue + if (!v->key.set(inputkey)) //AttrStr will not return encoding marker when get() is called + v->key.setPtr(isnocase() ? AttrStr::createNC(inputkey) : AttrStr::create(inputkey)); + } + if (arrayOwner) + { + CQualifierMap *map = arrayOwner->queryMap(); + if (map) + { + if (goer) + map->replaceEntryIfMapped(key, v->value.get(), val, this); + else + map->insertEntryIfMapped(key, val, this); + } + } + + if (!v->value.set(val)) + v->value.setPtr(AttrStr::create(val)); + if (goer) + AttrStr::destroy(goer); + } +} + +/* DJPS Processing vector with 1 CD and newAttrArray() allocation per new attribute +void CAtomPTree::setAttribute(const char *base, const std::vector &offsets) +{ + size_t numStringOffsets = offsets.size(); + if (numStringOffsets % 2 != 0) + throwUnexpectedX("setAttribute error: offsets vector must contain pairs of key/value offsets"); + + CriticalBlock block(hashcrit); + for (size_t i = 0; i < numStringOffsets; i += 2) + { + const char *key = base + offsets[i]; + const char *val = base + offsets[i + 1]; + + if (!key) + continue; + if (!validateXMLTag(key+1)) + throw MakeIPTException(-1, "Invalid xml attribute: %s", key); + if (!val) + val = ""; // cannot have NULL value + AttrValue *v = findAttribute(key); + if (v) + { + if (streq(v->value.get(), val)) + continue; + if (arrayOwner) + { + CQualifierMap *map = arrayOwner->queryMap(); + if (map) + map->replaceEntryIfMapped(key, v->value.get(), val, this); + } + + AttrStr * goer = v->value.getPtr(); + if (!v->value.set(val)) + { + if (goer) + attrHT->removeval(goer); + v->value.setPtr(attrHT->addval(val)); + } + else if (goer) + { + attrHT->removeval(goer); + } + } + else + { + AttrValue *newattrs = newAttrArray(numAttrs+1); + if (attrs) + { + memcpy(newattrs, attrs, numAttrs*sizeof(AttrValue)); + freeAttrArray(attrs, numAttrs); + } + if (arrayOwner) + { + CQualifierMap *map = arrayOwner->queryMap(); + if (map) + map->insertEntryIfMapped(key, val, this); + } + v = &newattrs[numAttrs]; + if (!v->key.set(key)) + v->key.setPtr(attrHT->addkey(key, isnocase())); + if (!v->value.set(val)) + v->value.setPtr(attrHT->addval(val)); + numAttrs++; + attrs = newattrs; + } + } +} +*/ + +/* DJPS no CS and single newAttrArray() allocation for new attributes +void CAtomPTree::setAttribute(const char *base, const std::vector &offsets) +{ + size_t numStringOffsets = offsets.size(); + if (numStringOffsets == 0) + return; + + if (numStringOffsets % 2 != 0) + throwUnexpectedX("setAttribute error: offsets vector must contain pairs of key/value offsets"); + + + // Pre-pass for new attributes to allocate them in one call + // Count the new attributes + std::set newAttributesMap; + std::unordered_set *processed{nullptr}; + for (size_t i = 0; i < numStringOffsets; i += 2) + { + const char *key = base + offsets[i]; + if (!key) + continue; + if (!validateXMLTag(key+1)) + throw MakeIPTException(-1, "Invalid xml attribute: %s", key); + + AttrValue *v = findAttribute(key); + if (!v) + newAttributesMap.emplace(i); + } + size_t numNewAttributes = newAttributesMap.size(); + AttrValue *newattrs{nullptr}; + if (numNewAttributes) + { + // Allocate the new attributes in one call + newattrs = newAttrArray(numAttrs+numNewAttributes); + assertex(newattrs); + if (attrs) + { + memcpy(newattrs, attrs, numAttrs*sizeof(AttrValue)); + freeAttrArray(attrs, numAttrs); + } + + // process new additions only + size_t newAttributeIndex{0}; + processed = new std::unordered_set(); + for (size_t i : newAttributesMap) + { + const char *key = base + offsets[i]; + const char *val = base + offsets[i + 1]; + + if (!val) + val = ""; // cannot have NULL value + + if (arrayOwner) + { + CQualifierMap *map = arrayOwner->queryMap(); + if (map) + map->insertEntryIfMapped(key, val, this); + } + AttrValue *v = &newattrs[numAttrs + newAttributeIndex]; + assertex(v); + if (!v->key.set(key)) + v->key.setPtr(attrHT->addkey(key, isnocase())); + if (!v->value.set(val)) + v->value.setPtr(attrHT->addval(val)); + + ++newAttributeIndex; + processed->emplace(i); + } + attrs = newattrs; + numAttrs += numNewAttributes; + } + + // Process existing attributes + for (size_t i = 0; i < numStringOffsets; i += 2) + { + if (processed && (processed->find(i) != processed->end())) // ignore new attributes + continue; + + const char *key = base + offsets[i]; + const char *val = base + offsets[i + 1]; + + if (!key) + continue; + if (!val) + val = ""; // cannot have NULL value + AttrValue *v = findAttribute(key); + if (v) + { + if (streq(v->value.get(), val)) + continue; + if (arrayOwner) + { + CQualifierMap *map = arrayOwner->queryMap(); + if (map) + map->replaceEntryIfMapped(key, v->value.get(), val, this); + } + + AttrStr * goer = v->value.getPtr(); + if (!v->value.set(val)) + { + if (goer) + attrHT->removeval(goer); + v->value.setPtr(attrHT->addval(val)); + } + else if (goer) + { + attrHT->removeval(goer); + } + } + else + { + assertex(false); + } + } + delete processed; +} +*/ + +/* DJPS +no CS +single newAttrArray() allocation for new attributes +no findAttribute() call for existing attributes +*/ +void CAtomPTree::setAttribute(const char *base, const std::vector &offsets) +{ + size_t numStringOffsets = offsets.size(); + if (numStringOffsets < 2) + return; + + if (numStringOffsets % 2 != 0) + throwUnexpectedX("setAttribute error: offsets vector must contain pairs of key/value offsets"); + + // All attributes are treated as new i.e. do not exist in the tree already + // Allocate them in one call + size_t numNewAttributes = numStringOffsets / 2; + AttrValue *newattrs{nullptr}; + // Allocate the new attributes in one call + newattrs = newAttrArray(numAttrs+numNewAttributes); + assertex(newattrs); + if (attrs) + { + memcpy(newattrs, attrs, numAttrs*sizeof(AttrValue)); + freeAttrArray(attrs, numAttrs); + } + + // process new additions only + size_t newAttributeIndex{0}; + for (size_t i = 0; i < numStringOffsets; i += 2) + { + const char *key = base + offsets[i]; + const char *val = base + offsets[i + 1]; + + if (!key || !validateXMLTag(key+1)) + throw MakeIPTException(-1, "Invalid xml attribute: %s", ('\0'==*key ? "(null)" : key)); + if (findAttribute(key)) + fprintf(stderr, "DJPS found existing attribute key=\"%s\"\n", key); + if (!val) + val = ""; // cannot have NULL value + + if (arrayOwner) + { + CQualifierMap *map = arrayOwner->queryMap(); + if (map) + map->insertEntryIfMapped(key, val, this); + } + AttrValue *v = &newattrs[numAttrs + newAttributeIndex]; + assertex(v); + if (!v->key.set(key)) + v->key.setPtr(attrHT->addkey(key, isnocase())); + if (!v->value.set(val)) + v->value.setPtr(attrHT->addval(val)); + + ++newAttributeIndex; + } + + attrs = newattrs; + numAttrs += numNewAttributes; +} + #ifdef TRACE_STRING_SIZE std::atomic<__int64> AttrStr::totsize { 0 }; std::atomic<__int64> AttrStr::maxsize { 0 }; diff --git a/system/jlib/jptree.ipp b/system/jlib/jptree.ipp index 3e4f35311f7..acc09dc866c 100644 --- a/system/jlib/jptree.ipp +++ b/system/jlib/jptree.ipp @@ -701,6 +701,7 @@ public: return a->key.isEncoded(); } virtual void setAttribute(const char *attr, const char *val, bool encoded) = 0; + virtual void setAttribute(const char *base, const std::vector &offsets) = 0; // IPropertyTree impl. virtual bool hasProp(const char * xpath) const override; @@ -864,6 +865,7 @@ public: virtual unsigned queryHash() const override; virtual void setName(const char *_name) override; virtual void setAttribute(const char *attr, const char *val, bool encoded) override; + virtual void setAttribute(const char *base, const std::vector &offsets) override; virtual bool isEquivalent(IPropertyTree *tree) const override { return (nullptr != QUERYINTERFACE(tree, CAtomPTree)); } virtual IPropertyTree *create(const char *name=nullptr, IPTArrayValue *value=nullptr, ChildMap *children=nullptr, bool existing=false) override { @@ -905,6 +907,7 @@ public: } virtual void setName(const char *_name) override; virtual void setAttribute(const char *attr, const char *val, bool encoded) override; + virtual void setAttribute(const char *base, const std::vector &offsets) override; virtual bool isEquivalent(IPropertyTree *tree) const override { return (nullptr != QUERYINTERFACE(tree, LocalPTree)); } virtual IPropertyTree *create(const char *name=nullptr, IPTArrayValue *value=nullptr, ChildMap *children=nullptr, bool existing=false) override { diff --git a/system/jlib/jstream.cpp b/system/jlib/jstream.cpp index f18b7a825b1..2fb56d05e08 100644 --- a/system/jlib/jstream.cpp +++ b/system/jlib/jstream.cpp @@ -568,6 +568,73 @@ const char * peekStringList(std::vector & matchOffsets, IBufferedSeria } } +const char * peekAttributePairList(std::vector & matchOffsets, IBufferedSerialInputStream & in, size32_t & len) +{ + size32_t scanned = 0; + size32_t startNext = 0; + bool expectingValue = false; + + for (;;) + { + size32_t got; + const char * start = (const char *)in.peek(scanned+1, got); + if (unlikely(got <= scanned)) + { + len = scanned; + if (startNext == scanned) + { + //End of file, but the last string was null terminated... + return start; + } + return nullptr; + } + + const char * searchStart = start + scanned; + size32_t searchLen = got - scanned; + + // null-byte scanning in the newly available portion + const char * nullPos; + while ((nullPos = (const char *)memchr(searchStart, '\0', searchLen)) != nullptr) + { + size32_t offset = nullPos - start; + + if (unlikely(offset == startNext)) + { + if (expectingValue) + { + // Valid empty value + size32_t secondCharOffset = offset + 1; + matchOffsets.push_back(startNext); + expectingValue = false; + startNext = secondCharOffset; + + searchStart = nullPos + 1; + searchLen = got - secondCharOffset; + continue; + } + else + { + // Invalid empty string for Name, treat as list terminator + len = offset + 1; + return start; + } + } + else + { + matchOffsets.push_back(startNext); + expectingValue = !expectingValue; + startNext = offset + 1; + + searchStart = nullPos + 1; + searchLen = got - startNext; + } + } + + // No more nulls found in current buffer, need to peek more data + scanned = got; + } +} + //--------------------------------------------------------------------------- class CFileSerialInputStream final : public CInterfaceOf diff --git a/system/jlib/jstream.hpp b/system/jlib/jstream.hpp index ac4431d84a5..1e61df33850 100644 --- a/system/jlib/jstream.hpp +++ b/system/jlib/jstream.hpp @@ -74,6 +74,10 @@ extern jlib_decl std::pair peekKeyValuePair(IBuffere //Returns a pointer to the base string if valid. extern jlib_decl const char * peekStringList(std::vector & matches, IBufferedSerialInputStream & in, size32_t & len); +//Return a vector of offsets of the starts of null terminated Attribute Name/Value strings (Value can be empty string) +// - terminated by a null string or end of file. +//Returns a pointer to the base string if valid. +extern jlib_decl const char * peekAttributePairList(std::vector & matches, IBufferedSerialInputStream & in, size32_t & len); interface ISerialOutputStream : extends IInterface { diff --git a/testing/unittests/CMakeLists.txt b/testing/unittests/CMakeLists.txt index f218d72fa58..29e54e8be3a 100644 --- a/testing/unittests/CMakeLists.txt +++ b/testing/unittests/CMakeLists.txt @@ -82,6 +82,7 @@ include_directories ( ./../../common/thorhelper ./../../dali/base ./../../system/security/shared + ./../../system/security/zcrypt ./../../common/deftype ./../../common/workunit ./../../system/security/cryptohelper @@ -128,6 +129,7 @@ target_link_libraries ( unittests logginglib workunit eventconsumption + zcrypt ${CppUnit_LIBRARIES} ) diff --git a/testing/unittests/jlibtests.cpp b/testing/unittests/jlibtests.cpp index 7537298113c..511adc68510 100644 --- a/testing/unittests/jlibtests.cpp +++ b/testing/unittests/jlibtests.cpp @@ -27,6 +27,10 @@ #include #include +#ifdef CALLGRIND_PROFILING +#include +#endif + #include "jsem.hpp" #include "jfile.hpp" #include "jdebug.hpp" @@ -38,6 +42,7 @@ #include "jsecrets.hpp" #include "jutil.hpp" #include "junicode.hpp" +#include "zcrypt.hpp" #include "opentelemetry/sdk/common/attribute_utils.h" #include "opentelemetry/sdk/resource/resource.h" @@ -3412,6 +3417,8 @@ CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(JlibIPTTest, "JlibIPTTest"); #include "jio.hpp" #include "jstring.hpp" #include +#include +#include IPropertyTree *createCompatibilityConfigPropertyTree() { @@ -3575,13 +3582,10 @@ IPropertyTree *createCompatibilityConfigPropertyTree() return root.getClear(); } -IPropertyTree *createBinaryDataCompressionTestPTree(const char *testXml) +IPropertyTree *createBinaryDataCompressionTestPTree() { - // Validate the testXml parameter - CPPUNIT_ASSERT(testXml && *testXml); - // Create a tree with various binary data sizes to test compression thresholds - Owned tree = createPTree(testXml); + Owned tree = createPTree("Compressed"); // Add some regular properties first tree->setProp("normalProp", "normalValue"); @@ -3623,6 +3627,44 @@ IPropertyTree *createBinaryDataCompressionTestPTree(const char *testXml) * and measures performance of both operations */ +// Helper: log two property trees as XML and produce a side-by-side text view to help diffing +static void logPTreeSideBySide(const char *leftLabel, IPropertyTree *left, const char *rightLabel, IPropertyTree *right) +{ + StringBuffer leftXml, rightXml; + toXML(left, leftXml.clear(), 2, XML_Format|XML_SortTags); + toXML(right, rightXml.clear(), 2, XML_Format|XML_SortTags); + + DBGLOG("PTree %s (full XML):\n%s", leftLabel, leftXml.str()); + DBGLOG("PTree %s (full XML):\n%s", rightLabel, rightXml.str()); + + std::vector leftLines; + std::vector rightLines; + { + std::istringstream is(leftXml.str()); + std::string line; + while (std::getline(is, line)) + leftLines.push_back(line); + } + { + std::istringstream is(rightXml.str()); + std::string line; + while (std::getline(is, line)) + rightLines.push_back(line); + } + + size_t maxLines = leftLines.size() > rightLines.size() ? leftLines.size() : rightLines.size(); + StringBuffer sb; + sb.appendf("===== %s (expected) %s %s (actual) =====\n", leftLabel, "", rightLabel); + for (size_t i = 0; i < maxLines; ++i) + { + const char *l = (i < leftLines.size()) ? leftLines[i].c_str() : ""; + const char *r = (i < rightLines.size()) ? rightLines[i].c_str() : ""; + sb.appendf("%-80s | %s\n", l, r); + } + sb.append("===== End side-by-side =====\n"); + DBGLOG("PTree side-by-side comparison:\n%s", sb.str()); +} + class PTreeSerializationDeserializationTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(PTreeSerializationDeserializationTest); @@ -3630,116 +3672,159 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture CPPUNIT_TEST(testRoundTripForRootOnlyPTree); CPPUNIT_TEST(testRoundTripForCompatibilityConfigPropertyTree); CPPUNIT_TEST(testRoundTripForBinaryDataCompressionTestPTree); + CPPUNIT_TEST(testRoundTripForRootOnlyPTree_lowmem); + CPPUNIT_TEST(testRoundTripForCompatibilityConfigPropertyTree_lowmem); + CPPUNIT_TEST(testRoundTripForBinaryDataCompressionTestPTree_lowmem); + // Direct tests for vector-based setAttribute + CPPUNIT_TEST(testSetAttributeWithVector); + CPPUNIT_TEST(testSetAttributeWithVector_lowmem); CPPUNIT_TEST_SUITE_END(); protected: - static constexpr const char *testXml{ - "" - "" - " " - " " - " " - " " - "
" - " " - " " - " " - " " - " " - " " - " " - ""}; - // Complete round-trip test method that performs serialization, deserialization, and validation - void performRoundTripTest(const char *testName, IPropertyTree *tree) + void performRoundTripTest(const char *testName, IPropertyTree *tree, byte flags = ipt_none) { - Owned originalTree; - originalTree.setown(tree); - CCycleTimer timer; - - // Serialization - MemoryBuffer memoryBuffer, streamBuffer; - - // Time serialize() method - timer.reset(); - originalTree->serialize(memoryBuffer); - __uint64 serializeElapsedNs = timer.elapsedNs(); - size_t memoryBufferSize = memoryBuffer.length(); - - // Time serializeToStream() method - Owned out = createBufferedSerialOutputStream(streamBuffer); - timer.reset(); - originalTree->serializeToStream(*out); - out->flush(); - __uint64 serializeToStreamElapsedNs = timer.elapsedNs(); - size_t streamBufferSize = streamBuffer.length(); + try + { + Owned originalTree; + originalTree.setown(tree); + CCycleTimer timer; - // Validation - serialized data matches - CPPUNIT_ASSERT_EQUAL(memoryBufferSize, streamBufferSize); - CPPUNIT_ASSERT(memcmp(memoryBuffer.toByteArray(), streamBuffer.toByteArray(), memoryBufferSize) == 0); + // Serialization + MemoryBuffer memoryBuffer, streamBuffer; + + // Time serialize() method + DBGLOG("=== Starting serialize() for test: %s ===", testName); + timer.reset(); + originalTree->serialize(memoryBuffer); + __uint64 serializeElapsedNs = timer.elapsedNs(); + size_t memoryBufferSize = memoryBuffer.length(); + DBGLOG("=== serialize() completed, size: %zu bytes ===", memoryBufferSize); + + // Time serializeToStream() method + DBGLOG("=== Starting serializeToStream() ==="); + Owned out = createBufferedSerialOutputStream(streamBuffer); + timer.reset(); + originalTree->serializeToStream(*out); + out->flush(); + __uint64 serializeToStreamElapsedNs = timer.elapsedNs(); + size_t streamBufferSize = streamBuffer.length(); + DBGLOG("=== serializeToStream() completed, size: %zu bytes ===", streamBufferSize); + + // Validation - serialized data matches + CPPUNIT_ASSERT_EQUAL(memoryBufferSize, streamBufferSize); + CPPUNIT_ASSERT(memcmp(memoryBuffer.toByteArray(), streamBuffer.toByteArray(), memoryBufferSize) == 0); + + // Copy streamBuffer for deserializationFromStream() tests + MemoryBuffer streamBuffer2, streamBuffer3; + streamBuffer2.append(streamBuffer.length(), streamBuffer.toByteArray()); + streamBuffer3.append(streamBuffer.length(), streamBuffer.toByteArray()); + + // Time deserialize() method + DBGLOG("=== Starting deserialize() ==="); + Owned memoryBufferDeserialized = createPTree(flags); + timer.reset(); + memoryBufferDeserialized->deserialize(memoryBuffer); + __uint64 deserializeElapsedNs = timer.elapsedNs(); + DBGLOG("=== deserialize() completed ==="); + + // Time deserializeFromStream() method + DBGLOG("=== Starting deserializeFromStream() ==="); + Owned in = createBufferedSerialInputStream(streamBuffer); + Owned streamDeserialized = createPTree(flags); + timer.reset(); + streamDeserialized->deserializeFromStream(*in); + __uint64 deserializeFromStreamElapsedNs = timer.elapsedNs(); + DBGLOG("=== deserializeFromStream() completed ==="); + + // Create PTree from Binary tests + // + // Test 1: Call with flags parameter + DBGLOG("=== Starting createPTreeFromBinary() with flags ==="); + Owned in2 = createBufferedSerialInputStream(streamBuffer2); + Owned deserializedCreatePTreeFromBinaryWithFlags = createPTreeFromBinary(*in2, flags); + DBGLOG("=== createPTreeFromBinary() with flags completed ==="); + // Test 2: Call with custom nodeCreator + DBGLOG("=== Starting createPTreeFromBinary() with custom nodeCreator ==="); + class TestNodeCreator : public CSimpleInterfaceOf + { + public: + bool wasCalled = false; + byte nodeFlags; - // Time deserialize() method - Owned memoryBufferDeserialized = createPTree(); - timer.reset(); - memoryBufferDeserialized->deserialize(memoryBuffer); - __uint64 deserializeElapsedNs = timer.elapsedNs(); + TestNodeCreator(byte _flags) : nodeFlags(_flags) {} - // Time deserializeFromStream() method - Owned in = createBufferedSerialInputStream(streamBuffer); - Owned streamDeserialized = createPTree(); - timer.reset(); - streamDeserialized->deserializeFromStream(*in); - __uint64 deserializeFromStreamElapsedNs = timer.elapsedNs(); - - // Create PTree from Binary tests - // - // Test 1: Call with null nodeCreator (should fall back to createPTree(src, ipt_none)) - streamBuffer.reset(); - Owned in2 = createBufferedSerialInputStream(streamBuffer); - Owned deserializedCreatePTreeFromBinaryWithNull = createPTreeFromBinary(*in2, nullptr); - // Test 2: Call with custom nodeCreator - class TestNodeCreator : public CSimpleInterfaceOf + virtual IPropertyTree *create(const char *tag) override + { + wasCalled = true; + return createPTree(tag, nodeFlags); + } + }; + Owned nodeCreator = new TestNodeCreator(flags); + // Reset stream position + Owned in3 = createBufferedSerialInputStream(streamBuffer3); + Owned deserializedCreatePTreeFromBinaryWithCreator = createPTreeFromBinary(*in3, nodeCreator); + DBGLOG("=== createPTreeFromBinary() with custom nodeCreator completed ==="); + + // Validation - verify both deserialized trees are equivalent to the original + DBGLOG("=== Starting tree comparisons ==="); + if (!areMatchingPTrees(originalTree, memoryBufferDeserialized)) + logPTreeSideBySide("originalTree", originalTree, "memoryBufferDeserialized", memoryBufferDeserialized); + CPPUNIT_ASSERT(areMatchingPTrees(originalTree, memoryBufferDeserialized)); + + if (!areMatchingPTrees(originalTree, streamDeserialized)) + logPTreeSideBySide("originalTree", originalTree, "streamDeserialized", streamDeserialized); + CPPUNIT_ASSERT(areMatchingPTrees(originalTree, streamDeserialized)); + + if (!areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithFlags)) + logPTreeSideBySide("originalTree", originalTree, "deserializedCreatePTreeFromBinaryWithFlags", deserializedCreatePTreeFromBinaryWithFlags); + CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithFlags)); + + CPPUNIT_ASSERT(nodeCreator->wasCalled); // Verify nodeCreator was called + + if (!areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator)) + logPTreeSideBySide("originalTree", originalTree, "deserializedCreatePTreeFromBinaryWithCreator", deserializedCreatePTreeFromBinaryWithCreator); + CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator)); + + DBGLOG("=== Tree comparisons completed ==="); + + double serializeTimeMs = serializeElapsedNs / 1e6; + double serializeToStreamTimeMs = serializeToStreamElapsedNs / 1e6; + DBGLOG("=== ROUND-TRIP TEST STARTED FOR: %s ===", testName); + DBGLOG("=== SERIALIZATION TEST RESULTS:"); + DBGLOG(" serialize() time: %.6f ms", serializeTimeMs); + DBGLOG(" serializeToStream() time: %.6f ms", serializeToStreamTimeMs); + DBGLOG(" Performance ratio (serializeToStream/serialize): %.6f", serializeToStreamTimeMs / serializeTimeMs); + DBGLOG(" serialize() data size: %zu bytes", memoryBufferSize); + DBGLOG(" serializeToStream() data size: %zu bytes", streamBufferSize); + + double deserializeTimeMs = deserializeElapsedNs / 1e6; + double deserializeFromStreamTimeMs = deserializeFromStreamElapsedNs / 1e6; + DBGLOG("=== DESERIALIZATION TEST RESULTS:"); + DBGLOG(" deserialize() time: %.6f ms", deserializeTimeMs); + DBGLOG(" deserializeFromStream() time: %.6f ms", deserializeFromStreamTimeMs); + DBGLOG(" Performance ratio (deserializeFromStream/deserialize): %.6f", deserializeFromStreamTimeMs / deserializeTimeMs); + + DBGLOG("=== ROUND-TRIP TEST COMPLETED SUCCESSFULLY"); + } + catch (IException *e) { - public: - bool wasCalled = false; - - virtual IPropertyTree *create(const char *tag) override - { - wasCalled = true; - return createPTree(tag); - } - }; - Owned nodeCreator = new TestNodeCreator(); - // Reset stream position - streamBuffer.reset(); - Owned in3 = createBufferedSerialInputStream(streamBuffer); - Owned deserializedCreatePTreeFromBinaryWithCreator = createPTreeFromBinary(*in3, nodeCreator); - - // Validation - verify both deserialized trees are equivalent to the original - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, memoryBufferDeserialized)); - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, streamDeserialized)); - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithNull)); - CPPUNIT_ASSERT(nodeCreator->wasCalled); // Verify nodeCreator was called - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator)); - - double serializeTimeMs = serializeElapsedNs / 1e6; - double serializeToStreamTimeMs = serializeToStreamElapsedNs / 1e6; - DBGLOG("=== ROUND-TRIP TEST STARTED FOR: %s ===", testName); - DBGLOG("=== SERIALIZATION TEST RESULTS:"); - DBGLOG(" serialize() time: %.6f ms", serializeTimeMs); - DBGLOG(" serializeToStream() time: %.6f ms", serializeToStreamTimeMs); - DBGLOG(" Performance ratio (serializeToStream/serialize): %.6f", serializeToStreamTimeMs / serializeTimeMs); - DBGLOG(" serialize() data size: %zu bytes", memoryBufferSize); - DBGLOG(" serializeToStream() data size: %zu bytes", streamBufferSize); - - double deserializeTimeMs = deserializeElapsedNs / 1e6; - double deserializeFromStreamTimeMs = deserializeFromStreamElapsedNs / 1e6; - DBGLOG("=== DESERIALIZATION TEST RESULTS:"); - DBGLOG(" deserialize() time: %.6f ms", deserializeTimeMs); - DBGLOG(" deserializeFromStream() time: %.6f ms", deserializeFromStreamTimeMs); - DBGLOG(" Performance ratio (deserializeFromStream/deserialize): %.6f", deserializeFromStreamTimeMs / deserializeTimeMs); - - DBGLOG("=== ROUND-TRIP TEST COMPLETED SUCCESSFULLY"); + StringBuffer msg; + e->errorMessage(msg); + DBGLOG("=== EXCEPTION CAUGHT in %s: [%d] %s ===", testName, e->errorCode(), msg.str()); + e->Release(); + CPPUNIT_FAIL(StringBuffer("IException in ").append(testName).append(": ").append(msg).str()); + } + catch (std::exception &e) + { + DBGLOG("=== STD::EXCEPTION CAUGHT in %s: %s ===", testName, e.what()); + CPPUNIT_FAIL(StringBuffer("std::exception in ").append(testName).append(": ").append(e.what()).str()); + } + catch (...) + { + DBGLOG("=== UNKNOWN EXCEPTION CAUGHT in %s ===", testName); + CPPUNIT_FAIL(StringBuffer("Unknown exception in ").append(testName).str()); + } } public: @@ -3756,13 +3841,757 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture void testRoundTripForBinaryDataCompressionTestPTree() { - performRoundTripTest(__func__, createBinaryDataCompressionTestPTree(testXml)); + performRoundTripTest(__func__, createBinaryDataCompressionTestPTree()); + } + + // ipt_lowmem tests + void testRoundTripForRootOnlyPTree_lowmem() + { + performRoundTripTest(__func__, createPTree("EmptyRoot", ipt_lowmem), ipt_lowmem); + } + + void testRoundTripForCompatibilityConfigPropertyTree_lowmem() + { + performRoundTripTest(__func__, createCompatibilityConfigPropertyTree(), ipt_lowmem); + } + + void testRoundTripForBinaryDataCompressionTestPTree_lowmem() + { + performRoundTripTest(__func__, createBinaryDataCompressionTestPTree(), ipt_lowmem); + } + + // Direct tests for vector-based setAttribute function + void testSetAttributeWithVector() + { + testSetAttributeWithVectorImpl(ipt_none); + } + + void testSetAttributeWithVector_lowmem() + { + testSetAttributeWithVectorImpl(ipt_lowmem); + } + +private: + void testSetAttributeWithVectorImpl(byte flags) + { + try + { + // Create a simple tree for testing + Owned tree = createPTree("TestRoot", flags); + + // Cast to PTree* to access the internal setAttribute(base, vector) method + PTree *ptree = dynamic_cast(tree.get()); + CPPUNIT_ASSERT_MESSAGE("Failed to cast IPropertyTree to PTree", ptree != nullptr); + + // Build a string buffer containing key/value pairs and a vector of offsets + // Format matches what peekStringList produces in deserializeSelf: + // - String buffer contains null-terminated strings + // - Vector contains pairs: [key_offset, value_offset, key_offset, value_offset, ...] + + MemoryBuffer attrData; + std::vector offsets; + + // Test 1: Add new attributes (tests "new attribute" code path) + DBGLOG("=== Test 1: Adding new attributes ==="); + + // Add attribute: @newAttr1="newValue1" + offsets.push_back(attrData.length()); + attrData.append("@newAttr1").append('\0'); + + offsets.push_back(attrData.length()); + attrData.append("newValue1").append('\0'); + + // Add attribute: @newAttr2="newValue2" + offsets.push_back(attrData.length()); + attrData.append("@newAttr2").append('\0'); + + offsets.push_back(attrData.length()); + attrData.append("newValue2").append('\0'); + + DBGLOG("Calling setAttribute with %u offsets", (unsigned)offsets.size()); // Call setAttribute with the vector (testing new attribute insertion) + const char *base = (const char *)attrData.toByteArray(); + ptree->setAttribute(base, offsets); + + DBGLOG("setAttribute completed for new attributes"); + + // Verify new attributes were added using areMatchingPTrees + Owned expected1 = createPTree("TestRoot", flags); + expected1->setProp("@newAttr1", "newValue1"); + expected1->setProp("@newAttr2", "newValue2"); + if (!areMatchingPTrees(expected1, tree)) + logPTreeSideBySide("expected1", expected1, "tree", tree); + CPPUNIT_ASSERT_MESSAGE("Test 1: Tree doesn't match expected after adding new attributes", areMatchingPTrees(expected1, tree)); + + DBGLOG("Test 1 passed"); + + // Test 2: Update existing attributes (tests "update existing attribute" code path) + DBGLOG("=== Test 2: Updating existing attributes ==="); + attrData.clear(); + offsets.clear(); + + // Update attribute: @newAttr1="updatedValue1" + offsets.push_back(attrData.length()); + attrData.append("@newAttr1").append('\0'); + + offsets.push_back(attrData.length()); + attrData.append("updatedValue1").append('\0'); + + // Add one more new attribute: @newAttr3="newValue3" + offsets.push_back(attrData.length()); + attrData.append("@newAttr3").append('\0'); + + offsets.push_back(attrData.length()); + attrData.append("newValue3").append('\0'); + + DBGLOG("Calling setAttribute to update 1 existing + add 1 new"); + + // Call setAttribute again (testing both update and new attribute paths) + base = (const char *)attrData.toByteArray(); + ptree->setAttribute(base, offsets); + + DBGLOG("setAttribute completed for updates"); + + // Verify updates using areMatchingPTrees + Owned expected2 = createPTree("TestRoot", flags); + expected2->setProp("@newAttr1", "updatedValue1"); // Updated value + expected2->setProp("@newAttr2", "newValue2"); // Unchanged from Test 1 + expected2->setProp("@newAttr3", "newValue3"); // Newly added + if (!areMatchingPTrees(expected2, tree)) + logPTreeSideBySide("expected2", expected2, "tree", tree); + CPPUNIT_ASSERT_MESSAGE("Test 2: Tree doesn't match expected after updating attributes", areMatchingPTrees(expected2, tree)); + + DBGLOG("Test 2 passed"); + + // Test 3: Update attributes that were added via setProp (exercises the existing attribute update path) + DBGLOG("=== Test 3: Testing setAttribute on pre-existing attributes ==="); + + // Add attributes using traditional setProp method + tree->setProp("@preExisting", "original"); + + // Now update it using setAttribute with vector + attrData.clear(); + offsets.clear(); + + offsets.push_back(attrData.length()); + attrData.append("@preExisting").append('\0'); + offsets.push_back(attrData.length()); + attrData.append("modified").append('\0'); + + DBGLOG("Calling setAttribute to update pre-existing attribute"); + + base = (const char *)attrData.toByteArray(); + ptree->setAttribute(base, offsets); + + // Prepare the expected tree for comparison (expected3) + Owned expected3 = createPTree("TestRoot", flags); + expected3->setProp("@newAttr1", "updatedValue1"); + expected3->setProp("@newAttr2", "newValue2"); + expected3->setProp("@newAttr3", "newValue3"); + expected3->setProp("@preExisting", "modified"); // Updated via setAttribute + + // Log any differences between expected and actual tree. (No intentional mutation.) + + // Log the entire expected and actual trees side-by-side for easy comparison. + { + StringBuffer expXml, actXml; + toXML(expected3, expXml.clear(), 2, XML_Format|XML_SortTags); + toXML(tree, actXml.clear(), 2, XML_Format|XML_SortTags); + + // Log full trees separately first (helpful on their own) + DBGLOG("PTree expected (full XML):\n%s", expXml.str()); + DBGLOG("PTree actual (full XML):\n%s", actXml.str()); + + // Build side-by-side view by splitting into lines + std::vector expLines; + std::vector actLines; + { + std::istringstream es(expXml.str()); + std::string line; + while (std::getline(es, line)) + expLines.push_back(line); + } + { + std::istringstream as(actXml.str()); + std::string line; + while (std::getline(as, line)) + actLines.push_back(line); + } + + size_t maxLines = expLines.size() > actLines.size() ? expLines.size() : actLines.size(); + StringBuffer sideBySide; + sideBySide.append("===== Expected ============================== | ===== Actual ================================\n"); + for (size_t i = 0; i < maxLines; ++i) + { + const char *el = (i < expLines.size()) ? expLines[i].c_str() : ""; + const char *al = (i < actLines.size()) ? actLines[i].c_str() : ""; + // fixed-width column for expected side to ease visual diffing + sideBySide.appendf("%-80s | %s\n", el, al); + } + sideBySide.append("===== End side-by-side =============================================================\n"); + DBGLOG("PTree side-by-side comparison:\n%s", sideBySide.str()); + } + + // Verify the update worked using areMatchingPTrees + if (!areMatchingPTrees(expected3, tree)) + logPTreeSideBySide("expected3", expected3, "tree", tree); + CPPUNIT_ASSERT_MESSAGE("Test 3: Tree doesn't match expected after updating pre-existing attribute", areMatchingPTrees(expected3, tree)); + + DBGLOG("Test 3 passed"); + DBGLOG("=== testSetAttributeWithVector completed successfully ==="); + } + catch (IException *e) + { + StringBuffer msg; + e->errorMessage(msg); + e->Release(); + CPPUNIT_FAIL(StringBuffer("IException in testSetAttributeWithVector: ").append(msg).str()); + } + catch (std::exception &e) + { + CPPUNIT_FAIL(StringBuffer("std::exception in testSetAttributeWithVector: ").append(e.what()).str()); + } + catch (...) + { + CPPUNIT_FAIL("Unknown exception in testSetAttributeWithVector"); + } } }; CPPUNIT_TEST_SUITE_REGISTRATION(PTreeSerializationDeserializationTest); CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(PTreeSerializationDeserializationTest, "PTreeSerializationDeserializationTest"); +// Base class with shared functionality for PTree timing tests +class PTreeTimingTestBase : public CppUnit::TestFixture +{ +protected: + struct TimingResults + { + double avgDeserializeTimeMs; + double avgSerializeTimeMs; + double totalDeserializeTimeMs; + double totalSerializeTimeMs; + const char *testName; + byte flags; + }; + + const char *getBinaryFilePath() + { + const char *envPath = getenv("DALI_TEST_BINARY_FILE"); + constexpr const char *defaultPath = "~/HPCC-Platform/testing/unittests/dalisds1.bin.gz"; + return envPath ? envPath : defaultPath; + } + + const char *getXmlFilePath() + { + const char *envPath = getenv("DALI_TEST_XML_FILE"); + constexpr const char *defaultPath = "~/HPCC-Platform/testing/unittests/dalisds1.xml.gz"; + return envPath ? envPath : defaultPath; + } + + const char *expandTilde(const char *path, StringBuffer &expanded) + { + if (!path || !path[0]) + return path; + + if (path[0] == '~' && (path[1] == '/' || path[1] == '\0')) + { + const char *home = getenv("HOME"); + if (home) + { + expanded.append(home); + if (path[1] == '/') + expanded.append(path + 1); + return expanded.str(); + } + } + return path; + } + + void readXmlFile(const char *filePath, StringBuffer &output) + { + StringBuffer expandedPath; + const char *actualPath = expandTilde(filePath, expandedPath); + if (!actualPath) + throw MakeStringException(-1, "XML file path is null"); + Owned xmlFile = createIFile(actualPath); + if (!xmlFile->exists()) + throw MakeStringException(-1, "XML file \"%s\" does not exist", actualPath); + + size32_t fileSize = (size32_t)xmlFile->size(); + MemoryBuffer fileData; + Owned fileIO = xmlFile->open(IFOread); + fileData.reserveTruncate(fileSize); + size32_t bytesRead = fileIO->read(0, fileSize, fileData.bufferBase()); + fileData.setLength(bytesRead); + + const char *ext = pathExtension(filePath); + if (ext && streq(ext, ".gz")) + gunzip((const byte *)fileData.toByteArray(), fileData.length(), output); + else + output.append(fileData.length(), fileData.toByteArray()); + } + + void createBinaryDataFromXml(const char *xmlFilePath, MemoryBuffer &binaryData) + { + StringBuffer xmlData; + try + { + readXmlFile(xmlFilePath, xmlData); + } + catch (IException *e) + { + throw; + } + Owned tree = createPTreeFromXMLString(xmlData); + binaryData.clear(); + Owned out = createBufferedSerialOutputStream(binaryData); + tree->serializeToStream(*out); + out->flush(); + } + + bool readBinaryFile(const char *filePath, MemoryBuffer &output) + { + StringBuffer expandedPath; + const char *actualPath = expandTilde(filePath, expandedPath); + if (!actualPath) + return false; + Owned binaryFile = createIFile(actualPath); + if (!binaryFile->exists()) + return false; + + size32_t fileSize = (size32_t)binaryFile->size(); + Owned fileIO = binaryFile->open(IFOread); + output.reserveTruncate(fileSize); + size32_t bytesRead = fileIO->read(0, fileSize, output.bufferBase()); + output.setLength(bytesRead); + + const char *ext = pathExtension(actualPath); + if (ext && streq(ext, ".gz")) + { + StringBuffer tempOutput; + gunzip((const byte *)output.toByteArray(), output.length(), tempOutput); + output.clear(); + output.append(tempOutput.length(), tempOutput.str()); + } + + return true; + } + + TimingResults performBinaryTimingTestWithResults(const char *testName, const MemoryBuffer &binaryDataBuffer, int iterations, byte flags) + { + assertex(testName); + unsigned binaryDataLen = binaryDataBuffer.length(); + assertex(binaryDataLen > 0); + + CCycleTimer timer; + __uint64 totalDeserializeNs{0}; + __uint64 totalSerializeNs{0}; + + MemoryBuffer streamBufferIn; + MemoryBuffer streamBufferOut; + for (int i = 0; i < iterations; i++) + { + streamBufferIn.clear(); + streamBufferIn.append(binaryDataLen, binaryDataBuffer.toByteArray()); + Owned in = createBufferedSerialInputStream(streamBufferIn); + timer.reset(); + Owned deserializedTree = createPTreeFromBinary(*in, flags); + __uint64 deserializeElapsedNs = timer.elapsedNs(); + Owned copyDeserializedTree = createPTreeFromIPT(deserializedTree); + totalDeserializeNs += deserializeElapsedNs; + + streamBufferOut.clear(); + Owned out = createBufferedSerialOutputStream(streamBufferOut); + timer.reset(); + deserializedTree->serializeToStream(*out); + out->flush(); + __uint64 serializeElapsedNs = timer.elapsedNs(); + totalSerializeNs += serializeElapsedNs; + + // Validation - serialized data matches + if (!areMatchingPTrees(copyDeserializedTree, deserializedTree)) + logPTreeSideBySide("copyDeserializedTree", copyDeserializedTree, "tree", deserializedTree); + CPPUNIT_ASSERT(areMatchingPTrees(copyDeserializedTree, deserializedTree)); + } + + TimingResults results; + results.avgDeserializeTimeMs = (totalDeserializeNs / iterations) / 1e6; + results.avgSerializeTimeMs = (totalSerializeNs / iterations) / 1e6; + results.totalDeserializeTimeMs = totalDeserializeNs / 1e6; + results.totalSerializeTimeMs = totalSerializeNs / 1e6; + results.testName = testName; + results.flags = flags; + + return results; + } + + TimingResults performXmlTimingTestWithResults(const char *testName, const char *xmlData, int iterations, byte flags) + { + assertex(testName); + assertex(xmlData); + + CCycleTimer timer; + __uint64 totalDeserializeNs{0}; + __uint64 totalSerializeNs{0}; + + for (int i = 0; i < iterations; i++) + { + timer.reset(); + Owned tree = createPTreeFromXMLString(xmlData, flags); + __uint64 deserializeElapsedNs = timer.elapsedNs(); + totalDeserializeNs += deserializeElapsedNs; + + timer.reset(); + StringBuffer xmlOutput; + toXML(tree, xmlOutput); + __uint64 serializeElapsedNs = timer.elapsedNs(); + totalSerializeNs += serializeElapsedNs; + } + + TimingResults results; + results.avgDeserializeTimeMs = (totalDeserializeNs / iterations) / 1e6; + results.avgSerializeTimeMs = (totalSerializeNs / iterations) / 1e6; + results.totalDeserializeTimeMs = totalDeserializeNs / 1e6; + results.totalSerializeTimeMs = totalSerializeNs / 1e6; + results.testName = testName; + results.flags = flags; + + return results; + } +}; + +// Test suite for XML timing tests +class PTreeXmlTimingStressTest : public PTreeTimingTestBase +{ + CPPUNIT_TEST_SUITE(PTreeXmlTimingStressTest); + CPPUNIT_TEST(testXmlTimingWithNormalVsLowMem); + CPPUNIT_TEST_SUITE_END(); + +public: + void testXmlTimingWithNormalVsLowMem() + { + constexpr const int iterations{100}; + + // Load XML data + StringBuffer xmlData; + readXmlFile(getXmlFilePath(), xmlData); + unsigned xmlDataLen = (unsigned)strlen(xmlData.str()); + + // Run XML timing tests + TimingResults xmlNormalResults = performXmlTimingTestWithResults("XML Normal", xmlData.str(), iterations, ipt_none); + TimingResults xmlLowMemResults = performXmlTimingTestWithResults("XML Low Memory", xmlData.str(), iterations, ipt_lowmem); + + // Calculate differences + double lowMemDeserializeDiff = ((xmlLowMemResults.avgDeserializeTimeMs - xmlNormalResults.avgDeserializeTimeMs) / xmlNormalResults.avgDeserializeTimeMs) * 100; + double lowMemSerializeDiff = ((xmlLowMemResults.avgSerializeTimeMs - xmlNormalResults.avgSerializeTimeMs) / xmlNormalResults.avgSerializeTimeMs) * 100; + + // Display results + fprintf(stdout, "\n=== XML TIMING COMPARISON TEST ===\n"); + fprintf(stdout, "XML data size: %u bytes\n", xmlDataLen); + fprintf(stdout, "Iterations: %d\n", iterations); + fprintf(stdout, "┌──────────────────────┬─────────────────┬─────────────────┐\n"); + fprintf(stdout, "│ Mode │ Avg Deserialize │ Avg Serialize │\n"); + fprintf(stdout, "│ │ (ms) │ (ms) │\n"); + fprintf(stdout, "├──────────────────────┼─────────────────┼─────────────────┤\n"); + fprintf(stdout, "│ XML Normal │ %15.6f │ %15.6f │\n", + xmlNormalResults.avgDeserializeTimeMs, + xmlNormalResults.avgSerializeTimeMs); + fprintf(stdout, "│ XML Low Memory │ %15.6f │ %15.6f │\n", + xmlLowMemResults.avgDeserializeTimeMs, + xmlLowMemResults.avgSerializeTimeMs); + fprintf(stdout, "│ XML LowMem Diff │ %+14.2f%% │ %+14.2f%% │\n", + lowMemDeserializeDiff, + lowMemSerializeDiff); + fprintf(stdout, "└──────────────────────┴─────────────────┴─────────────────┘\n"); + fflush(stdout); + } +}; + +// Test suite for Binary timing tests +class PTreeBinaryTimingStressTest : public PTreeTimingTestBase +{ + CPPUNIT_TEST_SUITE(PTreeBinaryTimingStressTest); + CPPUNIT_TEST(testBinaryTimingWithNormalVsLowMem); + CPPUNIT_TEST_SUITE_END(); + +public: + void testBinaryTimingWithNormalVsLowMem() + { + constexpr const int iterations{100}; + + // Load Binary data + // If the binary file does not exist then create it from the XML file + MemoryBuffer binaryData; + if (!readBinaryFile(getBinaryFilePath(), binaryData)) + createBinaryDataFromXml(getXmlFilePath(), binaryData); + unsigned binaryDataLen = (unsigned)binaryData.length(); + + // Run Binary timing tests + TimingResults binaryNormalResults = performBinaryTimingTestWithResults("Binary Normal", binaryData, iterations, ipt_none); + TimingResults binaryLowMemResults = performBinaryTimingTestWithResults("Binary Low Memory", binaryData, iterations, ipt_lowmem); + + // Calculate differences + double lowMemDeserializeDiff = ((binaryLowMemResults.avgDeserializeTimeMs - binaryNormalResults.avgDeserializeTimeMs) / binaryNormalResults.avgDeserializeTimeMs) * 100; + double lowMemSerializeDiff = ((binaryLowMemResults.avgSerializeTimeMs - binaryNormalResults.avgSerializeTimeMs) / binaryNormalResults.avgSerializeTimeMs) * 100; + + // Display results + fprintf(stdout, "\n=== BINARY TIMING COMPARISON TEST ===\n"); + fprintf(stdout, "Binary data size: %u bytes\n", binaryDataLen); + fprintf(stdout, "Iterations: %d\n", iterations); + fprintf(stdout, "┌──────────────────────┬─────────────────┬─────────────────┐\n"); + fprintf(stdout, "│ Mode │ Avg Deserialize │ Avg Serialize │\n"); + fprintf(stdout, "│ │ (ms) │ (ms) │\n"); + fprintf(stdout, "├──────────────────────┼─────────────────┼─────────────────┤\n"); + fprintf(stdout, "│ Binary Normal │ %15.6f │ %15.6f │\n", + binaryNormalResults.avgDeserializeTimeMs, + binaryNormalResults.avgSerializeTimeMs); + fprintf(stdout, "│ Binary Low Memory │ %15.6f │ %15.6f │\n", + binaryLowMemResults.avgDeserializeTimeMs, + binaryLowMemResults.avgSerializeTimeMs); + fprintf(stdout, "│ Binary LowMem Diff │ %+14.2f%% │ %+14.2f%% │\n", + lowMemDeserializeDiff, + lowMemSerializeDiff); + fprintf(stdout, "└──────────────────────┴─────────────────┴─────────────────┘\n"); + fflush(stdout); + } +}; + +// Test suite for combined XML and Binary timing tests +class PTreeCombinedTimingStressTest : public PTreeTimingTestBase +{ + CPPUNIT_TEST_SUITE(PTreeCombinedTimingStressTest); + CPPUNIT_TEST(testCombinedXmlAndBinaryTimingWithNormalVsLowMem); + CPPUNIT_TEST_SUITE_END(); + +public: + void testCombinedXmlAndBinaryTimingWithNormalVsLowMem() + { + constexpr const int iterations{100}; + + // Load XML data + StringBuffer xmlData; + readXmlFile(getXmlFilePath(), xmlData); + unsigned xmlDataLen = (unsigned)strlen(xmlData.str()); + + // Load Binary data + // If the binary file does not exist then create it from the XML file + MemoryBuffer binaryData; + if (!readBinaryFile(getBinaryFilePath(), binaryData)) + createBinaryDataFromXml(getXmlFilePath(), binaryData); + unsigned binaryDataLen = (unsigned)binaryData.length(); + + // Run all timing tests + TimingResults xmlNormalResults = performXmlTimingTestWithResults("XML Normal", xmlData.str(), iterations, ipt_none); + TimingResults xmlLowMemResults = performXmlTimingTestWithResults("XML Low Memory", xmlData.str(), iterations, ipt_lowmem); + TimingResults binaryNormalResults = performBinaryTimingTestWithResults("Binary Normal", binaryData, iterations, ipt_none); + TimingResults binaryLowMemResults = performBinaryTimingTestWithResults("Binary Low Memory", binaryData, iterations, ipt_lowmem); + + // Calculate all comparisons + double xmlLowMemDeserializeDiff = ((xmlLowMemResults.avgDeserializeTimeMs - xmlNormalResults.avgDeserializeTimeMs) / xmlNormalResults.avgDeserializeTimeMs) * 100; + double xmlLowMemSerializeDiff = ((xmlLowMemResults.avgSerializeTimeMs - xmlNormalResults.avgSerializeTimeMs) / xmlNormalResults.avgSerializeTimeMs) * 100; + double binaryLowMemDeserializeDiff = ((binaryLowMemResults.avgDeserializeTimeMs - binaryNormalResults.avgDeserializeTimeMs) / binaryNormalResults.avgDeserializeTimeMs) * 100; + double binaryLowMemSerializeDiff = ((binaryLowMemResults.avgSerializeTimeMs - binaryNormalResults.avgSerializeTimeMs) / binaryNormalResults.avgSerializeTimeMs) * 100; + double binaryVsXmlNormalDeserialize = ((binaryNormalResults.avgDeserializeTimeMs - xmlNormalResults.avgDeserializeTimeMs) / xmlNormalResults.avgDeserializeTimeMs) * 100; + double binaryVsXmlNormalSerialize = ((binaryNormalResults.avgSerializeTimeMs - xmlNormalResults.avgSerializeTimeMs) / xmlNormalResults.avgSerializeTimeMs) * 100; + double binaryVsXmlLowMemDeserialize = ((binaryLowMemResults.avgDeserializeTimeMs - xmlLowMemResults.avgDeserializeTimeMs) / xmlLowMemResults.avgDeserializeTimeMs) * 100; + double binaryVsXmlLowMemSerialize = ((binaryLowMemResults.avgSerializeTimeMs - xmlLowMemResults.avgSerializeTimeMs) / xmlLowMemResults.avgSerializeTimeMs) * 100; + + // Display combined results + fprintf(stdout, "\n=== COMBINED XML & BINARY TIMING COMPARISON TEST ===\n"); + fprintf(stdout, "XML data size: %u bytes\n", xmlDataLen); + fprintf(stdout, "Binary data size: %u bytes\n", binaryDataLen); + fprintf(stdout, "Iterations: %d\n", iterations); + fprintf(stdout, "┌──────────────────────┬─────────────────┬─────────────────┐\n"); + fprintf(stdout, "│ Format & Mode │ Avg Deserialize │ Avg Serialize │\n"); + fprintf(stdout, "│ │ (ms) │ (ms) │\n"); + fprintf(stdout, "├──────────────────────┼─────────────────┼─────────────────┤\n"); + fprintf(stdout, "│ XML Normal │ %15.6f │ %15.6f │\n", + xmlNormalResults.avgDeserializeTimeMs, + xmlNormalResults.avgSerializeTimeMs); + fprintf(stdout, "│ XML Low Memory │ %15.6f │ %15.6f │\n", + xmlLowMemResults.avgDeserializeTimeMs, + xmlLowMemResults.avgSerializeTimeMs); + fprintf(stdout, "│ XML LowMem Diff │ %+14.2f%% │ %+14.2f%% │\n", + xmlLowMemDeserializeDiff, + xmlLowMemSerializeDiff); + fprintf(stdout, "├──────────────────────┼─────────────────┼─────────────────┤\n"); + fprintf(stdout, "│ Binary Normal │ %15.6f │ %15.6f │\n", + binaryNormalResults.avgDeserializeTimeMs, + binaryNormalResults.avgSerializeTimeMs); + fprintf(stdout, "│ Binary Low Memory │ %15.6f │ %15.6f │\n", + binaryLowMemResults.avgDeserializeTimeMs, + binaryLowMemResults.avgSerializeTimeMs); + fprintf(stdout, "│ Binary LowMem Diff │ %+14.2f%% │ %+14.2f%% │\n", + binaryLowMemDeserializeDiff, + binaryLowMemSerializeDiff); + fprintf(stdout, "├──────────────────────┼─────────────────┼─────────────────┤\n"); + fprintf(stdout, "│ Binary vs XML Normal │ %+14.2f%% │ %+14.2f%% │\n", + binaryVsXmlNormalDeserialize, + binaryVsXmlNormalSerialize); + fprintf(stdout, "│ Binary vs XML LowMem │ %+14.2f%% │ %+14.2f%% │\n", + binaryVsXmlLowMemDeserialize, + binaryVsXmlLowMemSerialize); + fprintf(stdout, "└──────────────────────┴─────────────────┴─────────────────┘\n"); + fflush(stdout); + } +}; + +// Test suite for profiling Binary deserialization +class PTreeBinaryDeserializationProfilingStressTest : public PTreeTimingTestBase +{ + CPPUNIT_TEST_SUITE(PTreeBinaryDeserializationProfilingStressTest); + CPPUNIT_TEST(testBinaryLowMemDeserializationForProfiling); + CPPUNIT_TEST_SUITE_END(); + +public: + void testBinaryLowMemDeserializationForProfiling() + { + // This test is designed specifically for profiling tools + // It focuses solely on binary deserialization with low memory flags + // Run with: perf record -g ./unittests --test testBinaryLowMemDeserializationForProfiling + // Or with callgrind: valgrind --tool=callgrind ./unittests -e PTreeSerializationDeserializationXmlTimingStressTest + + constexpr const int iterations{1}; // More iterations for better profiling data + constexpr const byte flags{ipt_lowmem}; + + // Load Binary data - DECOMPRESS BEFORE PROFILING STARTS + MemoryBuffer binaryData; + const char *binaryPath = getBinaryFilePath(); + try + { + // Read raw file (without automatic decompression) + if (!readBinaryFileRaw(binaryPath, binaryData)) + createBinaryDataFromXml(getXmlFilePath(), binaryData); + + // Decompress outside the profiled section to exclude gunzip from profiling + if (!decompressIfNeeded(binaryPath, binaryData)) + CPPUNIT_FAIL("Failed to decompress binary data"); + } + catch (IException *e) + { + StringBuffer msg; + e->errorMessage(msg); + e->Release(); + CPPUNIT_FAIL(msg.str()); + } + catch (...) + { + CPPUNIT_FAIL("Failed to load test data files"); + } + + unsigned binaryDataLen = binaryData.length(); + if (binaryDataLen == 0) + CPPUNIT_FAIL("Binary data is empty - test data files not found or empty"); + + DBGLOG("=== BINARY LOW MEMORY DESERIALIZATION PROFILING TEST ==="); + DBGLOG("Binary data size: %u bytes (decompressed)", binaryDataLen); + DBGLOG("Iterations: %d", iterations); + DBGLOG("Flags: 0x%02X (ipt_lowmem)", flags); + DBGLOG("Decompression completed BEFORE profiling begins"); + DBGLOG("This test is optimized for profiling - running deserialization only"); + + CCycleTimer timer; + __uint64 totalDeserializeNs{0}; + + MemoryBuffer streamBufferIn; + try + { + for (int i = 0; i < iterations; i++) + { + streamBufferIn.clear(); + streamBufferIn.append(binaryDataLen, binaryData.toByteArray()); + Owned in = createBufferedSerialInputStream(streamBufferIn); + + timer.reset(); +#ifdef CALLGRIND_PROFILING + CALLGRIND_START_INSTRUMENTATION; + CALLGRIND_TOGGLE_COLLECT; +#endif + Owned deserializedTree = createPTreeFromBinary(*in, flags); +#ifdef CALLGRIND_PROFILING + CALLGRIND_TOGGLE_COLLECT; + CALLGRIND_STOP_INSTRUMENTATION; +#endif + __uint64 deserializeElapsedNs = timer.elapsedNs(); + totalDeserializeNs += deserializeElapsedNs; + + // Keep a reference to prevent optimization + CPPUNIT_ASSERT(deserializedTree != nullptr); + } + } + catch (IException *e) + { + StringBuffer msg; + e->errorMessage(msg); + e->Release(); + CPPUNIT_FAIL(msg.str()); + } + catch (...) + { + CPPUNIT_FAIL("Unexpected exception during deserialization"); + } + + double avgDeserializeTimeMs = (totalDeserializeNs / iterations) / 1e6; + double totalDeserializeTimeMs = totalDeserializeNs / 1e6; + + DBGLOG("=== PROFILING TEST RESULTS ==="); + DBGLOG("Total deserialize time: %.6f ms", totalDeserializeTimeMs); + DBGLOG("Average deserialize time: %.6f ms", avgDeserializeTimeMs); + DBGLOG("Iterations per second: %.2f", 1000.0 / avgDeserializeTimeMs); + DBGLOG("=== PROFILING TEST COMPLETED ==="); + } + +private: + bool readBinaryFileRaw(const char *filePath, MemoryBuffer &output) + { + // Read file without decompression - for profiling tests + StringBuffer expandedPath; + const char *actualPath = expandTilde(filePath, expandedPath); + if (!actualPath) + return false; + Owned binaryFile = createIFile(actualPath); + if (!binaryFile->exists()) + return false; + + size32_t fileSize = (size32_t)binaryFile->size(); + Owned fileIO = binaryFile->open(IFOread); + output.reserveTruncate(fileSize); + size32_t bytesRead = fileIO->read(0, fileSize, output.bufferBase()); + output.setLength(bytesRead); + + return true; + } + + bool decompressIfNeeded(const char *filePath, MemoryBuffer &data) + { + // Decompress if file has .gz extension + StringBuffer expandedPath; + const char *actualPath = expandTilde(filePath, expandedPath); + if (!actualPath) + return false; + + const char *ext = pathExtension(actualPath); + if (ext && streq(ext, ".gz")) + { + StringBuffer tempOutput; + gunzip((const byte *)data.toByteArray(), data.length(), tempOutput); + data.clear(); + data.append(tempOutput.length(), tempOutput.str()); + } + + return true; + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(PTreeXmlTimingStressTest); +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(PTreeXmlTimingStressTest, "PTreeXmlTimingStressTest"); +CPPUNIT_TEST_SUITE_REGISTRATION(PTreeBinaryTimingStressTest); +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(PTreeBinaryTimingStressTest, "PTreeBinaryTimingStressTest"); +CPPUNIT_TEST_SUITE_REGISTRATION(PTreeCombinedTimingStressTest); +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(PTreeCombinedTimingStressTest, "PTreeCombinedTimingStressTest"); +CPPUNIT_TEST_SUITE_REGISTRATION(PTreeBinaryDeserializationProfilingStressTest); +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(PTreeBinaryDeserializationProfilingStressTest, "PTreeBinaryDeserializationProfilingStressTest"); + #include "jdebug.hpp" #include "jmutex.hpp" #include diff --git a/testing/unittests/unittests.cpp b/testing/unittests/unittests.cpp index cad22ec1832..d434f5826ad 100644 --- a/testing/unittests/unittests.cpp +++ b/testing/unittests/unittests.cpp @@ -277,7 +277,36 @@ int main(int argc, const char *argv[]) } } } - wasSuccessful = list || runner.run( "", false ); + try + { + wasSuccessful = list || runner.run( "", false ); + } + catch (IException *e) + { + StringBuffer msg; + msg.append("Uncaught IException while running tests: "); + e->errorMessage(msg); + EXCLOG(e, msg.str()); + e->Release(); + wasSuccessful = false; + } + catch (CppUnit::Exception &e) + { + // CppUnit exceptions don't inherit from std::exception in older versions + fprintf(stderr, "Uncaught CppUnit::Exception while running tests: %s\n", e.what()); + wasSuccessful = false; + } + catch (std::exception &e) + { + fprintf(stderr, "Uncaught std::exception while running tests: %s\n", e.what()); + wasSuccessful = false; + } + catch (...) + { + // Unknown exception types (e.g., thrown across DLL boundaries) + fprintf(stderr, "Uncaught unknown exception while running tests\n"); + wasSuccessful = false; + } } releaseAtoms(); ClearTypeCache(); // Clear this cache before the file hooks are unloaded @@ -971,7 +1000,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture for (int a = 0; a < 201; a++) ra[a] = 0; - + class T : public CThreaded { public: @@ -1037,7 +1066,7 @@ class RelaxedAtomicTimingTest : public CppUnit::TestFixture CriticalSection &lock; } t1a(count, ra[0], lock[0], mode), t2a(count, ra[0], lock[0], mode), t3a(count, ra[0], lock[0], mode), t1b(count, ra[0], lock[0], mode), t2b(count, ra[1], lock[1], mode), t3b(count, ra[2], lock[2], mode), - t1c(count, ra[0], lock[0], mode), t2c(count, ra[100], lock[100], mode), t3c(count, ra[200], lock[200], mode);; + t1c(count, ra[0], lock[0], mode), t2c(count, ra[100], lock[100], mode), t3c(count, ra[200], lock[200], mode);; DBGLOG("Testing RelaxedAtomics (test mode %u)", mode); t1a.start(false); t2a.start(false); @@ -1080,7 +1109,7 @@ class compressToBufferTest : public CppUnit::TestFixture bool testOne(unsigned len, CompressionMethod method, bool prevResult, const char *options=nullptr) { - constexpr const char *in = + constexpr const char *in = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello" "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello" "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello" @@ -1119,7 +1148,7 @@ class compressToBufferTest : public CppUnit::TestFixture DBGLOG("compressToBuffer %x size %u did not compress", (byte) method, len); ret = false; } - else + else { if (!prevResult) DBGLOG("compressToBuffer %x size %u compressed to %u in %lluns", (byte) method, len, compressed.length(), start.elapsedNs()); From b2f210ee22ed50c07c478af4e124d19f20eab7fd Mon Sep 17 00:00:00 2001 From: Dave Streeter Date: Fri, 31 Oct 2025 12:02:18 +0000 Subject: [PATCH 2/5] Added DedupKeyValuePairsTimingPerformanceStressTest to find the quickest dedup method for key/value pairs. Signed-off-by: Dave Streeter --- system/jlib/jptree.cpp | 17 +- system/jlib/jstream.cpp | 2 +- system/jlib/jstream.hpp | 2 +- testing/unittests/jlibtests.cpp | 232 ++++++++++-------------- testing/unittests/jlibtests2.cpp | 297 ++++++++++++++++++++++++++++++- 5 files changed, 402 insertions(+), 148 deletions(-) diff --git a/system/jlib/jptree.cpp b/system/jlib/jptree.cpp index 306360a5073..b6c32291d9e 100644 --- a/system/jlib/jptree.cpp +++ b/system/jlib/jptree.cpp @@ -3093,7 +3093,7 @@ void PTree::deserializeSelf(IBufferedSerialInputStream &src) // Read attributes until we encounter a zero byte (attribute list terminator) attrStringOffsets.clear(); - const char * base = peekAttributePairList(attrStringOffsets, src, skipLen); + const char * base = peekKeyValuePairList(attrStringOffsets, src, skipLen); if (base) // there is at least one attribute to process { setAttribute(base, attrStringOffsets); @@ -4153,7 +4153,16 @@ void CAtomPTree::setAttribute(const char *base, const std::vector &off if (numStringOffsets % 2 != 0) throwUnexpectedX("setAttribute error: offsets vector must contain pairs of key/value offsets"); - +/* + // Deduplicate keys in input + std::unordered_set seenKeys; + for (size_t i = 0; i < numStringOffsets; i += 2) + { + const char *key = base + offsets[i]; + if (key) + seenKeys.insert(i); + } +*/ // All attributes are treated as new i.e. do not exist in the tree already // Allocate them in one call size_t numNewAttributes = numStringOffsets / 2; @@ -4167,7 +4176,7 @@ void CAtomPTree::setAttribute(const char *base, const std::vector &off freeAttrArray(attrs, numAttrs); } - // process new additions only + // process offsets as new attributes size_t newAttributeIndex{0}; for (size_t i = 0; i < numStringOffsets; i += 2) { @@ -4176,8 +4185,6 @@ void CAtomPTree::setAttribute(const char *base, const std::vector &off if (!key || !validateXMLTag(key+1)) throw MakeIPTException(-1, "Invalid xml attribute: %s", ('\0'==*key ? "(null)" : key)); - if (findAttribute(key)) - fprintf(stderr, "DJPS found existing attribute key=\"%s\"\n", key); if (!val) val = ""; // cannot have NULL value diff --git a/system/jlib/jstream.cpp b/system/jlib/jstream.cpp index 2fb56d05e08..fbaccade3d2 100644 --- a/system/jlib/jstream.cpp +++ b/system/jlib/jstream.cpp @@ -568,7 +568,7 @@ const char * peekStringList(std::vector & matchOffsets, IBufferedSeria } } -const char * peekAttributePairList(std::vector & matchOffsets, IBufferedSerialInputStream & in, size32_t & len) +const char * peekKeyValuePairList(std::vector & matchOffsets, IBufferedSerialInputStream & in, size32_t & len) { size32_t scanned = 0; size32_t startNext = 0; diff --git a/system/jlib/jstream.hpp b/system/jlib/jstream.hpp index 1e61df33850..0e9d7861aef 100644 --- a/system/jlib/jstream.hpp +++ b/system/jlib/jstream.hpp @@ -77,7 +77,7 @@ extern jlib_decl const char * peekStringList(std::vector & matches, IB //Return a vector of offsets of the starts of null terminated Attribute Name/Value strings (Value can be empty string) // - terminated by a null string or end of file. //Returns a pointer to the base string if valid. -extern jlib_decl const char * peekAttributePairList(std::vector & matches, IBufferedSerialInputStream & in, size32_t & len); +extern jlib_decl const char * peekKeyValuePairList(std::vector & matches, IBufferedSerialInputStream & in, size32_t & len); interface ISerialOutputStream : extends IInterface { diff --git a/testing/unittests/jlibtests.cpp b/testing/unittests/jlibtests.cpp index 511adc68510..ca8e2cc0fdd 100644 --- a/testing/unittests/jlibtests.cpp +++ b/testing/unittests/jlibtests.cpp @@ -3628,14 +3628,11 @@ IPropertyTree *createBinaryDataCompressionTestPTree() */ // Helper: log two property trees as XML and produce a side-by-side text view to help diffing -static void logPTreeSideBySide(const char *leftLabel, IPropertyTree *left, const char *rightLabel, IPropertyTree *right) +static void logPTreeSideBySide(const char *leftLabel, IPropertyTree *leftTree, const char *rightLabel, IPropertyTree *rightTree) { StringBuffer leftXml, rightXml; - toXML(left, leftXml.clear(), 2, XML_Format|XML_SortTags); - toXML(right, rightXml.clear(), 2, XML_Format|XML_SortTags); - - DBGLOG("PTree %s (full XML):\n%s", leftLabel, leftXml.str()); - DBGLOG("PTree %s (full XML):\n%s", rightLabel, rightXml.str()); + toXML(leftTree, leftXml.clear(), 2, XML_Format|XML_SortTags); + toXML(rightTree, rightXml.clear(), 2, XML_Format|XML_SortTags); std::vector leftLines; std::vector rightLines; @@ -3652,6 +3649,62 @@ static void logPTreeSideBySide(const char *leftLabel, IPropertyTree *left, const rightLines.push_back(line); } + // Reformat left side so attributes appear on separate indented lines (to match common multiline formatting) + std::vector formattedLeftLines; + for (const auto &rawLine : leftLines) + { + std::string s = rawLine; + size_t lt = s.find('<'); + if (lt != std::string::npos && lt + 1 < s.size() && s[lt+1] != '/' && s.find('"') != std::string::npos) + { + // locate end of tag (position of '>') + size_t endPos = s.rfind('>'); + if (endPos == std::string::npos) + { + formattedLeftLines.push_back(s); + continue; + } + // find end of tag name + size_t pos = lt + 1; + while (pos < s.size() && !isspace((unsigned char)s[pos]) && s[pos] != '>' && s[pos] != '/') ++pos; + // first line: opening tag name + std::string tagName = s.substr(lt, pos - lt); + formattedLeftLines.push_back(tagName); + + // attribute section between pos and endPos + std::string attrs = (pos < endPos) ? s.substr(pos, endPos - pos) : std::string(); + size_t i = 0; + while (i < attrs.size()) + { + // skip whitespace + while (i < attrs.size() && isspace((unsigned char)attrs[i])) ++i; + if (i >= attrs.size()) break; + size_t start = i; + bool inQuote = false; + while (i < attrs.size()) + { + if (attrs[i] == '"') inQuote = !inQuote; + if (!inQuote && isspace((unsigned char)attrs[i])) break; + ++i; + } + std::string token = attrs.substr(start, i - start); + if (!token.empty()) + formattedLeftLines.push_back(std::string(2, ' ') + token); + } + + // closing marker + if (endPos > 0 && s[endPos - 1] == '/') + formattedLeftLines.push_back(std::string("/>") ); + else + formattedLeftLines.push_back(std::string(">") ); + } + else + { + formattedLeftLines.push_back(s); + } + } + leftLines.swap(formattedLeftLines); + size_t maxLines = leftLines.size() > rightLines.size() ? leftLines.size() : rightLines.size(); StringBuffer sb; sb.appendf("===== %s (expected) %s %s (actual) =====\n", leftLabel, "", rightLabel); @@ -3766,8 +3819,7 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture Owned deserializedCreatePTreeFromBinaryWithCreator = createPTreeFromBinary(*in3, nodeCreator); DBGLOG("=== createPTreeFromBinary() with custom nodeCreator completed ==="); - // Validation - verify both deserialized trees are equivalent to the original - DBGLOG("=== Starting tree comparisons ==="); + // Validation - verify both deserialized trees are equivalent to the original, log out any differences if (!areMatchingPTrees(originalTree, memoryBufferDeserialized)) logPTreeSideBySide("originalTree", originalTree, "memoryBufferDeserialized", memoryBufferDeserialized); CPPUNIT_ASSERT(areMatchingPTrees(originalTree, memoryBufferDeserialized)); @@ -3786,11 +3838,10 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture logPTreeSideBySide("originalTree", originalTree, "deserializedCreatePTreeFromBinaryWithCreator", deserializedCreatePTreeFromBinaryWithCreator); CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator)); - DBGLOG("=== Tree comparisons completed ==="); + DBGLOG("=== ROUND-TRIP TEST FOR: %s ===", testName); double serializeTimeMs = serializeElapsedNs / 1e6; double serializeToStreamTimeMs = serializeToStreamElapsedNs / 1e6; - DBGLOG("=== ROUND-TRIP TEST STARTED FOR: %s ===", testName); DBGLOG("=== SERIALIZATION TEST RESULTS:"); DBGLOG(" serialize() time: %.6f ms", serializeTimeMs); DBGLOG(" serializeToStream() time: %.6f ms", serializeToStreamTimeMs); @@ -3891,152 +3942,53 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture MemoryBuffer attrData; std::vector offsets; - // Test 1: Add new attributes (tests "new attribute" code path) - DBGLOG("=== Test 1: Adding new attributes ==="); - - // Add attribute: @newAttr1="newValue1" - offsets.push_back(attrData.length()); - attrData.append("@newAttr1").append('\0'); + // Test: Add attributes + DBGLOG("=== Test: Adding new attributes ==="); - offsets.push_back(attrData.length()); - attrData.append("newValue1").append('\0'); - - // Add attribute: @newAttr2="newValue2" - offsets.push_back(attrData.length()); - attrData.append("@newAttr2").append('\0'); + // Add 10 attributes (some values intentionally duplicated) + // Format: key, value, key, value, ... + auto addAttr = [&](const char *k, const char *v) + { + offsets.push_back(attrData.length()); + attrData.append(k).append('\0'); + offsets.push_back(attrData.length()); + attrData.append(v ? v : "").append('\0'); + }; - offsets.push_back(attrData.length()); - attrData.append("newValue2").append('\0'); + addAttr("@newAttr1", "newValue1"); + addAttr("@newAttr2", "newValue2"); + addAttr("@newAttr3", ""); + addAttr("@newAttr4", "dupValue1"); // Duplicate key + addAttr("@newAttr5", "newValue3"); + addAttr("@newAttr4", "dupValue2"); // Duplicate key + addAttr("@newAttr6", "newValue4"); + addAttr("@newAttr7", "newValue5"); + addAttr("@newAttr8", "newValue6"); + addAttr("@newAttr9", "newValue7"); + addAttr("@newAttr10", ""); DBGLOG("Calling setAttribute with %u offsets", (unsigned)offsets.size()); // Call setAttribute with the vector (testing new attribute insertion) const char *base = (const char *)attrData.toByteArray(); ptree->setAttribute(base, offsets); - DBGLOG("setAttribute completed for new attributes"); - // Verify new attributes were added using areMatchingPTrees Owned expected1 = createPTree("TestRoot", flags); expected1->setProp("@newAttr1", "newValue1"); expected1->setProp("@newAttr2", "newValue2"); + expected1->setProp("@newAttr3", ""); + expected1->setProp("@newAttr4", "dupValue2"); + expected1->setProp("@newAttr5", "newValue3"); + expected1->setProp("@newAttr6", "newValue4"); + expected1->setProp("@newAttr7", "newValue5"); + expected1->setProp("@newAttr8", "newValue6"); + expected1->setProp("@newAttr9", "newValue7"); + expected1->setProp("@newAttr10", ""); if (!areMatchingPTrees(expected1, tree)) logPTreeSideBySide("expected1", expected1, "tree", tree); CPPUNIT_ASSERT_MESSAGE("Test 1: Tree doesn't match expected after adding new attributes", areMatchingPTrees(expected1, tree)); - DBGLOG("Test 1 passed"); - - // Test 2: Update existing attributes (tests "update existing attribute" code path) - DBGLOG("=== Test 2: Updating existing attributes ==="); - attrData.clear(); - offsets.clear(); - - // Update attribute: @newAttr1="updatedValue1" - offsets.push_back(attrData.length()); - attrData.append("@newAttr1").append('\0'); - - offsets.push_back(attrData.length()); - attrData.append("updatedValue1").append('\0'); - - // Add one more new attribute: @newAttr3="newValue3" - offsets.push_back(attrData.length()); - attrData.append("@newAttr3").append('\0'); - - offsets.push_back(attrData.length()); - attrData.append("newValue3").append('\0'); - - DBGLOG("Calling setAttribute to update 1 existing + add 1 new"); - - // Call setAttribute again (testing both update and new attribute paths) - base = (const char *)attrData.toByteArray(); - ptree->setAttribute(base, offsets); - - DBGLOG("setAttribute completed for updates"); - - // Verify updates using areMatchingPTrees - Owned expected2 = createPTree("TestRoot", flags); - expected2->setProp("@newAttr1", "updatedValue1"); // Updated value - expected2->setProp("@newAttr2", "newValue2"); // Unchanged from Test 1 - expected2->setProp("@newAttr3", "newValue3"); // Newly added - if (!areMatchingPTrees(expected2, tree)) - logPTreeSideBySide("expected2", expected2, "tree", tree); - CPPUNIT_ASSERT_MESSAGE("Test 2: Tree doesn't match expected after updating attributes", areMatchingPTrees(expected2, tree)); - - DBGLOG("Test 2 passed"); - - // Test 3: Update attributes that were added via setProp (exercises the existing attribute update path) - DBGLOG("=== Test 3: Testing setAttribute on pre-existing attributes ==="); - - // Add attributes using traditional setProp method - tree->setProp("@preExisting", "original"); - - // Now update it using setAttribute with vector - attrData.clear(); - offsets.clear(); - - offsets.push_back(attrData.length()); - attrData.append("@preExisting").append('\0'); - offsets.push_back(attrData.length()); - attrData.append("modified").append('\0'); - - DBGLOG("Calling setAttribute to update pre-existing attribute"); - - base = (const char *)attrData.toByteArray(); - ptree->setAttribute(base, offsets); - - // Prepare the expected tree for comparison (expected3) - Owned expected3 = createPTree("TestRoot", flags); - expected3->setProp("@newAttr1", "updatedValue1"); - expected3->setProp("@newAttr2", "newValue2"); - expected3->setProp("@newAttr3", "newValue3"); - expected3->setProp("@preExisting", "modified"); // Updated via setAttribute - - // Log any differences between expected and actual tree. (No intentional mutation.) - - // Log the entire expected and actual trees side-by-side for easy comparison. - { - StringBuffer expXml, actXml; - toXML(expected3, expXml.clear(), 2, XML_Format|XML_SortTags); - toXML(tree, actXml.clear(), 2, XML_Format|XML_SortTags); - - // Log full trees separately first (helpful on their own) - DBGLOG("PTree expected (full XML):\n%s", expXml.str()); - DBGLOG("PTree actual (full XML):\n%s", actXml.str()); - - // Build side-by-side view by splitting into lines - std::vector expLines; - std::vector actLines; - { - std::istringstream es(expXml.str()); - std::string line; - while (std::getline(es, line)) - expLines.push_back(line); - } - { - std::istringstream as(actXml.str()); - std::string line; - while (std::getline(as, line)) - actLines.push_back(line); - } - - size_t maxLines = expLines.size() > actLines.size() ? expLines.size() : actLines.size(); - StringBuffer sideBySide; - sideBySide.append("===== Expected ============================== | ===== Actual ================================\n"); - for (size_t i = 0; i < maxLines; ++i) - { - const char *el = (i < expLines.size()) ? expLines[i].c_str() : ""; - const char *al = (i < actLines.size()) ? actLines[i].c_str() : ""; - // fixed-width column for expected side to ease visual diffing - sideBySide.appendf("%-80s | %s\n", el, al); - } - sideBySide.append("===== End side-by-side =============================================================\n"); - DBGLOG("PTree side-by-side comparison:\n%s", sideBySide.str()); - } - - // Verify the update worked using areMatchingPTrees - if (!areMatchingPTrees(expected3, tree)) - logPTreeSideBySide("expected3", expected3, "tree", tree); - CPPUNIT_ASSERT_MESSAGE("Test 3: Tree doesn't match expected after updating pre-existing attribute", areMatchingPTrees(expected3, tree)); + DBGLOG("=== Test: Adding new attributes passed ==="); - DBGLOG("Test 3 passed"); DBGLOG("=== testSetAttributeWithVector completed successfully ==="); } catch (IException *e) diff --git a/testing/unittests/jlibtests2.cpp b/testing/unittests/jlibtests2.cpp index 206a2e2b599..5d265f7254a 100644 --- a/testing/unittests/jlibtests2.cpp +++ b/testing/unittests/jlibtests2.cpp @@ -25,9 +25,10 @@ #include #include #include -#include #include #include +#include +#include #include #include "jsem.hpp" @@ -1166,4 +1167,298 @@ CPPUNIT_TEST_SUITE_REGISTRATION(IOURingTest); CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(IOURingTest, "IOURingTest"); +// Deduplication performance tests +class DedupKeyValuePairsTimingPerformanceStressTest : public CppUnit::TestFixture +{ +public: + CPPUNIT_TEST_SUITE(DedupKeyValuePairsTimingPerformanceStressTest); + CPPUNIT_TEST(testDedupMethods); + CPPUNIT_TEST_SUITE_END(); + + // Keep policy: keep first occurrence of a key (lowest original index) + using KV = std::pair; + + static std::vector buildDataset(size_t targetSize) + { + // For small target sizes keep the original handcrafted base to preserve existing test expectations + if (targetSize <= 11) + { + std::vector base = { + {"@newAttr1", "newValue1"}, + {"@newAttr2", "newValue2"}, + {"@newAttr3", ""}, + {"@newAttr4", "dupValue1"}, + {"@newAttr5", "newValue3"}, + {"@newAttr4", "dupValue2"}, + {"@newAttr6", "newValue4"}, + {"@newAttr7", "newValue5"}, + {"@newAttr8", "newValue6"}, + {"@newAttr9", "newValue7"}, + {"@newAttr10", ""}}; + std::vector out; + out.reserve(targetSize); + while (out.size() < targetSize) + { + size_t take = std::min(base.size(), targetSize - out.size()); + out.insert(out.end(), base.begin(), base.begin() + take); + } + return out; + } + + // For larger target sizes, generate mostly-unique keys, with ~1% duplicates and ~1% empty values. + std::vector out; + out.reserve(targetSize); + + // Deterministic RNG so results are reproducible + std::mt19937_64 rng(123456789); + for (size_t i = 0; i < targetSize; ++i) + { + std::string key = std::string("@newAttr") + std::to_string(i + 1); + std::string val = std::string("value") + std::to_string(i + 1); + out.emplace_back(std::move(key), std::move(val)); + } + + // Introduce roughly 1% duplicate keys + size_t dupCount = std::max(1, targetSize / 100); + std::uniform_int_distribution distIndex(0, targetSize - 1); + for (size_t d = 0; d < dupCount; ++d) + { + size_t idx = distIndex(rng); + size_t src = distIndex(rng); + if (src == idx) + src = (src + 1) % targetSize; + out[idx].first = out[src].first; + } + + // Introduce roughly 1% empty-string values + size_t emptyCount = std::max(1, targetSize / 100); + for (size_t e = 0; e < emptyCount; ++e) + { + size_t idx = distIndex(rng); + out[idx].second = std::string(); // empty value + } + + return out; + } + + // Linear dedup: for each element, search result vector for key (O(n^2) worst) + static std::vector linearDedup(const std::vector &input) + { + std::vector result; + result.reserve(input.size()); + for (const auto &kv : input) + { + bool found = false; + for (const auto &r : result) + { + if (r.first == kv.first) + { + found = true; + break; + } + } + if (!found) + result.push_back(kv); + } + return result; + } + + // Hashmap dedup: keep first occurrence using unordered_map + static std::vector hashmapDedup(const std::vector &input) + { + std::unordered_map> map; // key -> (index,value) + map.reserve(input.size() * 2); + for (size_t i = 0; i < input.size(); ++i) + { + const auto &kv = input[i]; + auto it = map.find(kv.first); + if (it == map.end()) + map.emplace(kv.first, std::make_pair(i, kv.second)); + } + // reconstruct vector preserving original order + std::vector> tmp; + tmp.reserve(map.size()); + for (auto &p : map) + tmp.push_back({p.second.first, {p.first, p.second.second}}); + std::sort(tmp.begin(), tmp.end(), [](auto &a, auto &b) + { return a.first < b.first; }); + std::vector out; + out.reserve(tmp.size()); + for (auto &t : tmp) + out.push_back(t.second); + return out; + } + + // Insert-sort dedup: maintain sorted-by-key vector of (key,index,value) and insert each element keeping first index + static std::vector insertSortDedup(const std::vector &input) + { + struct Item + { + std::string key; + size_t idx; + std::string val; + }; + std::vector sorted; + sorted.reserve(input.size()); + for (size_t i = 0; i < input.size(); ++i) + { + const auto &kv = input[i]; + auto it = std::lower_bound(sorted.begin(), sorted.end(), kv.first, [](const Item &a, const std::string &k) + { return a.key < k; }); + if (it != sorted.end() && it->key == kv.first) + { + // already have a first occurrence (lower index) -> skip + continue; + } + // insert maintaining sort + Item item{kv.first, i, kv.second}; + sorted.insert(it, std::move(item)); + } + // sorted currently by key; to preserve original order, sort by idx + std::sort(sorted.begin(), sorted.end(), [](const Item &a, const Item &b) + { return a.idx < b.idx; }); + std::vector out; + out.reserve(sorted.size()); + for (auto &it : sorted) + out.push_back({it.key, it.val}); + return out; + } + + // Sort+unique dedup: stable select first by original index + static std::vector sortUniqueDedup(const std::vector &input) + { + struct Item + { + std::string key; + size_t idx; + std::string val; + }; + std::vector tmp; + tmp.reserve(input.size()); + for (size_t i = 0; i < input.size(); ++i) + tmp.push_back({input[i].first, i, input[i].second}); + std::sort(tmp.begin(), tmp.end(), [](const Item &a, const Item &b) + { if (a.key!=b.key) return a.key < b.key; return a.idx < b.idx; }); + std::vector uniqueItems; + uniqueItems.reserve(tmp.size()); + for (size_t i = 0; i < tmp.size(); ++i) + { + if (i == 0 || tmp[i].key != tmp[i - 1].key) + uniqueItems.push_back(tmp[i]); + } + std::sort(uniqueItems.begin(), uniqueItems.end(), [](const Item &a, const Item &b) + { return a.idx < b.idx; }); + std::vector out; + out.reserve(uniqueItems.size()); + for (auto &it : uniqueItems) + out.push_back({it.key, it.val}); + return out; + } + + void testDedupMethods() + { + try + { + std::vector sizes = {5, 11, 20, 50, 110, 200, 300, 400, 500, 1100, 5500, 10000, 20000, 50000}; + + // Number of repetitions to average + const unsigned runs = 10; // number of repetitions to average + + // Print a single header for the table (report runs dynamically) + DBGLOG("Dedup timings (microseconds) (avg of %u runs) - methods: linear, hashmap, insertSort, sortUnique", runs); + DBGLOG("%-10s | %-10s | %-10s || %-10s | %-10s | %-12s | %-12s", "Size", "uniqueKeys", "emptyStrs", "linear", "hashmap", "insertSort", "sortUnique"); + DBGLOG("-------------------------------------------------------------------------------------------"); + + for (size_t sz : sizes) + { + uint64_t total_linear = 0, total_hash = 0, total_insert = 0, total_sortunique = 0; + using clk = std::chrono::high_resolution_clock; + + // Build input once and derive the canonical unique-key count from a deterministic run + auto data = buildDataset(sz); + size_t uniqueKeys = linearDedup(data).size(); + // count empty-string values in the dataset + size_t emptyCount = 0; + for (const auto &kv : data) + if (kv.second.empty()) + ++emptyCount; + + for (unsigned run = 0; run < runs; ++run) + { + auto t0 = clk::now(); + auto r_linear = linearDedup(data); + auto t1 = clk::now(); + auto r1 = std::chrono::duration_cast(t1 - t0).count(); + total_linear += (uint64_t)r1; + + t0 = clk::now(); + auto r_hash = hashmapDedup(data); + t1 = clk::now(); + auto r2 = std::chrono::duration_cast(t1 - t0).count(); + total_hash += (uint64_t)r2; + + t0 = clk::now(); + auto r_insert = insertSortDedup(data); + t1 = clk::now(); + auto r3 = std::chrono::duration_cast(t1 - t0).count(); + total_insert += (uint64_t)r3; + + t0 = clk::now(); + auto r_sortunique = sortUniqueDedup(data); + t1 = clk::now(); + auto r4 = std::chrono::duration_cast(t1 - t0).count(); + total_sortunique += (uint64_t)r4; + // Verify results all have the same keys in same order (first-occurrence policy) + CPPUNIT_ASSERT(r_linear.size() == r_hash.size()); + CPPUNIT_ASSERT(r_linear.size() == r_insert.size()); + CPPUNIT_ASSERT(r_linear.size() == r_sortunique.size()); + for (size_t i = 0; i < r_linear.size(); ++i) + { + CPPUNIT_ASSERT(r_linear[i].first == r_hash[i].first); + CPPUNIT_ASSERT(r_linear[i].first == r_insert[i].first); + CPPUNIT_ASSERT(r_linear[i].first == r_sortunique[i].first); + } + } + + uint64_t avg_linear = total_linear / runs; + uint64_t avg_hash = total_hash / runs; + uint64_t avg_insert = total_insert / runs; + uint64_t avg_sortunique = total_sortunique / runs; + + // determine fastest method (lowest average) - denoted with a * + uint64_t fastestTime = std::min({avg_linear, avg_hash, avg_insert, avg_sortunique}); + std::string s_linear = std::to_string((unsigned long long)avg_linear) + (avg_linear == fastestTime ? "*" : ""); + std::string s_hash = std::to_string((unsigned long long)avg_hash) + (avg_hash == fastestTime ? "*" : ""); + std::string s_insert = std::to_string((unsigned long long)avg_insert) + (avg_insert == fastestTime ? "*" : ""); + std::string s_sortunique = std::to_string((unsigned long long)avg_sortunique) + (avg_sortunique == fastestTime ? "*" : ""); + + DBGLOG("%-10zu | %-10zu | %-10zu || %-12s | %-12s | %-12s | %-12s", sz, + uniqueKeys, + emptyCount, + s_linear.c_str(), s_hash.c_str(), s_insert.c_str(), s_sortunique.c_str()); + } + DBGLOG("Note: '*' denotes fastest dedup time (lowest average)."); + } + catch (IException *e) + { + StringBuffer msg; + e->errorMessage(msg); + e->Release(); + CPPUNIT_FAIL(msg.str()); + } + catch (std::exception &e) + { + CPPUNIT_FAIL(e.what()); + } + catch (...) + { + CPPUNIT_FAIL("Unknown exception in testDedupMethods"); + } + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(DedupKeyValuePairsTimingPerformanceStressTest); +CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(DedupKeyValuePairsTimingPerformanceStressTest, "DedupKeyValuePairsTimingPerformanceStressTest"); + + #endif // _USE_CPPUNIT From 216db51234f29763387deb5e0227a6a56f66c419 Mon Sep 17 00:00:00 2001 From: Dave Streeter Date: Fri, 31 Oct 2025 15:20:05 +0000 Subject: [PATCH 3/5] 1.Dedupe commend 2.ifdef to switch deserializeSelf code Signed-off-by: Dave Streeter --- system/jlib/jptree.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/system/jlib/jptree.cpp b/system/jlib/jptree.cpp index b6c32291d9e..7789110ee38 100644 --- a/system/jlib/jptree.cpp +++ b/system/jlib/jptree.cpp @@ -3092,6 +3092,8 @@ void PTree::deserializeSelf(IBufferedSerialInputStream &src) read(src, flags); // Read attributes until we encounter a zero byte (attribute list terminator) +#define HPCC_35246 +#ifdef HPCC_35246 attrStringOffsets.clear(); const char * base = peekKeyValuePairList(attrStringOffsets, src, skipLen); if (base) // there is at least one attribute to process @@ -3101,7 +3103,7 @@ void PTree::deserializeSelf(IBufferedSerialInputStream &src) } else throwUnexpectedX("PTree deserialization error: end of stream, expected attribute name"); -/* DJPS +#else for (;;) { NextByteStatus status = isNextByteZero(src); @@ -3123,7 +3125,7 @@ void PTree::deserializeSelf(IBufferedSerialInputStream &src) src.skip(skipLen + 1); // +1 to skip over second null terminator. } -*/ +#endif if (value) delete value; size32_t size{0}; @@ -4147,22 +4149,20 @@ no findAttribute() call for existing attributes */ void CAtomPTree::setAttribute(const char *base, const std::vector &offsets) { + /** + * De-duplication of attributes: + * The attributes name is unique. However there maybe duplicates. + * Since only the most recent key is retrieved we can ignore duplicates. + * This will mean increased memory usage for the duplicates. + * However it will have a significant time saving on deserialization. + */ size_t numStringOffsets = offsets.size(); if (numStringOffsets < 2) return; if (numStringOffsets % 2 != 0) throwUnexpectedX("setAttribute error: offsets vector must contain pairs of key/value offsets"); -/* - // Deduplicate keys in input - std::unordered_set seenKeys; - for (size_t i = 0; i < numStringOffsets; i += 2) - { - const char *key = base + offsets[i]; - if (key) - seenKeys.insert(i); - } -*/ + // All attributes are treated as new i.e. do not exist in the tree already // Allocate them in one call size_t numNewAttributes = numStringOffsets / 2; From ec3e63fd482b3a846224c804fd385851c8f2122c Mon Sep 17 00:00:00 2001 From: Dave Streeter Date: Fri, 31 Oct 2025 16:24:33 +0000 Subject: [PATCH 4/5] Resolve duplicate data failing PTreeSerializationDeserializationTest Signed-off-by: Dave Streeter --- testing/unittests/jlibtests.cpp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/testing/unittests/jlibtests.cpp b/testing/unittests/jlibtests.cpp index ca8e2cc0fdd..06fdbb904d6 100644 --- a/testing/unittests/jlibtests.cpp +++ b/testing/unittests/jlibtests.cpp @@ -3958,13 +3958,12 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture addAttr("@newAttr1", "newValue1"); addAttr("@newAttr2", "newValue2"); addAttr("@newAttr3", ""); - addAttr("@newAttr4", "dupValue1"); // Duplicate key - addAttr("@newAttr5", "newValue3"); - addAttr("@newAttr4", "dupValue2"); // Duplicate key - addAttr("@newAttr6", "newValue4"); - addAttr("@newAttr7", "newValue5"); - addAttr("@newAttr8", "newValue6"); - addAttr("@newAttr9", "newValue7"); + addAttr("@newAttr4", "newValue4"); + addAttr("@newAttr5", "newValue5"); + addAttr("@newAttr6", "newValue6"); + addAttr("@newAttr7", "newValue7"); + addAttr("@newAttr8", "newValue8"); + addAttr("@newAttr9", "newValue9"); addAttr("@newAttr10", ""); DBGLOG("Calling setAttribute with %u offsets", (unsigned)offsets.size()); // Call setAttribute with the vector (testing new attribute insertion) @@ -3976,12 +3975,12 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture expected1->setProp("@newAttr1", "newValue1"); expected1->setProp("@newAttr2", "newValue2"); expected1->setProp("@newAttr3", ""); - expected1->setProp("@newAttr4", "dupValue2"); - expected1->setProp("@newAttr5", "newValue3"); - expected1->setProp("@newAttr6", "newValue4"); - expected1->setProp("@newAttr7", "newValue5"); - expected1->setProp("@newAttr8", "newValue6"); - expected1->setProp("@newAttr9", "newValue7"); + expected1->setProp("@newAttr4", "newValue4"); + expected1->setProp("@newAttr5", "newValue5"); + expected1->setProp("@newAttr6", "newValue6"); + expected1->setProp("@newAttr7", "newValue7"); + expected1->setProp("@newAttr8", "newValue8"); + expected1->setProp("@newAttr9", "newValue9"); expected1->setProp("@newAttr10", ""); if (!areMatchingPTrees(expected1, tree)) logPTreeSideBySide("expected1", expected1, "tree", tree); From eee56dd8950b92bff8aa2be2b3bcb96fd7be18f1 Mon Sep 17 00:00:00 2001 From: Dave Streeter Date: Fri, 31 Oct 2025 16:53:26 +0000 Subject: [PATCH 5/5] Prevent duplicate calls to areMatchingPTrees Signed-off-by: Dave Streeter --- testing/unittests/jlibtests.cpp | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/testing/unittests/jlibtests.cpp b/testing/unittests/jlibtests.cpp index 06fdbb904d6..cba8e486669 100644 --- a/testing/unittests/jlibtests.cpp +++ b/testing/unittests/jlibtests.cpp @@ -3820,23 +3820,27 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture DBGLOG("=== createPTreeFromBinary() with custom nodeCreator completed ==="); // Validation - verify both deserialized trees are equivalent to the original, log out any differences - if (!areMatchingPTrees(originalTree, memoryBufferDeserialized)) + bool areTreesMatching = areMatchingPTrees(originalTree, memoryBufferDeserialized); + if (!areTreesMatching) logPTreeSideBySide("originalTree", originalTree, "memoryBufferDeserialized", memoryBufferDeserialized); - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, memoryBufferDeserialized)); + CPPUNIT_ASSERT(areTreesMatching); - if (!areMatchingPTrees(originalTree, streamDeserialized)) + areTreesMatching = areMatchingPTrees(originalTree, streamDeserialized); + if (!areTreesMatching) logPTreeSideBySide("originalTree", originalTree, "streamDeserialized", streamDeserialized); - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, streamDeserialized)); + CPPUNIT_ASSERT(areTreesMatching); - if (!areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithFlags)) + areTreesMatching = areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithFlags); + if (!areTreesMatching) logPTreeSideBySide("originalTree", originalTree, "deserializedCreatePTreeFromBinaryWithFlags", deserializedCreatePTreeFromBinaryWithFlags); - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithFlags)); + CPPUNIT_ASSERT(areTreesMatching); CPPUNIT_ASSERT(nodeCreator->wasCalled); // Verify nodeCreator was called - if (!areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator)) + areTreesMatching = areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator); + if (!areTreesMatching) logPTreeSideBySide("originalTree", originalTree, "deserializedCreatePTreeFromBinaryWithCreator", deserializedCreatePTreeFromBinaryWithCreator); - CPPUNIT_ASSERT(areMatchingPTrees(originalTree, deserializedCreatePTreeFromBinaryWithCreator)); + CPPUNIT_ASSERT(areTreesMatching); DBGLOG("=== ROUND-TRIP TEST FOR: %s ===", testName); @@ -3982,9 +3986,10 @@ class PTreeSerializationDeserializationTest : public CppUnit::TestFixture expected1->setProp("@newAttr8", "newValue8"); expected1->setProp("@newAttr9", "newValue9"); expected1->setProp("@newAttr10", ""); - if (!areMatchingPTrees(expected1, tree)) + bool areTreesMatching = areMatchingPTrees(expected1, tree); + if (!areTreesMatching) logPTreeSideBySide("expected1", expected1, "tree", tree); - CPPUNIT_ASSERT_MESSAGE("Test 1: Tree doesn't match expected after adding new attributes", areMatchingPTrees(expected1, tree)); + CPPUNIT_ASSERT_MESSAGE("Test 1: Tree doesn't match expected after adding new attributes", areTreesMatching); DBGLOG("=== Test: Adding new attributes passed ==="); @@ -4160,9 +4165,10 @@ class PTreeTimingTestBase : public CppUnit::TestFixture totalSerializeNs += serializeElapsedNs; // Validation - serialized data matches - if (!areMatchingPTrees(copyDeserializedTree, deserializedTree)) + bool areTreesMatching = areMatchingPTrees(copyDeserializedTree, deserializedTree); + if (!areTreesMatching) logPTreeSideBySide("copyDeserializedTree", copyDeserializedTree, "tree", deserializedTree); - CPPUNIT_ASSERT(areMatchingPTrees(copyDeserializedTree, deserializedTree)); + CPPUNIT_ASSERT(areTreesMatching); } TimingResults results;