Skip to content

Commit 1b0402b

Browse files
henrikedin and MongoDB Bot
authored and committed
SERVER-97368 Enable TTL deletes for time-series collections containing extended range data (#29695)
GitOrigin-RevId: b600855
1 parent e5fbf48 commit 1b0402b

File tree

5 files changed

+443
-169
lines changed

5 files changed

+443
-169
lines changed

jstests/noPassthrough/timeseries/timeseries_extended_range_ttl.js

Lines changed: 0 additions & 69 deletions
This file was deleted.

jstests/noPassthrough/timeseries/timeseries_ttl.js

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,39 @@ testCase((coll, bucketsColl) => {
119119
assert.eq(0, bucketsColl.find().itcount());
120120
});
121121

122+
testCase((coll, bucketsColl) => {
123+
// Inserts measurements that fall into extended time range. Make sure TTL is able to make
124+
// progress in the presence of these dates. All inserts fall into separate buckets.
125+
126+
const nowTime = new Date();
127+
128+
assert.commandWorked(coll.insertMany([
129+
// This date is sorted at the end of the _id index and is eligible for TTL deletion
130+
{[timeFieldName]: new ISODate("1969-12-31T23:59:59"), [metaFieldName]: "localhost"},
131+
// This date is sorted at the beginning of the _id index but is NOT eligible for TTL
132+
// deletion
133+
{[timeFieldName]: new ISODate("2038-01-19T03:15:00"), [metaFieldName]: "localhost"},
134+
// This date is sorted at the end of the _id index but is NOT eligible for TTL deletion
135+
{[timeFieldName]: new ISODate("2106-02-07T06:29:00"), [metaFieldName]: "localhost"},
136+
// Insert a date 5 minutes prior to now; this will not be deleted as the max bucket span
137+
// prevents it even if the bucket minimum is past the expiry.
138+
{
139+
[timeFieldName]: new Date(nowTime.getTime() - (1000 * 5 * 60)),
140+
[metaFieldName]: "localhost"
141+
},
142+
// Insert earlier than the bucket span. This is eligible for deletion.
143+
{
144+
[timeFieldName]:
145+
new Date(nowTime.getTime() - (1000 * defaultBucketMaxRange) - (1000 * 5 * 60)),
146+
[metaFieldName]: "localhost"
147+
}
148+
]));
149+
150+
TTLUtil.waitForPass(coll.getDB("test"));
151+
assert.eq(3, coll.find().itcount());
152+
assert.eq(3, bucketsColl.find().itcount());
153+
});
154+
122155
// Make a collection TTL using collMod. Ensure data expires correctly.
123156
(function newlyTTLWithCollMod() {
124157
const coll = testDB.getCollection('ts');

src/mongo/db/ttl/ttl.cpp

Lines changed: 121 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@
7272
#include "mongo/db/query/index_bounds.h"
7373
#include "mongo/db/query/internal_plans.h"
7474
#include "mongo/db/query/plan_yield_policy.h"
75-
#include "mongo/db/query/record_id_bound.h"
7675
#include "mongo/db/query/write_ops/insert.h"
7776
#include "mongo/db/record_id_helpers.h"
7877
#include "mongo/db/repl/member_state.h"
@@ -84,6 +83,7 @@
8483
#include "mongo/db/service_context.h"
8584
#include "mongo/db/shard_role.h"
8685
#include "mongo/db/stats/resource_consumption_metrics.h"
86+
#include "mongo/db/timeseries/timeseries_constants.h"
8787
#include "mongo/db/timeseries/timeseries_gen.h"
8888
#include "mongo/db/transaction_resources.h"
8989
#include "mongo/db/ttl/ttl_collection_cache.h"
@@ -159,6 +159,7 @@ std::unique_ptr<BatchedDeleteStageParams> getBatchedDeleteStageParams(bool batch
159159
// 'safe' handling for time-series collections.
160160
Date_t safeExpirationDate(OperationContext* opCtx,
161161
const CollectionPtr& coll,
162+
Date_t at,
162163
std::int64_t expireAfterSeconds) {
163164
if (auto timeseries = coll->getTimeseriesOptions()) {
164165
const auto bucketMaxSpan = Seconds(*timeseries->getBucketMaxSpanSeconds());
@@ -168,10 +169,10 @@ Date_t safeExpirationDate(OperationContext* opCtx,
168169
// time value of a bucket. A bucket may have newer data, so we cannot safely delete
169170
// the entire bucket yet until the maximum bucket range has passed, even if the
170171
// minimum value can be expired.
171-
return Date_t::now() - Seconds(expireAfterSeconds) - bucketMaxSpan;
172+
return at - Seconds(expireAfterSeconds) - bucketMaxSpan;
172173
}
173174

174-
return Date_t::now() - Seconds(expireAfterSeconds);
175+
return at - Seconds(expireAfterSeconds);
175176
}
176177

177178
// Computes and returns the start 'RecordIdBound' with the correct type for a bounded, clustered
@@ -328,7 +329,9 @@ void TTLMonitor::run() {
328329
try {
329330
const auto opCtxPtr = cc().makeOperationContext();
330331
writeConflictRetry(opCtxPtr.get(), "TTL pass", NamespaceString::kEmpty, [&] {
331-
_doTTLPass(opCtxPtr.get());
332+
hangTTLMonitorBetweenPasses.pauseWhileSet(opCtxPtr.get());
333+
334+
_doTTLPass(opCtxPtr.get(), Date_t::now());
332335
});
333336
} catch (const DBException& ex) {
334337
LOGV2_WARNING(22537,
@@ -350,16 +353,14 @@ void TTLMonitor::shutdown() {
350353
LOGV2(3684101, "Finished shutting down TTL collection monitor thread");
351354
}
352355

353-
void TTLMonitor::_doTTLPass(OperationContext* opCtx) {
356+
void TTLMonitor::_doTTLPass(OperationContext* opCtx, Date_t at) {
354357
// Don't do work if we are a secondary (TTL will be handled by primary)
355358
auto replCoordinator = repl::ReplicationCoordinator::get(opCtx);
356359
if (replCoordinator && replCoordinator->getSettings().isReplSet() &&
357360
!replCoordinator->getMemberState().primary()) {
358361
return;
359362
}
360363

361-
hangTTLMonitorBetweenPasses.pauseWhileSet(opCtx);
362-
363364
// Increment the metric after the TTL work has been finished.
364365
ON_BLOCK_EXIT([&] { ttlPasses.increment(); });
365366

@@ -369,11 +370,11 @@ void TTLMonitor::_doTTLPass(OperationContext* opCtx) {
369370
// indicates that it did not delete everything possible, we continue performing sub-passes.
370371
// This maintains the semantic that a full TTL pass deletes everything it possibly can
371372
// before sleeping periodically.
372-
moreToDelete = _doTTLSubPass(opCtx);
373+
moreToDelete = _doTTLSubPass(opCtx, at);
373374
}
374375
}
375376

376-
bool TTLMonitor::_doTTLSubPass(OperationContext* opCtx) {
377+
bool TTLMonitor::_doTTLSubPass(OperationContext* opCtx, Date_t at) {
377378
// If part of replSet but not in a readable state (e.g. during initial sync), skip.
378379
if (repl::ReplicationCoordinator::get(opCtx)->getSettings().isReplSet() &&
379380
!repl::ReplicationCoordinator::get(opCtx)->getMemberState().readable())
@@ -400,7 +401,7 @@ bool TTLMonitor::_doTTLSubPass(OperationContext* opCtx) {
400401
TTLCollectionCache::InfoMap moreWork;
401402
for (const auto& [uuid, infos] : work) {
402403
for (const auto& info : infos) {
403-
bool moreToDelete = _doTTLIndexDelete(opCtx, &ttlCollectionCache, uuid, info);
404+
bool moreToDelete = _doTTLIndexDelete(opCtx, at, &ttlCollectionCache, uuid, info);
404405
if (moreToDelete) {
405406
moreWork[uuid].push_back(info);
406407
}
@@ -417,6 +418,7 @@ bool TTLMonitor::_doTTLSubPass(OperationContext* opCtx) {
417418
}
418419

419420
bool TTLMonitor::_doTTLIndexDelete(OperationContext* opCtx,
421+
Date_t at,
420422
TTLCollectionCache* ttlCollectionCache,
421423
const UUID& uuid,
422424
const TTLCollectionCache::Info& info) {
@@ -478,16 +480,31 @@ bool TTLMonitor::_doTTLIndexDelete(OperationContext* opCtx,
478480
return false;
479481
}
480482

481-
if (collectionPtr->getRequiresTimeseriesExtendedRangeSupport()) {
482-
return false;
483-
}
484-
485483
ResourceConsumption::ScopedMetricsCollector scopedMetrics(opCtx, nss->dbName());
486484

487485
if (info.isClustered()) {
488-
return _deleteExpiredWithCollscan(opCtx, ttlCollectionCache, coll);
486+
const auto& collOptions = collectionPtr->getCollectionOptions();
487+
uassert(5400701,
488+
"collection is not clustered but is described as being TTL",
489+
collOptions.clusteredIndex);
490+
invariant(collectionPtr->isClustered());
491+
492+
auto expireAfterSeconds = collOptions.expireAfterSeconds;
493+
if (!expireAfterSeconds) {
494+
ttlCollectionCache->deregisterTTLClusteredIndex(coll.uuid());
495+
return false;
496+
}
497+
498+
if (collectionPtr->getRequiresTimeseriesExtendedRangeSupport()) {
499+
return _deleteExpiredWithCollscanForTimeseriesExtendedRange(
500+
opCtx, at, ttlCollectionCache, coll, *expireAfterSeconds);
501+
} else {
502+
return _deleteExpiredWithCollscan(
503+
opCtx, at, ttlCollectionCache, coll, *expireAfterSeconds);
504+
}
489505
} else {
490-
return _deleteExpiredWithIndex(opCtx, ttlCollectionCache, coll, info.getIndexName());
506+
return _deleteExpiredWithIndex(
507+
opCtx, at, ttlCollectionCache, coll, info.getIndexName());
491508
}
492509
} catch (const ExceptionForCat<ErrorCategory::StaleShardVersionError>& ex) {
493510
// The TTL index tried to delete some information from a sharded collection
@@ -548,6 +565,7 @@ bool TTLMonitor::_doTTLIndexDelete(OperationContext* opCtx,
548565
}
549566

550567
bool TTLMonitor::_deleteExpiredWithIndex(OperationContext* opCtx,
568+
Date_t at,
551569
TTLCollectionCache* ttlCollectionCache,
552570
const CollectionAcquisition& collection,
553571
std::string indexName) {
@@ -574,7 +592,7 @@ bool TTLMonitor::_deleteExpiredWithIndex(OperationContext* opCtx,
574592

575593
auto expireAfterSeconds = spec[IndexDescriptor::kExpireAfterSecondsFieldName].safeNumberLong();
576594
const Date_t kDawnOfTime = Date_t::fromMillisSinceEpoch(std::numeric_limits<long long>::min());
577-
const auto expirationDate = safeExpirationDate(opCtx, collectionPtr, expireAfterSeconds);
595+
const auto expirationDate = safeExpirationDate(opCtx, collectionPtr, at, expireAfterSeconds);
578596
const BSONObj startKey = BSON("" << kDawnOfTime);
579597
const BSONObj endKey = BSON("" << expirationDate);
580598

@@ -648,28 +666,91 @@ bool TTLMonitor::_deleteExpiredWithIndex(OperationContext* opCtx,
648666
}
649667

650668
bool TTLMonitor::_deleteExpiredWithCollscan(OperationContext* opCtx,
669+
Date_t at,
651670
TTLCollectionCache* ttlCollectionCache,
652-
const CollectionAcquisition& collection) {
653-
const auto& collectionPtr = collection.getCollectionPtr();
654-
const auto& collOptions = collectionPtr->getCollectionOptions();
655-
uassert(5400701,
656-
"collection is not clustered but is described as being TTL",
657-
collOptions.clusteredIndex);
658-
invariant(collectionPtr->isClustered());
659-
660-
auto expireAfterSeconds = collOptions.expireAfterSeconds;
661-
if (!expireAfterSeconds) {
662-
ttlCollectionCache->deregisterTTLClusteredIndex(collection.uuid());
663-
return false;
664-
}
665-
671+
const CollectionAcquisition& collection,
672+
int64_t expireAfterSeconds) {
666673
LOGV2_DEBUG(5400704, 1, "running TTL job for clustered collection", logAttrs(collection.nss()));
674+
const auto& collectionPtr = collection.getCollectionPtr();
667675

668676
const auto startId = makeCollScanStartBound(collectionPtr, Date_t{});
669677

670-
const auto expirationDate = safeExpirationDate(opCtx, collectionPtr, *expireAfterSeconds);
678+
const auto expirationDate = safeExpirationDate(opCtx, collectionPtr, at, expireAfterSeconds);
671679
const auto endId = makeCollScanEndBound(collectionPtr, expirationDate);
672680

681+
return _performDeleteExpiredWithCollscan(
682+
opCtx, collection, startId, endId, /*forward*/ true, /*filter*/ nullptr);
683+
}
684+
685+
bool TTLMonitor::_deleteExpiredWithCollscanForTimeseriesExtendedRange(
686+
OperationContext* opCtx,
687+
Date_t at,
688+
TTLCollectionCache* ttlCollectionCache,
689+
const CollectionAcquisition& collection,
690+
int64_t expireAfterSeconds) {
691+
// We cannot rely on the _id index for time-series data with extended time ranges. In theory
692+
// data eligible for deletion could be located anywhere in the collection. It would not be
693+
// performant to consider any bucket document. We instead run the deletion in two separate
694+
// batches: [epoch, at-expiry] and [2038, 2106]. The second range will include data prior to
695+
// the epoch unless they are so far from the epoch that they are truncated into the
696+
// [at-expiry, 2038] range that we don't consider for deletion. This is an acceptable tradeoff
697+
// until we have a new _id format for time-series.
698+
LOGV2_DEBUG(9736801,
699+
1,
700+
"running TTL job for timeseries collection with extended range",
701+
logAttrs(collection.nss()));
702+
703+
const auto& collectionPtr = collection.getCollectionPtr();
704+
bool passTargetMet = false;
705+
706+
auto timeSeriesOptions = collectionPtr->getTimeseriesOptions();
707+
std::string timeField =
708+
timeseries::kControlMaxFieldNamePrefix.toString() + timeSeriesOptions->getTimeField();
709+
LTEMatchExpression filter(boost::optional<StringData>{timeField},
710+
Value{at - Seconds(expireAfterSeconds)});
711+
712+
// Delete from the beginning of the clustered _id index. In the typical case we consider
713+
// anything from the epoch to at-expiry eligible for deletion. We add a filter to ensure we
714+
// don't delete any data after 2038 that is not eligible for deletion.
715+
{
716+
const auto startId = makeCollScanStartBound(collectionPtr, Date_t{});
717+
const auto expirationDate =
718+
safeExpirationDate(opCtx, collectionPtr, at, expireAfterSeconds);
719+
const auto endId = makeCollScanEndBound(collectionPtr, expirationDate);
720+
721+
passTargetMet |= _performDeleteExpiredWithCollscan(
722+
opCtx, collection, startId, endId, /*forward*/ true, &filter);
723+
}
724+
725+
// Delete from the end of the clustered _id index. In the typical case nothing should be
726+
// deleted. But data prior to 1970 is sorted at the end and is eligible for deletion. We add a
727+
// filter to ensure we only delete such data. {
728+
{
729+
// 0x80000000 (in seconds) is the first value that no longer fits in a signed 32bit integer.
730+
// We subtract the bucket span to get the beginning of the range we should consider
731+
// deleting.
732+
const auto startId = makeCollScanStartBound(
733+
collectionPtr,
734+
Date_t::fromMillisSinceEpoch((static_cast<long long>(0x80000000) -
735+
*timeSeriesOptions->getBucketMaxSpanSeconds()) *
736+
1000));
737+
738+
const auto endId = makeCollScanEndBound(
739+
collectionPtr, Date_t::fromMillisSinceEpoch(static_cast<long long>(0xFFFFFFFF) * 1000));
740+
741+
passTargetMet |= _performDeleteExpiredWithCollscan(
742+
opCtx, collection, startId, endId, /*forward*/ false, &filter);
743+
}
744+
return passTargetMet;
745+
}
746+
747+
748+
bool TTLMonitor::_performDeleteExpiredWithCollscan(OperationContext* opCtx,
749+
const CollectionAcquisition& collection,
750+
const RecordIdBound& startBound,
751+
const RecordIdBound& endBound,
752+
bool forward,
753+
const MatchExpression* filter) {
673754
auto params = std::make_unique<DeleteStageParams>();
674755
params->isMulti = true;
675756

@@ -686,11 +767,12 @@ bool TTLMonitor::_deleteExpiredWithCollscan(OperationContext* opCtx,
686767
collection,
687768
std::move(params),
688769
PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
689-
InternalPlanner::Direction::FORWARD,
690-
startId,
691-
endId,
770+
forward ? InternalPlanner::Direction::FORWARD : InternalPlanner::Direction::BACKWARD,
771+
startBound,
772+
endBound,
692773
CollectionScanParams::ScanBoundInclusion::kIncludeBothStartAndEndRecords,
693-
getBatchedDeleteStageParams(batchingEnabled));
774+
getBatchedDeleteStageParams(batchingEnabled),
775+
filter);
694776

695777
try {
696778
const auto numDeleted = exec->executeDelete();
@@ -706,7 +788,9 @@ bool TTLMonitor::_deleteExpiredWithCollscan(OperationContext* opCtx,
706788
"Deleted expired documents using clustered index scan",
707789
logAttrs(collection.nss()),
708790
"numDeleted"_attr = numDeleted,
709-
"duration"_attr = duration);
791+
"duration"_attr = duration,
792+
"extendedRange"_attr =
793+
collection.getCollectionPtr()->getRequiresTimeseriesExtendedRangeSupport());
710794
}
711795
if (batchingEnabled) {
712796
auto batchedDeleteStats = exec->getBatchedDeleteStats();

0 commit comments

Comments
 (0)