From 7d1dc6094e82cbc96a8d38979f3b8402d056c656 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 7 Jul 2025 15:11:03 +0300 Subject: [PATCH] Fix Arrow arena problem --- yql/essentials/minikql/mkql_alloc.cpp | 72 +++++++++++++++++---------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/yql/essentials/minikql/mkql_alloc.cpp b/yql/essentials/minikql/mkql_alloc.cpp index f0f9c07d2eb4..996860cbdd05 100644 --- a/yql/essentials/minikql/mkql_alloc.cpp +++ b/yql/essentials/minikql/mkql_alloc.cpp @@ -7,9 +7,9 @@ #include -namespace NKikimr { +namespace NKikimr::NMiniKQL { -namespace NMiniKQL { +static ui8 ZeroSizeObject alignas(ArrowAlignment)[0]; constexpr ui64 ArrowSizeForArena = (TAllocState::POOL_PAGE_SIZE >> 2); @@ -268,7 +268,14 @@ void TPagedArena::Clear() noexcept { } } +namespace { + void* MKQLArrowAllocateOnArena(ui64 size) { + Y_ENSURE(size); + // If size is zero we can get in trouble: when `page->Offset == page->Size`. + // The zero size leads to return `ptr` just after the current page. + // Then getting start of page for such pointer returns next page - which may be unmapped or unrelevant to `ptr`. + TAllocState* state = TlsAllocState; Y_ENSURE(state); @@ -287,8 +294,8 @@ void* MKQLArrowAllocateOnArena(ui64 size) { page = (TMkqlArrowHeader*)GetAlignedPage(); NYql::NUdf::SanitizerMakeRegionAccessible(page, sizeof(TMkqlArrowHeader)); - page->Offset = sizeof(TMkqlArrowHeader); - page->Size = pageSize; + page->Offset = 0; + page->Size = pageSize - sizeof(TMkqlArrowHeader); // for consistency with CleanupArrowList() page->UseCount = 1; if (state->EnableArrowTracking) { @@ -299,14 +306,20 @@ void* MKQLArrowAllocateOnArena(ui64 size) { } } - void* ptr = (ui8*)page + page->Offset; + void* ptr = (ui8*)page + page->Offset + sizeof(TMkqlArrowHeader); page->Offset += alignedSize; ++page->UseCount; + + Y_DEBUG_ABORT_UNLESS(TAllocState::GetPageStart(ptr) == page); + return ptr; } -namespace { void* MKQLArrowAllocateImpl(ui64 size) { + if (Y_UNLIKELY(size == 0)) { + return reinterpret_cast(ZeroSizeObject); + } + if (Y_LIKELY(!TAllocState::IsDefaultAllocatorUsed())) { if (size <= ArrowSizeForArena) { return MKQLArrowAllocateOnArena(size); @@ -345,20 +358,6 @@ void* MKQLArrowAllocateImpl(ui64 size) { header->Size = size; return header + 1; } -} // namespace - -void* MKQLArrowAllocate(ui64 size) { - auto sizeWithRedzones = NYql::NUdf::GetSizeToAlloc(size); - void* mem = MKQLArrowAllocateImpl(sizeWithRedzones); - return NYql::NUdf::WrapPointerWithRedZones(mem, sizeWithRedzones); -} - -void* MKQLArrowReallocate(const void* mem, ui64 prevSize, ui64 size) { - auto res = MKQLArrowAllocate(size); - memcpy(res, mem, Min(prevSize, size)); - MKQLArrowFree(mem, prevSize); - return res; -} void MKQLArrowFreeOnArena(const void* ptr) { auto* page = (TMkqlArrowHeader*)TAllocState::GetPageStart(ptr); @@ -366,7 +365,7 @@ void MKQLArrowFreeOnArena(const void* ptr) { if (!page->Entry.IsUnlinked()) { TAllocState* state = TlsAllocState; Y_ENSURE(state); - state->OffloadFree(page->Size); + state->OffloadFree(page->Size + sizeof(TMkqlArrowHeader)); page->Entry.Unlink(); auto it = state->ArrowBuffers.find(page); @@ -380,8 +379,12 @@ void MKQLArrowFreeOnArena(const void* ptr) { return; } -namespace { void MKQLArrowFreeImpl(const void* mem, ui64 size) { + if (Y_UNLIKELY(mem == reinterpret_cast(ZeroSizeObject))) { + Y_DEBUG_ABORT_UNLESS(size == 0); + return; + } + if (Y_LIKELY(!TAllocState::IsDefaultAllocatorUsed())) { if (size <= ArrowSizeForArena) { return MKQLArrowFreeOnArena(mem); @@ -409,8 +412,22 @@ void MKQLArrowFreeImpl(const void* mem, ui64 size) { ReleaseAlignedPage(header, fullSize); } + } // namespace +void* MKQLArrowAllocate(ui64 size) { + auto sizeWithRedzones = NYql::NUdf::GetSizeToAlloc(size); + void* mem = MKQLArrowAllocateImpl(sizeWithRedzones); + return NYql::NUdf::WrapPointerWithRedZones(mem, sizeWithRedzones); +} + +void* MKQLArrowReallocate(const void* mem, ui64 prevSize, ui64 size) { + auto res = MKQLArrowAllocate(size); + memcpy(res, mem, Min(prevSize, size)); + MKQLArrowFree(mem, prevSize); + return res; +} + void MKQLArrowFree(const void* mem, ui64 size) { mem = NYql::NUdf::UnwrapPointerWithRedZones(mem, size); auto sizeWithRedzones = NYql::NUdf::GetSizeToAlloc(size); @@ -418,6 +435,11 @@ void MKQLArrowFree(const void* mem, ui64 size) { } void MKQLArrowUntrack(const void* mem, ui64 size) { + if (Y_UNLIKELY(mem == reinterpret_cast(ZeroSizeObject))) { + Y_DEBUG_ABORT_UNLESS(size == 0); + return; + } + mem = NYql::NUdf::GetOriginalAllocatedObject(mem, size); TAllocState* state = TlsAllocState; Y_ENSURE(state); @@ -437,7 +459,7 @@ void MKQLArrowUntrack(const void* mem, ui64 size) { if (!page->Entry.IsUnlinked()) { page->Entry.Unlink(); // unlink page immediately so we don't accidentally free untracked memory within `TAllocState` state->ArrowBuffers.erase(it); - state->OffloadFree(page->Size); + state->OffloadFree(page->Size + sizeof(TMkqlArrowHeader)); } return; @@ -459,6 +481,4 @@ void MKQLArrowUntrack(const void* mem, ui64 size) { } } -} // NMiniKQL - -} // NKikimr +} // namespace NKikimr::NMiniKQL