diff --git a/changelog.md b/changelog.md index 08c4bd097d0f..6f60bf7b7f35 100644 --- a/changelog.md +++ b/changelog.md @@ -28,6 +28,7 @@ errors. to more efficiently calculate the symmetric difference of bitsets. - `strutils.multiReplace` overload for character set replacements in a single pass. Useful for string sanitation. Follows existing multiReplace semantics. +- `system.setLenUninit` for the `string` type. Allows setting length without initializing new memory on growth. [//]: # "Changes:" diff --git a/lib/system.nim b/lib/system.nim index 4a9d8cc0b87a..b105eb462067 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2287,6 +2287,27 @@ when notJSnotNims and hasAlloc: when not defined(nimV2): include "system/repr" +func setLenUninit*(s: var string, newlen: Natural) {.nodestroy.} = + ## Sets the length of string `s` to `newlen`. + ## New slots will not be initialized. + ## + ## If the new length is smaller than the current length, + ## `s` will be truncated. + let n = max(newLen, 0) + when nimvm: + s.setLen(n) + else: + when notJSnotNims: + when defined(nimSeqsV2): + {.noSideEffect.}: + let str = unsafeAddr s + setLengthStrV2Uninit(cast[ptr NimStringV2](str)[], newlen) + else: + {.noSideEffect.}: + setLengthStrUninit(s, newlen) + else: s.setLen(n) + + when notJSnotNims and hasThreadSupport and hostOS != "standalone": when not defined(nimPreviewSlimSystem): include "system/channels_builtin" diff --git a/lib/system/strs_v2.nim b/lib/system/strs_v2.nim index 95e76b1f8fae..b8d6acf11c64 100644 --- a/lib/system/strs_v2.nim +++ b/lib/system/strs_v2.nim @@ -166,6 +166,26 @@ proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} = s.p.data[newLen] = '\0' s.len = newLen +proc setLengthStrV2Uninit(s: var NimStringV2, newLen: int) = + if newLen == 0: + discard "do not free the buffer here, pattern 's.setLen 0' is common for avoiding allocations" + else: + if isLiteral(s): + let oldP = s.p + s.p = allocPayload(newLen) + s.p.cap = newLen + if s.len > 0: + 
copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen)) + s.p.data[newLen] = '\0' + elif newLen > s.len: + let oldCap = s.p.cap and not strlitFlag + if newLen > oldCap: + let newCap = max(newLen, resize(oldCap)) + s.p = reallocPayload0(s.p, oldCap, newCap) + s.p.cap = newCap + s.p.data[newLen] = '\0' + s.len = newLen + proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} = if a.p == b.p and a.len == b.len: return if isLiteral(b): diff --git a/lib/system/sysstr.nim b/lib/system/sysstr.nim index b864da853188..364ff48dc2de 100644 --- a/lib/system/sysstr.nim +++ b/lib/system/sysstr.nim @@ -156,7 +156,6 @@ proc addChar(s: NimString, c: char): NimString = result = rawNewStringNoInit(r) result.len = s.len copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len+1) - result.reserved = r result.data[result.len] = c result.data[result.len+1] = '\0' inc(result.len) @@ -202,7 +201,6 @@ proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} = result = rawNewStringNoInit(sp) result.len = dest.len copyMem(addr result.data[0], unsafeAddr(dest.data[0]), dest.len+1) - result.reserved = sp #result = rawNewString(sp) #copyMem(result, dest, dest.len + sizeof(TGenericSeq)) # DO NOT UPDATE LEN YET: dest.len = newLen @@ -220,22 +218,42 @@ proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} = proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} = let n = max(newLen, 0) if s == nil: - if n == 0: - return s - else: - result = mnewString(n) + return if n == 0: s else: mnewString(n) elif n <= s.space: result = s else: let sp = max(resize(s.space), n) result = rawNewStringNoInit(sp) - result.len = s.len copyMem(addr result.data[0], unsafeAddr(s.data[0]), s.len) zeroMem(addr result.data[s.len], n - s.len) - result.reserved = sp result.len = n result.data[n] = '\0' +proc setLengthStrUninit(s: var string, newlen: Natural) {.nodestroy.} = + ## Sets the `s` length to `newlen` without zeroing memory on 
growth. + ## Terminating zero for cstring compatibility is set. NOTE(review): for `s.len < newlen <= str.space` the code below appears to reach `else: return` without updating the length - confirm this is intended. + var str = cast[NimString](s.unsafeAddr) + let n = max(newLen, 0) + if str == nil: + if n == 0: return + else: + str = rawNewStringNoInit(n) + str.data[n] = '\0' + str.len = n + s = cast[string](str) + else: + if n > str.space: + let sp = max(resize(str.space), n) + str = rawNewStringNoInit(sp) + copyMem(addr str.data[0], unsafeAddr s[0], s.len) + str.data[n] = '\0' + str.len = n + s = cast[string](str) + elif n < s.len: + str.data[n] = '\0' + str.len = n + else: return + # ----------------- sequences ---------------------------------------------- proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} = diff --git a/tests/stdlib/tstring.nim b/tests/stdlib/tstring.nim index b9b3c78a3821..be7faa42a910 100644 --- a/tests/stdlib/tstring.nim +++ b/tests/stdlib/tstring.nim @@ -120,5 +120,36 @@ proc main() = doAssert c.len == 0 doAssert c.high == -1 + block: # setLen #setLenUninit + proc checkStrInternals(s: string; expectedLen: int) = + doAssert s.len == expectedLen + when nimvm: discard + else: + when defined(UncheckedArray): # skip JS + let cs = s.cstring # allows getting the data address without IndexDefect + let arr = cast[ptr UncheckedArray[char]](unsafeAddr cs[0]) + doAssert arr[expectedLen] == '\0', "(no terminating zero)" + + const numbers = "1234567890" + block setLen: + var s = numbers + s.setLen(0) + s.checkStrInternals(expectedLen = 0) + doAssert s == "" + + block setLenUninit: + var s = numbers + s.setLenUninit(numbers.len) # noop + s.checkStrInternals(expectedLen = numbers.len) + doAssert s == numbers + + s.setLenUninit(5) # trim + s.checkStrInternals(expectedLen = 5) + doAssert s == "12345" + + s.setLenUninit(11) # growth + s.checkStrInternals(expectedLen = 11) + doAssert s[0..4] == numbers[0..4] + static: main() main()