Skip to content

Commit b82d7e8

Browse files
authored
stdlib: substr uses copymem if available, improve docs (#24792)
- `system.substr` now uses `copymem` when available, introducing a small template for nimvm detection (#12517 #12518) - Docs are updated to clarify behaviour on out-of-bounds input - Runnable examples cover more edge cases and do not repeat between overloads - Docs now explain the difference between overloads What bothers me is that the `substr*(a: openArray[char]): string =` which was added by @beef331 is practically an implementation of #14810, which is just a conversion from `openArray` to `string` but somehow it ended up being a `substr` overload, even though its behaviour is totally different, _the "substringing" is performed by a previous step_ (conversion to openArray) and the bounds are not checked. I'm not sure it's that great for overloads to differ in subtle ways so much. What are the cases that `substr` covers now, that prohibit renaming it to `toString` (or something like that)?
1 parent ddd83f8 commit b82d7e8

File tree

2 files changed

+86
-32
lines changed

2 files changed

+86
-32
lines changed

changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,19 @@ errors.
2222
## Standard library additions and changes
2323

2424
[//]: # "Additions:"
25+
2526
- `setutils.symmetricDifference` along with its operator version
2627
`` setutils.`-+-` `` and in-place version `setutils.toggle` have been added
2728
to more efficiently calculate the symmetric difference of bitsets.
2829
- `strutils.multiReplace` overload for character set replacements in a single pass.
2930
Useful for string sanitation. Follows existing multiReplace semantics.
3031

3132
[//]: # "Changes:"
33+
3234
- `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type.
3335

36+
- `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation.
37+
3438
## Language changes
3539

3640
- An experimental option `--experimental:typeBoundOps` has been added that

lib/system.nim

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2769,41 +2769,89 @@ template once*(body: untyped): untyped =
27692769
27702770
{.pop.} # warning[GcMem]: off, warning[Uninit]: off
27712771
2772-
proc substr*(s: openArray[char]): string =
2773-
## Copies a slice of `s` into a new string and returns this new
2774-
## string.
2772+
template NotJSnotVMnotNims(): static bool = # hack, see: #12517 #12518
2773+
when nimvm:
2774+
false
2775+
else:
2776+
notJSnotNims
2777+
2778+
proc substr*(a: openArray[char]): string =
2779+
## Returns a new string, copying contents of `a`.
2780+
##
2781+
## .. warning:: As opposed to other `substr` overloads, no additional input
2782+
## validation and clamping is performed!
2783+
##
2784+
## This proc does not prevent raising an `IndexDefect` when `a` is being
2785+
## passed using a `toOpenArray` call with out-of-bounds indexes:
2786+
## * `doAssertRaises(IndexDefect): discard "abc".toOpenArray(-9, 9).substr()`
2787+
##
2788+
## If clamping is required, consider using
2789+
## `substr(s: string; first, last: int) <#substr,string,int,int>`_:
2790+
## * `doAssert "abc".substr(-9, 9) == "abc"`
27752791
runnableExamples:
2776-
let a = "abcdefgh"
2777-
assert a.substr(2, 5) == "cdef"
2778-
assert a.substr(2) == "cdefgh"
2779-
assert a.substr(5, 99) == "fgh"
2780-
result = newString(s.len)
2781-
for i, ch in s:
2782-
result[i] = ch
2783-
2784-
proc substr*(s: string, first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
2785-
## Copies a slice of `s` into a new string and returns this new
2786-
## string.
2787-
##
2788-
## The bounds `first` and `last` denote the indices of
2789-
## the first and last characters that shall be copied. If `last`
2790-
## is omitted, it is treated as `high(s)`. If `last >= s.len`, `s.len`
2791-
## is used instead: This means `substr` can also be used to `cut`:idx:
2792-
## or `limit`:idx: a string's length.
2792+
let a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
2793+
assert a.substr() == "abcdefgh"
2794+
assert a.toOpenArray(2, 5).substr() == "cdef"
2795+
assert a.toOpenArray(2, high(a)).substr() == "cdefgh" # From index 2 to `high(a)`
2796+
doAssertRaises(IndexDefect): discard a.toOpenArray(5, 99).substr()
2797+
{.cast(noSideEffect).}:
2798+
result = newStringUninit(a.len)
2799+
when NotJSnotVMnotNims:
2800+
if a.len > 0:
2801+
copyMem(result[0].addr, a[0].unsafeAddr, a.len)
2802+
else:
2803+
for i, ch in a:
2804+
result[i] = ch
2805+
2806+
proc substr*(s: string; first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
2807+
## Returns a new string containing a substring (slice) of `s`,
2808+
## copying characters from index `first` to index `last` inclusive.
2809+
##
2810+
## Index values are validated and capped:
2811+
## - Negative `first` is clamped to 0
2812+
## - If `last >= s.len`, it is clamped to `high(s)`
2813+
## - If `last < first`, returns an empty string
2814+
## This means `substr` can also be used to `cut`:idx: or `limit`:idx:
2815+
## a string's length.
2816+
##
2817+
## .. note::
2818+
## If index values are ensured to be in-bounds, for performance
2819+
## critical cases consider using a non-clamping overload
2820+
## `substr(a: openArray[char]) <#substr,openArray[char]>`_
27932821
runnableExamples:
27942822
let a = "abcdefgh"
2795-
assert a.substr(2, 5) == "cdef"
2796-
assert a.substr(2) == "cdefgh"
2797-
assert a.substr(5, 99) == "fgh"
2798-
2799-
let first = max(first, 0)
2800-
let L = max(min(last, high(s)) - first + 1, 0)
2801-
result = newString(L)
2802-
for i in 0 .. L-1:
2803-
result[i] = s[i+first]
2823+
assert a.substr(2, 5) == "cdef" # Normal substring
2824+
# Invalid indexes
2825+
assert a.substr(5, 99) == "fgh" # From index 5 to `high(a)`
2826+
assert a.substr(42, 99) == "" # `first` out of bounds
2827+
assert a.substr(100, 5) == "" # `first > last`
2828+
assert a.substr(-1, 2) == "abc" # Negative `first` clamped to 0
2829+
let
2830+
first = max(first, 0)
2831+
last = min(last, high(s))
2832+
L = max(last - first + 1, 0)
2833+
{.cast(noSideEffect).}:
2834+
result = newStringUninit(L)
2835+
when NotJSnotVMnotNims:
2836+
if L > 0:
2837+
copyMem(result[0].addr, s[first].unsafeAddr, L)
2838+
else:
2839+
for i in 0..<L:
2840+
result[i] = s[i + first]
28042841

28052842
proc substr*(s: string, first = 0): string =
2806-
result = substr(s, first, high(s))
2843+
## Convenience `substr <#substr,string,int,int>`_ overload that returns
2844+
## a substring from `first` to the end of the string.
2845+
##
2846+
## `first` value is validated and capped:
2847+
## - `first >= s.len` returns an empty string
2848+
## - Negative `first` is clamped to 0.
2849+
runnableExamples:
2850+
let a = "abcdefgh"
2851+
assert a.substr(2) == "cdefgh" # From index 2 to string end (`high(a)`)
2852+
assert a.substr(100) == "" # `first` out of bounds
2853+
assert a.substr(-1) == "abcdefgh" # Negative `first` clamped to 0
2854+
substr(s, first, high(s))
28072855

28082856
when defined(nimconfig):
28092857
include "system/nimscript"
@@ -2818,8 +2866,10 @@ when not defined(js):
28182866
28192867
proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
28202868
magic: "Slice".}
2821-
## Allows passing the slice of `x` from the element at `first` to the element
2822-
## at `last` to `openArray[T]` parameters without copying it.
2869+
## Returns a non-owning slice (a `view`:idx:) of `x` from the element at
2870+
## index `first` to `last` inclusive. Allows passing slices without copying,
2871+
## as opposed to using the slice operator
2872+
## `\`[]\` <#[],openArray[T],HSlice[U: Ordinal,V: Ordinal]>`_.
28232873
##
28242874
## Example:
28252875
## ```nim

0 commit comments

Comments
 (0)