Skip to content

Commit 1e2218a

Browse files
Allow concatenating small InlineStrings to return an InlineString (#59)
* Allow concatenating small InlineStrings to return an InlineString
* More specific test for issue#2
* fixup! More specific test for issue#2
* Update src/InlineStrings.jl
* fixup! Update src/InlineStrings.jl
* Add more string concat tests
* Add comments
* fixup! Add more string concat tests
* More micro-optimizations for tiny inline strings
* fixup! More micro-optimizations for tiny inline strings
1 parent f26f964 commit 1e2218a

File tree

2 files changed

+63
-6
lines changed

2 files changed

+63
-6
lines changed

src/InlineStrings.jl

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ for sz in (1, 4, 8, 16, 32, 64, 128, 256)
7373
end
7474
end
7575

76+
# Small inline string types. Two values of these types can be concatenated into another
# inline string (see `_string` / `summed_type`), and arrays of them default to `InlineStringSort`.
const SmallInlineStrings = Union{String1, String3, String7, String15}
77+
7678
# used to zero out n lower bytes of an inline string
7779
function clear_n_bytes(s, n)
    # shifting right and then left by the same bit count zeroes the low `n` bytes
    nbits = 8 * n
    shifted_out = Base.lshr_int(s, nbits)
    return Base.shl_int(shifted_out, nbits)
end
7880
# Byte-swap an inline string; an `InlineString1` is returned unchanged.
function _bswap(x::T) where {T <: InlineString}
    T === InlineString1 && return x
    return Base.bswap_int(x)
end
@@ -595,10 +597,11 @@ end
595597
end
596598

597599
# Base string-like types that the `string` overloads accept together with InlineStrings
const BaseStrs = Union{Char, String, SubString{String}}
598-
Base.string(a::InlineString) = String(a)
600+
# A lone InlineString is returned unchanged (it is not converted to a `String`).
Base.string(a::InlineString) = a
599601
# The remaining overloads forward all of their arguments to `_string`.
Base.string(a::InlineString...) = _string(a...)
600602
Base.string(a::BaseStrs, b::InlineString) = _string(a, b)
601603
Base.string(a::BaseStrs, b::BaseStrs, c::InlineString) = _string(a, b, c)
604+
602605
@inline function _string(a::Union{BaseStrs, InlineString}...)
603606
n = 0
604607
for v in a
@@ -616,6 +619,40 @@ Base.string(a::BaseStrs, b::BaseStrs, c::InlineString) = _string(a, b, c)
616619
return out
617620
end
618621

622+
# For more and/or bigger InlineStrings, creating a `Base.String` is faster
623+
# Three-argument `string` restricted to the types in `_SmallerInlineStrings`:
# the operands are joined left to right by chaining the two-argument `_string`.
# NOTE(review): presumably this reaches the InlineString-returning `_string` method
# (tests expect e.g. an `InlineString15` result) — confirm `String1` etc. alias `InlineString1` etc.
const _SmallerInlineStrings = Union{InlineString1, InlineString3, InlineString7}
624+
Base.string(a::_SmallerInlineStrings, b::_SmallerInlineStrings, c::_SmallerInlineStrings) =
625+
_string(_string(a, b), c)
626+
# Four-argument `string` restricted to the types in `_SmallestInlineStrings`:
# the operands are joined left to right by chaining the two-argument `_string`.
const _SmallestInlineStrings = Union{InlineString1, InlineString3}
627+
Base.string(a::_SmallestInlineStrings, b::_SmallestInlineStrings, c::_SmallestInlineStrings, d::_SmallestInlineStrings) =
628+
_string(_string(_string(a, b), c), d)
629+
630+
# Only benefit from keeping the small-ish strings as InlineStrings
631+
# Concatenate two small inline strings into one inline string of type `summed_type(Ta, Tb)`
# using only bit operations: each operand has its length byte dropped (if it has one), is
# widened to the result type and shifted into position; the two are then OR-ed together with
# the new length byte, which is built from `sizeof(a) + sizeof(b)`.
function _string(a::Ta, b::Tb) where {Ta <: SmallInlineStrings, Tb <: SmallInlineStrings}
632+
T = summed_type(Ta, Tb)
633+
lb_a = Int(!isa(a, InlineString1)) # no "length byte" to remove if InlineString1
634+
lb_b = Int(!isa(b, InlineString1))
635+
len_a = sizeof(a)
636+
len_b = sizeof(b)
637+
# Remove length byte (lshr), grow to new size (zext), move chars forward (shl).
638+
a2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(a, 8*lb_a)), 8 * (sizeof(T) - sizeof(Ta) + lb_a))
639+
# `b` is shifted `len_a` fewer bytes than `a` would be, so its chars sit right after those of `a`.
b2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(b, 8*lb_b)), 8 * (sizeof(T) - sizeof(Tb) + lb_b - len_a))
640+
lb = _oftype(T, len_a + len_b) # new length byte
641+
return Base.or_int(Base.or_int(a2, b2), lb)
642+
end
643+
644+
# Result type for concatenating two small inline strings. In every pairing listed here the
# result is the next size up from the larger operand (1 -> 3, 3 -> 7, 7 -> 15, 15 -> 31).
# Only one argument order is spelled out per pair; the final method swaps the arguments
# so the opposite order resolves to the same entry.
summed_type(::Type{InlineString1}, ::Type{InlineString1}) = InlineString3
645+
summed_type(::Type{InlineString3}, ::Type{InlineString1}) = InlineString7
646+
summed_type(::Type{InlineString3}, ::Type{InlineString3}) = InlineString7
647+
summed_type(::Type{InlineString7}, ::Type{InlineString1}) = InlineString15
648+
summed_type(::Type{InlineString7}, ::Type{InlineString3}) = InlineString15
649+
summed_type(::Type{InlineString7}, ::Type{InlineString7}) = InlineString15
650+
summed_type(::Type{InlineString15}, ::Type{InlineString1}) = InlineString31
651+
summed_type(::Type{InlineString15}, ::Type{InlineString3}) = InlineString31
652+
summed_type(::Type{InlineString15}, ::Type{InlineString7}) = InlineString31
653+
summed_type(::Type{InlineString15}, ::Type{InlineString15}) = InlineString31
654+
# Fallback: flip the argument order to reach one of the explicit methods above.
summed_type(a::Type{<:SmallInlineStrings}, b::Type{<:SmallInlineStrings}) = summed_type(b, a)
655+
619656
function Base.repeat(x::T, r::Integer) where {T <: InlineString}
620657
r < 0 && throw(ArgumentError("can't repeat a string $r times"))
621658
r == 0 && return ""
@@ -886,9 +923,6 @@ end
886923
## InlineString sorting
887924
using Base.Sort, Base.Order
888925

889-
# Only small-ish InlineStrings benefit from RadixSort algorithm
890-
const SmallInlineStrings = Union{String1, String3, String7, String15}
891-
892926
# And under certain thresholds, MergeSort is faster than RadixSort, even for small InlineStrings
893927
const MergeSortThresholds = Dict(
894928
1 => 2^5,
@@ -900,6 +934,7 @@ const MergeSortThresholds = Dict(
900934
# Sorting algorithm marker type and its singleton instance; the instance is what
# `Base.Sort.defalg` returns for arrays of small inline strings (optionally with `missing`).
struct InlineStringSortAlg <: Algorithm end
901935
const InlineStringSort = InlineStringSortAlg()
902936

937+
# Only small-ish InlineStrings benefit from RadixSort algorithm
903938
Base.Sort.defalg(::AbstractArray{<:Union{SmallInlineStrings, Missing}}) = InlineStringSort
904939

905940
struct Radix

test/runtests.jl

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ x = InlineString1(buf)
6969
@test InlineString(String1("a")) === String1("a")
7070

7171
# https://github.com/JuliaData/InlineStrings.jl/issues/2
72-
x = InlineString("hey")
73-
@test typeof(string(x)) == String
72+
@test eltype(string.(AbstractString[])) == AbstractString
7473

7574
# https://github.com/JuliaData/InlineStrings.jl/issues/8
7675
# construction from pointer
@@ -277,6 +276,7 @@ const INLINES = map(InlineString, STRINGS)
277276
@test Array{UInt8}(x) == Array{UInt8}(y)
278277
@test isascii(x) == isascii(y)
279278
@test x * x == y * y
279+
@test x * x * x == y * y * y
280280
@test x^5 == y^5
281281
@test string(x) == string(y)
282282
@test join([x, x]) == join([y, y])
@@ -349,6 +349,28 @@ const INLINES = map(InlineString, STRINGS)
349349
end
350350
end
351351

352+
@testset "`string` / `*`" begin
353+
# Check that the `string` overload handles `String1` being concatenated with other small
# InlineStrings,
354+
# because it is easy to mishandle `String1` as it doesn't have a length byte.
355+
a = "a"
356+
@test String1(a) * String1(a) == a * a
357+
@test String1(a) * String1(a) isa InlineString3
358+
b = "bb"
359+
@test String1(a) * String3(b) == a * b
360+
@test String1(a) * String3(b) isa InlineString7
361+
@test String1(a) * String7(b) == a * b
362+
@test String1(a) * String7(b) isa InlineString15
363+
@test String1(a) * String15(b) == a * b
364+
@test String1(a) * String15(b) isa InlineString31
365+
@test String1(a) * String3(b) * String7(b) == a * b * b
366+
@test String1(a) * String3(b) * String7(b) isa InlineString15
367+
# Check that some other combinations of small inline strings also work as expected
368+
@test String3(a) * String7(b) == a * b
369+
@test String3(a) * String7(b) isa InlineString15
370+
@test String3(a) * String3(b) * String7(b) == a * b * b
371+
@test String3(a) * String3(b) * String7(b) isa InlineString15
372+
end
373+
352374
@testset "InlineString parsing" begin
353375
testcases = [
354376
("", InlineString7(""), NamedTuple(), OK | EOF),

0 commit comments

Comments
 (0)