Skip to content

Commit e68b579

Browse files
chrisseaton authored and Lillian Zhang committed
Improvements to Array#sample
Co-authored with Lillian
1 parent 08187f1 commit e68b579

File tree

1 file changed

+85
-47
lines changed

1 file changed

+85
-47
lines changed

src/main/ruby/truffleruby/core/array.rb

Lines changed: 85 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -975,7 +975,7 @@ def sample(count=undefined, options=undefined)
975975
end
976976

977977
if count and count < 0
978-
raise ArgumentError, 'count must be greater than 0'
978+
raise ArgumentError, 'count must be >= 0'
979979
end
980980

981981
rng = options[:random] if options
@@ -996,9 +996,9 @@ def sample(count=undefined, options=undefined)
996996
return [at(rng.rand(size))]
997997
when 2
998998
i = rng.rand(size)
999-
j = rng.rand(size)
1000-
if i == j
1001-
j = i == 0 ? i + 1 : i - 1
999+
j = rng.rand(size - 1)
1000+
if j >= i
1001+
j += 1
10021002
end
10031003
return [at(i), at(j)]
10041004
else
@@ -1007,69 +1007,107 @@ def sample(count=undefined, options=undefined)
10071007
end
10081008

10091009
def sample_many(count, rng)
1010-
if size / count > 3
1011-
abandon = false
1012-
1013-
result = Array.new count
1014-
i = 1
1010+
if count <= 70 # three implementations; choice determined experimentally
1011+
if 2.0 * size / count <= count + 13
1012+
sample_many_swap(count, rng)
1013+
else
1014+
sample_many_quad(count, rng)
1015+
end
1016+
else
1017+
if size <= -1100.0 + 59.5 * count
1018+
sample_many_swap(count, rng)
1019+
else
1020+
sample_many_hash(count,rng)
1021+
end
1022+
end
1023+
end
1024+
private :sample_many
10151025

1016-
result[0] = rng.rand(size)
1017-
while i < count
1018-
k = rng.rand(size)
1026+
def sample_many_swap(count, rng)
1027+
#linear dependence on array size, therefore very slow for small count / size
1028+
result = Array.new(self)
10191029

1020-
spin = false
1021-
spin_count = 0
1030+
count.times do |c|
1031+
result.__send__ :swap, c, rng.rand(size)
1032+
end
1033+
end
10221034

1023-
while true # rubocop:disable Lint/LiteralAsCondition
1024-
j = 0
1025-
while j < i
1026-
if k == result[j]
1027-
spin = true
1028-
break
1029-
end
1035+
def sample_many_quad(count, rng)
1036+
#quadratic time due to linear time collision check but low overhead
1037+
result = Array.new count
1038+
i = 1
10301039

1031-
j += 1
1032-
end
1040+
result[0] = rng.rand(size)
10331041

1034-
if spin
1035-
if (spin_count += 1) > 100
1036-
abandon = true
1037-
break
1038-
end
1042+
while i < count
1043+
k = rng.rand(size)
1044+
spin = false
10391045

1040-
k = rng.rand(size)
1041-
else
1046+
while true # rubocop:disable Lint/LiteralAsCondition
1047+
j = 0
1048+
while j < i
1049+
if k == result[j]
1050+
spin = true
10421051
break
10431052
end
1053+
1054+
j += 1
10441055
end
10451056

1046-
break if abandon
1057+
if spin
1058+
k = rng.rand(size)
1059+
spin = false
1060+
else
1061+
break
1062+
end
1063+
end
10471064

1048-
result[i] = k
1065+
result[i] = k
1066+
i += 1
1067+
end
10491068

1050-
i += 1
1051-
end
1069+
i = 0
1070+
while i < count
1071+
result[i] = at result[i]
1072+
i += 1
1073+
end
10521074

1053-
unless abandon
1054-
i = 0
1055-
while i < count
1056-
result[i] = at result[i]
1057-
i += 1
1058-
end
1075+
result
1076+
end
1077+
1078+
def sample_many_hash(count, rng)
1079+
# use hash for constant time collision check but higher overhead
1080+
result = Array.new count
1081+
i = 1
1082+
1083+
result[0] = rng.rand(size)
1084+
result_set = { result[0] => 0 }
10591085

1060-
return result
1086+
while i < count
1087+
k = rng.rand(size)
1088+
1089+
while true # rubocop:disable Lint/LiteralAsCondition
1090+
if result_set.include?(k)
1091+
k = rng.rand(size)
1092+
else
1093+
break
1094+
end
10611095
end
1062-
end
10631096

1064-
result = Array.new(self)
1097+
result[i] = k
1098+
result_set[i] = k
10651099

1066-
count.times do |c|
1067-
result.__send__ :swap, c, rng.rand(size)
1100+
i += 1
10681101
end
10691102

1070-
count == size ? result : result[0, count]
1103+
i = 0
1104+
while i < count
1105+
result[i] = at result[i]
1106+
i += 1
1107+
end
1108+
1109+
result
10711110
end
1072-
private :sample_many
10731111

10741112
def select!(&block)
10751113
return to_enum(:select!) { size } unless block_given?

0 commit comments

Comments
 (0)