Skip to content

Commit a6fcc19

Browse files
committed
[GR-15869] Hash code improvement.
PullRequest: truffleruby/842
2 parents 33c2fe4 + 6f9bf85 commit a6fcc19

File tree

10 files changed

+173
-39
lines changed

10 files changed

+173
-39
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ Compatibility
1717
* `String#-@` now performs string deduplication (#1608).
1818
* `Hash#merge` now preserves the key order from the original hash for merged values (#1650).
1919

20+
Changes:
21+
22+
* Hash code calculation has been improved to reduce hash collisions for `Hash` and other cases.
23+
2024
# 1.0 RC 17
2125
# 19.0.0, May 2019
2226

spec/ruby/core/hash/hash_spec.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
{ 0=>2, 11=>1 }.hash.should == { 11=>1, 0=>2 }.hash
1212
end
1313

14+
it "returns a value in which element values do not cancel each other out" do
15+
{ a: 2, b: 2 }.hash.should_not == { a: 7, b: 7 }.hash
16+
end
17+
18+
it "returns a value in which element keys and values do not cancel each other out" do
19+
{ :a => :a }.hash.should_not == { :b => :b }.hash
20+
end
21+
1422
it "generates a hash for recursive hash structures" do
1523
h = {}
1624
h[:a] = h

src/main/java/org/truffleruby/core/VMPrimitiveNodes.java

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import org.truffleruby.language.control.ExitException;
6868
import org.truffleruby.language.control.RaiseException;
6969
import org.truffleruby.language.control.ThrowException;
70+
import org.truffleruby.language.dispatch.CallDispatchHeadNode;
7071
import org.truffleruby.language.methods.InternalMethod;
7172
import org.truffleruby.language.methods.LookupMethodNode;
7273
import org.truffleruby.language.methods.LookupMethodNodeGen;
@@ -444,4 +445,79 @@ public DynamicObject negativeCount(int count) {
444445

445446
}
446447

448+
@Primitive(name = "vm_hash_start", needsSelf = false)
449+
public abstract static class VMHashStart extends PrimitiveArrayArgumentsNode {
450+
451+
@Specialization
452+
public long startHash(long salt) {
453+
return getContext().getHashing(this).start(salt);
454+
}
455+
456+
@Specialization(guards = "isRubyBignum(salt)")
457+
public long startHashBigNum(DynamicObject salt) {
458+
return getContext().getHashing(this).start(Layouts.BIGNUM.getValue(salt).hashCode());
459+
}
460+
461+
@Specialization(guards = "!isRubyNumber(salt)")
462+
public Object startHashNotNumber(Object salt,
463+
@Cached("createPrivate()") CallDispatchHeadNode coerceToIntNode,
464+
@Cached("createBinaryProfile()") ConditionProfile isIntegerProfile,
465+
@Cached("createBinaryProfile()") ConditionProfile isLongProfile,
466+
@Cached("createBinaryProfile()") ConditionProfile isBignumProfile) {
467+
Object result = coerceToIntNode.call(coreLibrary().getTruffleTypeModule(), "coerce_to_int", salt);
468+
if (isIntegerProfile.profile(result instanceof Integer)) {
469+
return getContext().getHashing(this).start((int) result);
470+
} else if (isLongProfile.profile(result instanceof Long)) {
471+
return getContext().getHashing(this).start((long) result);
472+
} else if (isBignumProfile.profile(Layouts.BIGNUM.isBignum(result))) {
473+
return getContext().getHashing(this).start(Layouts.BIGNUM.getValue((DynamicObject) result).hashCode());
474+
} else {
475+
throw new UnsupportedOperationException();
476+
}
477+
478+
}
479+
}
480+
481+
@Primitive(name = "vm_hash_update", needsSelf = false)
482+
public abstract static class VMHashUpdate extends PrimitiveArrayArgumentsNode {
483+
484+
@Specialization
485+
public long updateHash(long hash, long value) {
486+
return Hashing.update(hash, value);
487+
}
488+
489+
@Specialization(guards = "isRubyBignum(value)")
490+
public long updateHash(long hash, DynamicObject value) {
491+
return Hashing.update(hash, Layouts.BIGNUM.getValue(value).hashCode());
492+
}
493+
494+
495+
@Specialization(guards = "!isRubyNumber(value)")
496+
public Object updateHash(long hash, Object value,
497+
@Cached("createPrivate()") CallDispatchHeadNode coerceToIntNode,
498+
@Cached("createBinaryProfile()") ConditionProfile isIntegerProfile,
499+
@Cached("createBinaryProfile()") ConditionProfile isLongProfile,
500+
@Cached("createBinaryProfile()") ConditionProfile isBignumProfile) {
501+
Object result = coerceToIntNode.call(coreLibrary().getTruffleTypeModule(), "coerce_to_int", value);
502+
if (isIntegerProfile.profile(result instanceof Integer)) {
503+
return Hashing.update(hash, (int) result);
504+
} else if (isLongProfile.profile(result instanceof Long)) {
505+
return Hashing.update(hash, (long) result);
506+
} else if (isBignumProfile.profile(Layouts.BIGNUM.isBignum(result))) {
507+
return Hashing.update(hash, Layouts.BIGNUM.getValue((DynamicObject) result).hashCode());
508+
} else {
509+
throw new UnsupportedOperationException();
510+
}
511+
512+
}
513+
}
514+
515+
@Primitive(name = "vm_hash_end", needsSelf = false)
516+
public abstract static class VMHashEnd extends PrimitiveArrayArgumentsNode {
517+
518+
@Specialization
519+
public long endHash(long hash) {
520+
return Hashing.end(hash);
521+
}
522+
}
447523
}

src/main/java/org/truffleruby/core/numeric/IntegerNodes.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1467,24 +1467,6 @@ public DynamicObject toS(DynamicObject value, int base) {
14671467

14681468
}
14691469

1470-
@Primitive(name = "integer_memhash")
1471-
public static abstract class IntegerMemhashNode extends PrimitiveArrayArgumentsNode {
1472-
1473-
private static final int CLASS_SALT = 94974697; // random number, stops hashes for similar
1474-
// values but different classes being the
1475-
// same, static because we want
1476-
// deterministic hashes
1477-
1478-
@Specialization
1479-
public long memhashLongLong(long a, long b) {
1480-
long h = getContext().getHashing(this).start(CLASS_SALT);
1481-
h = Hashing.update(h, a);
1482-
h = Hashing.update(h, b);
1483-
return Hashing.end(h);
1484-
}
1485-
1486-
}
1487-
14881470
@Primitive(name = "integer_fits_into_int")
14891471
public static abstract class FixnumFitsIntoIntNode extends PrimitiveArrayArgumentsNode {
14901472

src/main/ruby/core/complex.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,19 @@ def to_s
334334
result
335335
end
336336

337+
# Random number for hash codes. Stops hashes for similar values in
338+
# different classes from clashing, but defined as a constant so
339+
# that hashes will be deterministic.
340+
341+
CLASS_SALT = 0x37f7c8ee
342+
343+
private_constant :CLASS_SALT
344+
337345
def hash
338-
Truffle.invoke_primitive :integer_memhash, @real.hash, @imag.hash
346+
val = Truffle.invoke_primitive :vm_hash_start, CLASS_SALT
347+
val = Truffle.invoke_primitive :vm_hash_update, val, @real.hash
348+
val = Truffle.invoke_primitive :vm_hash_update, val, @imag.hash
349+
Truffle.invoke_primitive :vm_hash_end, val
339350
end
340351

341352
def inspect

src/main/ruby/core/hash.rb

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,16 +345,27 @@ def to_h
345345
end
346346
end
347347

348+
# Random number for hash codes. Stops hashes for similar values in
349+
# different classes from clashing, but defined as a constant so
350+
# that hashes will be deterministic.
351+
352+
CLASS_SALT = 0xC5F7D8
353+
354+
private_constant :CLASS_SALT
355+
348356
def hash
349-
val = size
357+
val = Truffle.invoke_primitive :vm_hash_start, CLASS_SALT
358+
val = Truffle.invoke_primitive :vm_hash_update, val, size
350359
Thread.detect_outermost_recursion self do
351360
each_pair do |key,value|
352-
val ^= key.hash
353-
val ^= value.hash
361+
entry_val = Truffle.invoke_primitive :vm_hash_start, key.hash
362+
entry_val = Truffle.invoke_primitive :vm_hash_update, entry_val, value.hash
363+
# We have to combine these with xor as the hash must not depend on hash order.
364+
val ^= Truffle.invoke_primitive :vm_hash_end, entry_val
354365
end
355366
end
356367

357-
val
368+
Truffle.invoke_primitive :vm_hash_end, val
358369
end
359370

360371
def delete_if(&block)

src/main/ruby/core/range.rb

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -182,14 +182,20 @@ def first(n=undefined)
182182
super
183183
end
184184

185+
# Random number for hash codes. Stops hashes for similar values in
186+
# different classes from clashing, but defined as a constant so
187+
# that hashes will be deterministic.
188+
189+
CLASS_SALT = 0xb36df84d
190+
191+
private_constant :CLASS_SALT
192+
185193
def hash
186-
excl = exclude_end? ? 1 : 0
187-
hash = excl
188-
hash ^= self.begin.hash << 1
189-
hash ^= self.end.hash << 9
190-
hash ^= excl << 24;
191-
# Are we throwing away too much here for a good hash value distribution?
192-
hash & Truffle::Platform::LONG_MAX
194+
val = Truffle.invoke_primitive(:vm_hash_start, CLASS_SALT)
195+
val = Truffle.invoke_primitive(:vm_hash_update, val, exclude_end? ? 1 : 0)
196+
val = Truffle.invoke_primitive(:vm_hash_update, val, self.begin.hash)
197+
val = Truffle.invoke_primitive(:vm_hash_update, val, self.end.hash)
198+
Truffle.invoke_primitive(:vm_hash_end, val)
193199
end
194200

195201
def include?(value)

src/main/ruby/core/rational.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,19 @@ def floor(precision = 0)
206206
end
207207
end
208208

209+
# Random number for hash codes. Stops hashes for similar values in
210+
# different classes from clashing, but defined as a constant so
211+
# that hashes will be deterministic.
212+
213+
CLASS_SALT = 0x3c654ce9
214+
215+
private_constant :CLASS_SALT
216+
209217
def hash
210-
@numerator.hash ^ @denominator.hash
218+
val = Truffle.invoke_primitive :vm_hash_start, CLASS_SALT
219+
val = Truffle.invoke_primitive :vm_hash_update, val, @numerator.hash
220+
val = Truffle.invoke_primitive :vm_hash_update, val, @denominator.hash
221+
Truffle.invoke_primitive :vm_hash_end, val
211222
end
212223

213224
def inspect

src/main/ruby/core/struct.rb

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -266,12 +266,21 @@ def each_pair
266266
self
267267
end
268268

269+
# Random number for hash codes. Stops hashes for similar values in
270+
# different classes from clashing, but defined as a constant so
271+
# that hashes will be deterministic.
272+
273+
CLASS_SALT = 0xa1982d79
274+
275+
private_constant :CLASS_SALT
276+
269277
def hash
270-
hash_val = size
271-
return _attrs.size if Thread.detect_outermost_recursion self do
272-
_attrs.each { |var| hash_val ^= instance_variable_get(:"@#{var}").hash }
278+
val = Truffle.invoke_primitive(:vm_hash_start, CLASS_SALT)
279+
val = Truffle.invoke_primitive(:vm_hash_update, val, size)
280+
return val if Thread.detect_outermost_recursion self do
281+
_attrs.each { |var| Truffle.invoke_primitive(:vm_hash_update, val, instance_variable_get(:"@#{var}").hash) }
273282
end
274-
hash_val
283+
Truffle.invoke_primitive(:vm_hash_end, val)
275284
end
276285

277286
def length
@@ -355,20 +364,25 @@ def self._specialize(attrs)
355364
hashes << "#{vars[-1]}.hash"
356365
end
357366

367+
hash_calculation = hashes.map do |calc|
368+
"hash = Truffle.invoke_primitive :vm_hash_update, hash, #{calc}"
369+
end.join("\n")
370+
358371
code = <<-CODE
359372
def initialize(#{args.join(", ")})
360373
#{assigns.join(';')}
361374
self
362375
end
363376
364377
def hash
365-
hash = #{hashes.size}
378+
hash = Truffle.invoke_primitive :vm_hash_start, CLASS_SALT
379+
hash = Truffle.invoke_primitive :vm_hash_update, hash, #{hashes.size}
366380
367381
return hash if Thread.detect_outermost_recursion(self) do
368-
hash = hash ^ #{hashes.join(' ^ ')}
382+
#{hash_calculation}
369383
end
370384
371-
hash
385+
Truffle.invoke_primitive :vm_hash_end, hash
372386
end
373387
374388
def to_a

src/main/ruby/core/time.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,19 @@ def zone
112112
end
113113
end
114114

115+
# Random number for hash codes. Stops hashes for similar values in
116+
# different classes from clashing, but defined as a constant so
117+
# that hashes will be deterministic.
118+
119+
CLASS_SALT = 0xf39684d6
120+
121+
private_constant :CLASS_SALT
122+
115123
def hash
116-
tv_sec ^ tv_nsec
124+
val = Truffle.invoke_primitive :vm_hash_start, CLASS_SALT
125+
val = Truffle.invoke_primitive :vm_hash_update, val, tv_sec
126+
val = Truffle.invoke_primitive :vm_hash_update, val, tv_nsec
127+
Truffle.invoke_primitive :vm_hash_end, val
117128
end
118129

119130
def eql?(other)

0 commit comments

Comments
 (0)