Skip to content

Commit f0aec30

Browse files
committed
Merge pull request #17 from tesseract2048/master
Optimize MurmurHash, benchmarked 33% faster
2 parents 6549226 + 770f0cf commit f0aec30

File tree

1 file changed

+16
-71
lines changed

1 file changed

+16
-71
lines changed

fnlp-core/src/main/java/org/fnlp/util/hash/MurmurHash.java

Lines changed: 16 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,7 @@
1919

2020
package org.fnlp.util.hash;
2121

22-
import java.io.UnsupportedEncodingException;
23-
24-
25-
/**
22+
/**
2623
*
2724
* murmur hash 2.0.
2825
*
@@ -44,8 +41,21 @@ public final class MurmurHash extends AbstractHashCode {
4441
// Public no-argument constructor; the methods shown below are instance methods.
4542
public MurmurHash() {}
4643

44+
/**
 * Converts a string to a byte array by splitting every char into two bytes,
 * low-order byte first, without going through a charset encoder.
 *
 * @param str string to convert; a null value fails at str.length()
 * @return a newly allocated array of 2 * str.length() bytes
 */
protected byte[] toBytesWithoutEncoding(String str) {
45+
int len = str.length();
46+
int pos = 0;
47+
byte[] buf = new byte[len << 1]; // two bytes per char
48+
for (int i = 0; i < len; i++) {
49+
char c = str.charAt(i);
50+
buf[pos++] = (byte) (c & 0xFF); // low-order 8 bits of the char
51+
buf[pos++] = (byte) (c >> 8); // high-order 8 bits of the char
52+
}
53+
return buf;
54+
}
55+
4756
/**
 * Computes the int hash of a string via hash32.
 * In this diff the removed line (gutter 48-) delegated to hash32(str); the
 * added lines (gutters 57+ and 58+) first convert the string with
 * toBytesWithoutEncoding and then hash the resulting bytes.
 *
 * @param str string to hash
 * @return value returned by hash32 for the string's bytes
 */
public int hashcode(String str) {
48-
return hash32(str);
57+
byte[] bytes = toBytesWithoutEncoding(str);
58+
return hash32(bytes, bytes.length);
4959
}
5060

5161
/**
@@ -106,31 +116,6 @@ public int hash32( final byte[] data, int length) {
106116
}
107117

108118

109-
/**
110-
* Generates 32 bit hash from a string.
111-
*
112-
* @param text string to hash
113-
* @return 32 bit hash of the given string
114-
*/
115-
public int hash32( final String text) {
116-
final byte[] bytes = text.getBytes();
117-
return hash32( bytes, bytes.length);
118-
}
119-
120-
121-
/**
122-
* Generates 32 bit hash from a substring.
123-
*
124-
* @param text string to hash
125-
* @param from starting index
126-
* @param length length of the substring to hash
127-
* @return 32 bit hash of the given string
128-
*/
129-
public int hash32( final String text, int from, int length) {
130-
return hash32( text.substring( from, from+length));
131-
}
132-
133-
134119
/**
135120
* Generates 64 bit hash from byte array of the given length and seed.
136121
*
@@ -192,44 +177,4 @@ public long hash64( final byte[] data, int length) {
192177
return hash64( data, length, 0xe17a1465);
193178
}
194179

195-
196-
/**
197-
* Generates 64 bit hash from a string.
198-
*
199-
* @param text string to hash
200-
* @return 64 bit hash of the given string
201-
*/
202-
public long hash64( final String text) {
203-
204-
final byte[] bytes;
205-
try {
206-
bytes = text.getBytes("utf-8");
207-
208-
return hash32( bytes, bytes.length);
209-
} catch (UnsupportedEncodingException e) {
210-
throw new RuntimeException("utf-8 encoding should be available on system.", e);
211-
}
212-
}
213-
214-
215-
216-
/**
217-
* Generates 64 bit hash from a substring.
218-
*
219-
* @param text string to hash
220-
* @param from starting index
221-
* @param length length of the substring to hash
222-
* @return 64 bit hash of the given array
223-
*/
224-
public long hash64( final String text, int from, int length) {
225-
226-
final byte[] bytes;
227-
try {
228-
bytes = text.getBytes("utf-8");
229-
230-
return hash64( bytes, bytes.length);
231-
} catch (UnsupportedEncodingException e) {
232-
throw new RuntimeException("utf-8 encoding should be available on system.", e);
233-
}
234-
}
235-
}
180+
}

0 commit comments

Comments
 (0)