Skip to content

Commit c8bbcaf

Browse files
missa-prime authored and Jatin Bhateja committed
8348638: Performance regression in Math.tanh
Reviewed-by: jbhateja, epeter, sviswanathan
1 parent 84f570c commit c8bbcaf

File tree

2 files changed

+171
-18
lines changed

2 files changed

+171
-18
lines changed

src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, Intel Corporation. All rights reserved.
2+
* Copyright (c) 2024, 2025, Intel Corporation. All rights reserved.
33
* Intel Math Library (LIBM) Source Code
44
*
55
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -46,7 +46,7 @@
4646
// for |x| in [23/64,3*2^7)
4747
// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p)
4848
//
49-
// For |x| in [2^{-4},2^5):
49+
// For |x| in [2^{-4},22):
5050
// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5
5151
// Let R=1/(1+T0+p*T0), truncated to 35 significant bits
5252
// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33}
@@ -66,11 +66,11 @@
6666
//
6767
// For |x|<2^{-64}: x is returned
6868
//
69-
// For |x|>=2^32: return +/-1
69+
// For |x|>=22: return +/-1
7070
//
7171
// Special cases:
7272
// tanh(NaN) = quiet NaN, and raise invalid exception
73-
// tanh(INF) = that INF
73+
// tanh(+/-INF) = +/-1
7474
// tanh(+/-0) = +/-0
7575
//
7676
/******************************************************************************/
@@ -324,23 +324,25 @@ address StubGenerator::generate_libmTanh() {
324324
__ enter(); // required for proper stackwalking of RuntimeStub frame
325325

326326
__ bind(B1_2);
327+
__ pextrw(rcx, xmm0, 3);
328+
__ movl(rdx, 32768);
329+
__ andl(rdx, rcx);
330+
__ andl(rcx, 32767);
331+
__ cmpl(rcx, 16438);
332+
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); // Branch only if |x| >= 22
327333
__ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/);
328334
__ xorpd(xmm4, xmm4);
329335
__ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/);
330336
__ movsd(xmm2, ExternalAddress(L2E + 8), r11 /*rscratch*/);
331337
__ movl(rax, 32768);
332338
__ pinsrw(xmm4, rax, 3);
333339
__ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/);
334-
__ pextrw(rcx, xmm0, 3);
335340
__ andpd(xmm3, xmm0);
336341
__ andnpd(xmm4, xmm0);
337342
__ pshufd(xmm5, xmm4, 68);
338-
__ movl(rdx, 32768);
339-
__ andl(rdx, rcx);
340-
__ andl(rcx, 32767);
341343
__ subl(rcx, 16304);
342-
__ cmpl(rcx, 144);
343-
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1);
344+
__ cmpl(rcx, 134);
345+
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); // Branch only if |x| is not in [2^{-4},22)
344346
__ subsd(xmm4, xmm3);
345347
__ mulsd(xmm3, xmm1);
346348
__ mulsd(xmm2, xmm5);
@@ -427,8 +429,8 @@ address StubGenerator::generate_libmTanh() {
427429

428430
__ bind(L_2TAG_PACKET_0_0_1);
429431
__ addl(rcx, 960);
430-
__ cmpl(rcx, 1104);
431-
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1);
432+
__ cmpl(rcx, 1094);
433+
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); // Branch only if |x| not in [2^{-64}, 2^{-4})
432434
__ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/);
433435
__ pshufd(xmm1, xmm0, 68);
434436
__ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/);
@@ -449,11 +451,8 @@ address StubGenerator::generate_libmTanh() {
449451
__ jmp(B1_4);
450452

451453
__ bind(L_2TAG_PACKET_1_0_1);
452-
__ addl(rcx, 15344);
453-
__ cmpl(rcx, 16448);
454-
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1);
455454
__ cmpl(rcx, 16);
456-
__ jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
455+
__ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); // Branch only if |x| is denormalized
457456
__ xorpd(xmm2, xmm2);
458457
__ movl(rax, 17392);
459458
__ pinsrw(xmm2, rax, 3);
@@ -468,7 +467,7 @@ address StubGenerator::generate_libmTanh() {
468467

469468
__ bind(L_2TAG_PACKET_2_0_1);
470469
__ cmpl(rcx, 32752);
471-
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1);
470+
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); // Branch only if |x| is INF or NaN
472471
__ xorpd(xmm2, xmm2);
473472
__ movl(rcx, 15344);
474473
__ pinsrw(xmm2, rcx, 3);
@@ -489,7 +488,7 @@ address StubGenerator::generate_libmTanh() {
489488
__ movdl(rcx, xmm2);
490489
__ orl(rcx, rax);
491490
__ cmpl(rcx, 0);
492-
__ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1);
491+
__ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); // Branch only if |x| is not NaN
493492
__ addsd(xmm0, xmm0);
494493

495494
__ bind(B1_4);
test/micro/org/openjdk/bench/java/lang/TanhPerf.java

Lines changed: 154 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
package org.openjdk.bench.java.lang;
24+
25+
import java.util.concurrent.TimeUnit;
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Measurement;
30+
import org.openjdk.jmh.annotations.Mode;
31+
import org.openjdk.jmh.annotations.Param;
32+
import org.openjdk.jmh.annotations.Scope;
33+
import org.openjdk.jmh.annotations.Setup;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Level;
36+
import org.openjdk.jmh.annotations.Warmup;
37+
import org.openjdk.jmh.annotations.OperationsPerInvocation;
38+
import org.openjdk.jmh.annotations.OutputTimeUnit;
39+
import org.openjdk.jmh.runner.Runner;
40+
import org.openjdk.jmh.runner.RunnerException;
41+
import org.openjdk.jmh.runner.options.Options;
42+
import org.openjdk.jmh.runner.options.OptionsBuilder;
43+
44+
import java.util.Random;
45+
46+
public class TanhPerf {
47+
48+
@Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.MILLISECONDS)
49+
@Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.MILLISECONDS)
50+
@Fork(2)
51+
@BenchmarkMode(Mode.Throughput)
52+
@State(Scope.Thread)
53+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
54+
public static class TanhPerfRanges {
55+
public static int tanhInputCount = 2048;
56+
57+
@Param({"0", "1", "2", "3"})
58+
public int tanhRangeIndex;
59+
60+
public double [] tanhPosRandInputs;
61+
public double [] tanhNegRandInputs;
62+
public int tanhInputIndex = 0;
63+
public double tanhRangeInputs[][] = {{0.0, 0x1.0P-55}, {0x1.0P-55, 1.0}, {1.0, 22.0}, {22.1, 1.7976931348623157E308} };
64+
65+
@Setup
66+
public void setupValues() {
67+
Random random = new Random(1023);
68+
69+
// Fill the positive and negative tanh vectors with random values
70+
tanhPosRandInputs = new double[tanhInputCount];
71+
tanhNegRandInputs = new double[tanhInputCount];
72+
73+
for (int i = 0; i < tanhInputCount; i++) {
74+
double tanhLowerBound = tanhRangeInputs[tanhRangeIndex][0];
75+
double tanhUpperBound = tanhRangeInputs[tanhRangeIndex][1];
76+
tanhPosRandInputs[i] = random.nextDouble(tanhLowerBound, tanhUpperBound);
77+
tanhNegRandInputs[i] = random.nextDouble(-tanhUpperBound, -tanhLowerBound);
78+
}
79+
}
80+
81+
@Benchmark
82+
@OperationsPerInvocation(2048)
83+
public double tanhPosRangeDouble() {
84+
double res = 0.0;
85+
for (int i = 0; i < tanhInputCount; i++) {
86+
res += Math.tanh(tanhPosRandInputs[i]);
87+
}
88+
return res;
89+
}
90+
91+
@Benchmark
92+
@OperationsPerInvocation(2048)
93+
public double tanhNegRangeDouble() {
94+
double res = 0.0;
95+
for (int i = 0; i < tanhInputCount; i++) {
96+
res += Math.tanh(tanhNegRandInputs[i]);
97+
}
98+
return res;
99+
}
100+
}
101+
102+
@Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
103+
@Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
104+
@Fork(2)
105+
@BenchmarkMode(Mode.Throughput)
106+
@State(Scope.Thread)
107+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
108+
public static class TanhPerfConstant {
109+
public static final double constDoubleTiny = 0x1.0P-57;
110+
public static final double constDoubleSmall = 0x1.0P-54;
111+
public static final double constDouble1 = 1.0;
112+
public static final double constDouble21 = 21.0;
113+
public static final double constDoubleLarge = 23.0;
114+
115+
@Benchmark
116+
public double tanhConstDoubleTiny() {
117+
return Math.tanh(constDoubleTiny);
118+
}
119+
120+
@Benchmark
121+
public double tanhConstDoubleSmall() {
122+
return Math.tanh(constDoubleSmall);
123+
}
124+
125+
@Benchmark
126+
public double tanhConstDouble1() {
127+
return Math.tanh(constDouble1);
128+
}
129+
130+
@Benchmark
131+
public double tanhConstDouble21() {
132+
return Math.tanh(constDouble21);
133+
}
134+
135+
@Benchmark
136+
public double tanhConstDoubleLarge() {
137+
return Math.tanh(constDoubleLarge);
138+
}
139+
}
140+
141+
public static void main(String[] args) throws RunnerException {
142+
Options opt = new OptionsBuilder()
143+
.include(TanhPerfRanges.class.getSimpleName())
144+
.build();
145+
146+
new Runner(opt).run();
147+
148+
opt = new OptionsBuilder()
149+
.include(TanhPerfConstant.class.getSimpleName())
150+
.build();
151+
152+
new Runner(opt).run();
153+
}
154+
}

0 commit comments

Comments
 (0)