Skip to content

Commit 49d6777

Browse files
HeliC829 authored and gopherbot committed
crypto/internal/fips140/subtle: add assembly implementation of xorBytes for mips64x
goos: linux goarch: mips64le pkg: crypto/subtle │ oldsubtle │ newsubtle │ │ sec/op │ sec/op vs base │ ConstantTimeByteEq-4 5.011n ± 0% 5.014n ± 0% ~ (p=0.110 n=8) ConstantTimeEq-4 3.342n ± 0% 3.342n ± 0% ~ (p=0.993 n=8) ConstantTimeLessOrEq-4 4.455n ± 0% 4.458n ± 0% ~ (p=0.182 n=8) XORBytes/8Bytes-4 36.48n ± 0% 26.73n ± 0% -26.74% (p=0.000 n=8) XORBytes/128Bytes-4 70.21n ± 0% 50.14n ± 0% -28.59% (p=0.000 n=8) XORBytes/2048Bytes-4 566.1n ± 0% 257.2n ± 0% -54.58% (p=0.000 n=8) XORBytes/8192Bytes-4 2123.0n ± 0% 966.8n ± 0% -54.46% (p=0.000 n=8) XORBytes/32768Bytes-4 13.740µ ± 0% 5.614µ ± 0% -59.14% (p=0.000 n=8) XORBytesAlignment/8Bytes0Offset-4 38.98n ± 0% 26.53n ± 0% -31.95% (p=0.000 n=8) XORBytesAlignment/8Bytes1Offset-4 43.27n ± 0% 26.54n ± 0% -38.68% (p=0.000 n=8) XORBytesAlignment/8Bytes2Offset-4 43.28n ± 0% 26.54n ± 0% -38.69% (p=0.000 n=8) XORBytesAlignment/8Bytes3Offset-4 43.32n ± 0% 26.54n ± 0% -38.74% (p=0.000 n=8) XORBytesAlignment/8Bytes4Offset-4 43.49n ± 0% 26.53n ± 0% -38.99% (p=0.000 n=8) XORBytesAlignment/8Bytes5Offset-4 43.53n ± 0% 26.54n ± 0% -39.03% (p=0.000 n=8) XORBytesAlignment/8Bytes6Offset-4 43.48n ± 0% 26.53n ± 0% -38.98% (p=0.000 n=8) XORBytesAlignment/8Bytes7Offset-4 43.46n ± 1% 26.53n ± 0% -38.96% (p=0.000 n=8) XORBytesAlignment/128Bytes0Offset-4 71.84n ± 0% 47.70n ± 1% -33.60% (p=0.000 n=8) XORBytesAlignment/128Bytes1Offset-4 260.60n ± 0% 59.87n ± 0% -77.03% (p=0.000 n=8) XORBytesAlignment/128Bytes2Offset-4 260.60n ± 0% 59.81n ± 0% -77.05% (p=0.000 n=8) XORBytesAlignment/128Bytes3Offset-4 260.55n ± 0% 59.89n ± 0% -77.01% (p=0.000 n=8) XORBytesAlignment/128Bytes4Offset-4 260.60n ± 0% 59.84n ± 0% -77.04% (p=0.000 n=8) XORBytesAlignment/128Bytes5Offset-4 260.70n ± 0% 59.82n ± 0% -77.05% (p=0.000 n=8) XORBytesAlignment/128Bytes6Offset-4 260.60n ± 0% 59.89n ± 0% -77.02% (p=0.000 n=8) XORBytesAlignment/128Bytes7Offset-4 260.70n ± 0% 59.85n ± 0% -77.04% (p=0.000 n=8) XORBytesAlignment/2048Bytes0Offset-4 552.2n ± 1% 250.0n ± 0% -54.73% (p=0.000 
n=8) XORBytesAlignment/2048Bytes1Offset-4 3603.0n ± 0% 548.6n ± 0% -84.77% (p=0.000 n=8) XORBytesAlignment/2048Bytes2Offset-4 3602.0n ± 0% 548.6n ± 0% -84.77% (p=0.000 n=8) XORBytesAlignment/2048Bytes3Offset-4 3604.0n ± 0% 548.6n ± 0% -84.78% (p=0.000 n=8) XORBytesAlignment/2048Bytes4Offset-4 3603.5n ± 0% 548.9n ± 0% -84.77% (p=0.000 n=8) XORBytesAlignment/2048Bytes5Offset-4 3603.0n ± 0% 548.8n ± 0% -84.77% (p=0.000 n=8) XORBytesAlignment/2048Bytes6Offset-4 3602.0n ± 0% 548.6n ± 0% -84.77% (p=0.000 n=8) XORBytesAlignment/2048Bytes7Offset-4 3601.5n ± 0% 548.5n ± 0% -84.77% (p=0.000 n=8) geomean 220.0n 81.91n -62.77% │ oldsubtle │ newsubtle │ │ B/s │ B/s vs base │ XORBytes/8Bytes-4 209.1Mi ± 0% 285.5Mi ± 0% +36.52% (p=0.000 n=8) XORBytes/128Bytes-4 1.698Gi ± 0% 2.378Gi ± 0% +40.04% (p=0.000 n=8) XORBytes/2048Bytes-4 3.369Gi ± 0% 7.418Gi ± 0% +120.17% (p=0.000 n=8) XORBytes/8192Bytes-4 3.594Gi ± 0% 7.892Gi ± 0% +119.59% (p=0.000 n=8) XORBytes/32768Bytes-4 2.221Gi ± 0% 5.436Gi ± 0% +144.76% (p=0.000 n=8) XORBytesAlignment/8Bytes0Offset-4 195.7Mi ± 0% 287.6Mi ± 0% +46.96% (p=0.000 n=8) XORBytesAlignment/8Bytes1Offset-4 176.3Mi ± 0% 287.5Mi ± 0% +63.06% (p=0.000 n=8) XORBytesAlignment/8Bytes2Offset-4 176.3Mi ± 0% 287.4Mi ± 0% +63.07% (p=0.000 n=8) XORBytesAlignment/8Bytes3Offset-4 176.1Mi ± 0% 287.5Mi ± 0% +63.25% (p=0.000 n=8) XORBytesAlignment/8Bytes4Offset-4 175.5Mi ± 0% 287.6Mi ± 0% +63.90% (p=0.000 n=8) XORBytesAlignment/8Bytes5Offset-4 175.3Mi ± 0% 287.5Mi ± 0% +64.02% (p=0.000 n=8) XORBytesAlignment/8Bytes6Offset-4 175.5Mi ± 0% 287.6Mi ± 0% +63.86% (p=0.000 n=8) XORBytesAlignment/8Bytes7Offset-4 175.5Mi ± 0% 287.6Mi ± 0% +63.85% (p=0.000 n=8) XORBytesAlignment/128Bytes0Offset-4 1.659Gi ± 0% 2.499Gi ± 1% +50.61% (p=0.000 n=8) XORBytesAlignment/128Bytes1Offset-4 468.4Mi ± 0% 2039.0Mi ± 0% +335.30% (p=0.000 n=8) XORBytesAlignment/128Bytes2Offset-4 468.4Mi ± 0% 2040.9Mi ± 0% +335.73% (p=0.000 n=8) XORBytesAlignment/128Bytes3Offset-4 468.5Mi ± 0% 2038.1Mi ± 0% +335.02% 
(p=0.000 n=8) XORBytesAlignment/128Bytes4Offset-4 468.4Mi ± 0% 2040.0Mi ± 0% +335.52% (p=0.000 n=8) XORBytesAlignment/128Bytes5Offset-4 468.2Mi ± 0% 2040.5Mi ± 0% +335.82% (p=0.000 n=8) XORBytesAlignment/128Bytes6Offset-4 468.4Mi ± 0% 2038.2Mi ± 0% +335.13% (p=0.000 n=8) XORBytesAlignment/128Bytes7Offset-4 468.2Mi ± 0% 2039.4Mi ± 0% +335.58% (p=0.000 n=8) XORBytesAlignment/2048Bytes0Offset-4 3.454Gi ± 1% 7.629Gi ± 0% +120.90% (p=0.000 n=8) XORBytesAlignment/2048Bytes1Offset-4 542.1Mi ± 0% 3560.1Mi ± 0% +556.68% (p=0.000 n=8) XORBytesAlignment/2048Bytes2Offset-4 542.3Mi ± 0% 3560.1Mi ± 0% +556.48% (p=0.000 n=8) XORBytesAlignment/2048Bytes3Offset-4 541.9Mi ± 0% 3560.0Mi ± 0% +556.93% (p=0.000 n=8) XORBytesAlignment/2048Bytes4Offset-4 542.0Mi ± 0% 3558.8Mi ± 0% +556.67% (p=0.000 n=8) XORBytesAlignment/2048Bytes5Offset-4 542.1Mi ± 3% 3558.8Mi ± 0% +556.53% (p=0.000 n=8) XORBytesAlignment/2048Bytes6Offset-4 542.2Mi ± 0% 3560.2Mi ± 0% +556.57% (p=0.000 n=8) XORBytesAlignment/2048Bytes7Offset-4 542.3Mi ± 0% 3560.5Mi ± 0% +556.56% (p=0.000 n=8) geomean 514.9Mi 1.496Gi +197.56% Change-Id: I649fa6bfca31296d65cccdf5fceb3dcfa0c588a1 Reviewed-on: https://go-review.googlesource.com/c/go/+/666255 Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent 7bc714d commit 49d6777

File tree

3 files changed

+155
-2
lines changed

3 files changed

+155
-2
lines changed

src/crypto/internal/fips140/subtle/xor_asm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build (amd64 || arm64 || loong64 || ppc64 || ppc64le || riscv64) && !purego
5+
//go:build (amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64) && !purego
66

77
package subtle
88

src/crypto/internal/fips140/subtle/xor_generic.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build (!amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64) || purego
5+
//go:build (!amd64 && !arm64 && !loong64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !riscv64) || purego
66

77
package subtle
88

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build (mips64 || mips64le) && !purego
6+
7+
#include "textflag.h"
8+
9+
// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes stores a[i] ^ b[i] into dst[i] for every i in [0, n).
// A call with n == 0 returns without touching memory.
//
// Register roles:
//   R1      dst cursor
//   R2      a cursor
//   R3      b cursor
//   R4      number of bytes still to process
//   R5      scratch: result of the SGTU size comparisons
//   R6-R13  data words being XORed
//
// The input is consumed in progressively smaller chunks: a 64-byte loop,
// followed by at most one 32-, 16-, 8-, 4-, 2- and 1-byte step each.
//
// NOTE(review): the MOVV/MOVW/MOVH accesses are issued without any alignment
// check on dst, a or b; confirm that all supported mips64x targets tolerate
// unaligned accesses here.
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
	MOVV	dst+0(FP), R1
	MOVV	a+8(FP), R2
	MOVV	b+16(FP), R3
	MOVV	n+24(FP), R4

	// Nothing to do for an empty input. Without this guard every size check
	// below falls through to xor_1, which would read and write one byte.
	BEQ	R0, R4, end

xor_64_check:
	SGTU	$64, R4, R5 // R5 = 1 if (64 > R4)
	BNE	R5, xor_32_check
xor_64:
	// First 32 bytes of the 64-byte chunk.
	MOVV	(R2), R6
	MOVV	8(R2), R7
	MOVV	16(R2), R8
	MOVV	24(R2), R9
	MOVV	(R3), R10
	MOVV	8(R3), R11
	MOVV	16(R3), R12
	MOVV	24(R3), R13
	XOR	R6, R10
	XOR	R7, R11
	XOR	R8, R12
	XOR	R9, R13
	MOVV	R10, (R1)
	MOVV	R11, 8(R1)
	MOVV	R12, 16(R1)
	MOVV	R13, 24(R1)
	// Second 32 bytes of the 64-byte chunk.
	MOVV	32(R2), R6
	MOVV	40(R2), R7
	MOVV	48(R2), R8
	MOVV	56(R2), R9
	MOVV	32(R3), R10
	MOVV	40(R3), R11
	MOVV	48(R3), R12
	MOVV	56(R3), R13
	XOR	R6, R10
	XOR	R7, R11
	XOR	R8, R12
	XOR	R9, R13
	MOVV	R10, 32(R1)
	MOVV	R11, 40(R1)
	MOVV	R12, 48(R1)
	MOVV	R13, 56(R1)
	ADDV	$64, R2
	ADDV	$64, R3
	ADDV	$64, R1
	SUBV	$64, R4
	SGTU	$64, R4, R5 // R5 = 1 if (64 > R4)
	BEQ	R0, R5, xor_64 // keep looping while at least 64 bytes remain
	BEQ	R0, R4, end

xor_32_check:
	SGTU	$32, R4, R5 // R5 = 1 if (32 > R4)
	BNE	R5, xor_16_check
xor_32:
	MOVV	(R2), R6
	MOVV	8(R2), R7
	MOVV	16(R2), R8
	MOVV	24(R2), R9
	MOVV	(R3), R10
	MOVV	8(R3), R11
	MOVV	16(R3), R12
	MOVV	24(R3), R13
	XOR	R6, R10
	XOR	R7, R11
	XOR	R8, R12
	XOR	R9, R13
	MOVV	R10, (R1)
	MOVV	R11, 8(R1)
	MOVV	R12, 16(R1)
	MOVV	R13, 24(R1)
	ADDV	$32, R2
	ADDV	$32, R3
	ADDV	$32, R1
	SUBV	$32, R4
	BEQ	R0, R4, end

xor_16_check:
	SGTU	$16, R4, R5 // R5 = 1 if (16 > R4)
	BNE	R5, xor_8_check
xor_16:
	MOVV	(R2), R6
	MOVV	8(R2), R7
	MOVV	(R3), R8
	MOVV	8(R3), R9
	XOR	R6, R8
	XOR	R7, R9
	MOVV	R8, (R1)
	MOVV	R9, 8(R1)
	ADDV	$16, R2
	ADDV	$16, R3
	ADDV	$16, R1
	SUBV	$16, R4
	BEQ	R0, R4, end

xor_8_check:
	SGTU	$8, R4, R5 // R5 = 1 if (8 > R4)
	BNE	R5, xor_4_check
xor_8:
	MOVV	(R2), R6
	MOVV	(R3), R7
	XOR	R6, R7
	MOVV	R7, (R1)
	ADDV	$8, R1
	ADDV	$8, R2
	ADDV	$8, R3
	SUBV	$8, R4
	BEQ	R0, R4, end

xor_4_check:
	SGTU	$4, R4, R5 // R5 = 1 if (4 > R4)
	BNE	R5, xor_2_check
xor_4:
	MOVW	(R2), R6
	MOVW	(R3), R7
	XOR	R6, R7
	MOVW	R7, (R1)
	ADDV	$4, R2
	ADDV	$4, R3
	ADDV	$4, R1
	SUBV	$4, R4
	BEQ	R0, R4, end

xor_2_check:
	SGTU	$2, R4, R5 // R5 = 1 if (2 > R4)
	BNE	R5, xor_1
xor_2:
	MOVH	(R2), R6
	MOVH	(R3), R7
	XOR	R6, R7
	MOVH	R7, (R1)
	ADDV	$2, R2
	ADDV	$2, R3
	ADDV	$2, R1
	SUBV	$2, R4
	BEQ	R0, R4, end

xor_1:
	// Exactly one byte remains here: n == 0 was rejected on entry and every
	// earlier step branches to end once R4 reaches zero.
	MOVB	(R2), R6
	MOVB	(R3), R7
	XOR	R6, R7
	MOVB	R7, (R1)

end:
	RET

0 commit comments

Comments
 (0)