Skip to content

Commit 703c2de

Browse files
committed
[GR-52910] Backport to 24.0: Fix caching of Regexps.
PullRequest: truffleruby/4219
2 parents 264c7e0 + 9cacd81 commit 703c2de

18 files changed

+482
-62
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
Bug fixes:
44
* Fix `rb_global_variable()` for `Float` and bignum values during the `Init_` function (#3478, @eregon).
55

6+
Performance:
7+
* Fix inline caching for Regexp creation from Strings (#3492, @andrykonchin, @eregon).
8+
69
# 24.0.0
710

811
New features:

spec/tags/truffle/splitting_tags.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
slow:Critical methods whic must split are under 100 AST nodes
2+
slow:Critical methods which must split are under 100 AST nodes
Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
# truffleruby_primitives: true
2+
3+
# Copyright (c) 2024 Oracle and/or its affiliates. All rights reserved. This
4+
# code is released under a tri EPL/GPL/LGPL license. You can use it,
5+
# redistribute it and/or modify it under the terms of the:
6+
#
7+
# Eclipse Public License version 2.0, or
8+
# GNU General Public License version 2, or
9+
# GNU Lesser General Public License version 2.1.
10+
11+
require_relative '../../ruby/spec_helper'
12+
13+
# This test requires splitting (--engine.Splitting) which is only available with the OptimizedTruffleRuntime.
14+
# It fails under --experimental-engine-caching because CallInternalMethodNode does not have cached specializations for
15+
# !isSingleContext() and so ends up using an IndirectCallNode which prevents splitting.
16+
guard -> { TruffleRuby.jit? && !Truffle::Boot.get_option('experimental-engine-caching') && Truffle::Boot.get_option("default-cache") != 0 } do
17+
describe "Inline caching for dynamically-created Regexp works for" do
18+
before :each do
19+
@performance_warnings, Warning[:performance] = Warning[:performance], true
20+
end
21+
22+
after :each do
23+
Warning[:performance] = @performance_warnings
24+
end
25+
26+
it "Regexp.new" do
27+
# Check that separate call sites with fixed input do not warn
28+
-> {
29+
Regexp.new("a")
30+
Regexp.new("b")
31+
Regexp.new("c")
32+
Regexp.new("d")
33+
Regexp.new("e")
34+
Regexp.new("f")
35+
Regexp.new("g")
36+
Regexp.new("h")
37+
Regexp.new("i")
38+
Regexp.new("j")
39+
}.should_not complain
40+
41+
# Check that calling it with many different inputs has the warning
42+
-> {
43+
("a".."z").each do |pattern|
44+
Regexp.new(pattern)
45+
end
46+
}.should complain(/unbounded creation of regexps/)
47+
end
48+
49+
it "Regexp.union with 1 argument" do
50+
# Check that separate call sites with fixed input do not warn
51+
-> {
52+
Regexp.union("a")
53+
Regexp.union("b")
54+
Regexp.union("c")
55+
Regexp.union("d")
56+
Regexp.union("e")
57+
Regexp.union("f")
58+
Regexp.union("g")
59+
Regexp.union("h")
60+
Regexp.union("i")
61+
Regexp.union("j")
62+
}.should_not complain
63+
64+
# Check that calling it with many different inputs has the warning
65+
-> {
66+
("a".."z").each do |pattern|
67+
Regexp.union(pattern)
68+
end
69+
}.should complain(/unbounded creation of regexps/)
70+
end
71+
72+
it "Regexp.union with multiple arguments" do
73+
# Check that separate call sites with fixed input do not warn
74+
-> {
75+
Regexp.union("h", "a")
76+
Regexp.union("h", "b")
77+
Regexp.union("h", "c")
78+
Regexp.union("h", "d")
79+
Regexp.union("h", "e")
80+
Regexp.union("h", "f")
81+
Regexp.union("h", "g")
82+
Regexp.union("h", "h")
83+
Regexp.union("h", "i")
84+
Regexp.union("h", "j")
85+
}.should_not complain
86+
87+
# Check that calling it with many different inputs has the warning
88+
-> {
89+
("a".."z").each do |pattern|
90+
Regexp.union("h", pattern)
91+
end
92+
}.should complain(/unbounded creation of regexps/)
93+
end
94+
95+
it "interpolated Regexp" do
96+
# Check that calling it with many different inputs has the warning
97+
-> {
98+
("a".."z").each do |pattern|
99+
/#{pattern}/
100+
end
101+
}.should complain(/unstable interpolated regexps/)
102+
end
103+
104+
it "String#scan" do
105+
# Check that separate call sites with fixed input do not warn
106+
-> {
107+
"zzz".scan("a")
108+
"zzz".scan("b")
109+
"zzz".scan("c")
110+
"zzz".scan("d")
111+
"zzz".scan("e")
112+
"zzz".scan("f")
113+
"zzz".scan("g")
114+
"zzz".scan("h")
115+
"zzz".scan("i")
116+
"zzz".scan("j")
117+
}.should_not complain
118+
119+
# Check that calling it with many different inputs has the warning
120+
-> {
121+
# "a".."z" and not just "a".."j" because there can be some late heuristic megamorphic splitting by TRegex (ExecCompiledRegexNode)
122+
("a".."z").each do |pattern|
123+
"zzz".scan(pattern)
124+
end
125+
}.should complain(/unbounded creation of regexps/)
126+
end
127+
128+
it "String#sub" do
129+
# Avoid a String here so that Truffle::Type.coerce_to_regexp is triggered. A String argument is handled with
130+
# Primitive.matchdata_create_single_group and isn't converted to Regexp immediately.
131+
pattern = Class.new do
132+
def initialize(string) = @string = string
133+
def to_str = @string
134+
end
135+
136+
# Check that separate call sites with fixed input do not warn
137+
-> {
138+
"zzz".sub(pattern.new("a"), "replacement")
139+
"zzz".sub(pattern.new("b"), "replacement")
140+
"zzz".sub(pattern.new("c"), "replacement")
141+
"zzz".sub(pattern.new("d"), "replacement")
142+
"zzz".sub(pattern.new("e"), "replacement")
143+
"zzz".sub(pattern.new("f"), "replacement")
144+
"zzz".sub(pattern.new("g"), "replacement")
145+
"zzz".sub(pattern.new("h"), "replacement")
146+
"zzz".sub(pattern.new("i"), "replacement")
147+
"zzz".sub(pattern.new("j"), "replacement")
148+
}.should_not complain
149+
150+
# Check that calling it with many different inputs has the warning
151+
-> {
152+
("a".."z").each do |s|
153+
"zzz".sub(pattern.new(s), "replacement")
154+
end
155+
}.should complain(/unbounded creation of regexps/)
156+
end
157+
158+
it "String#sub!" do
159+
# Avoid a String here so that Truffle::Type.coerce_to_regexp is triggered. A String argument is handled with
160+
# Primitive.matchdata_create_single_group and isn't converted to Regexp immediately.
161+
pattern = Class.new do
162+
def initialize(string) = @string = string
163+
def to_str = @string
164+
end
165+
166+
# Check that separate call sites with fixed input do not warn
167+
-> {
168+
"zzz".sub!(pattern.new("a"), "replacement")
169+
"zzz".sub!(pattern.new("b"), "replacement")
170+
"zzz".sub!(pattern.new("c"), "replacement")
171+
"zzz".sub!(pattern.new("d"), "replacement")
172+
"zzz".sub!(pattern.new("e"), "replacement")
173+
"zzz".sub!(pattern.new("f"), "replacement")
174+
"zzz".sub!(pattern.new("g"), "replacement")
175+
"zzz".sub!(pattern.new("h"), "replacement")
176+
"zzz".sub!(pattern.new("i"), "replacement")
177+
"zzz".sub!(pattern.new("j"), "replacement")
178+
}.should_not complain
179+
180+
# Check that calling it with many different inputs has the warning
181+
-> {
182+
("a".."z").each do |s|
183+
"zzz".sub!(pattern.new(s), "replacement")
184+
end
185+
}.should complain(/unbounded creation of regexps/)
186+
end
187+
188+
it "String#gsub" do
189+
# Avoid a String here so that Truffle::Type.coerce_to_regexp is triggered. A String argument is handled with
190+
# Primitive.matchdata_create_single_group and isn't converted to Regexp immediately.
191+
pattern = Class.new do
192+
def initialize(string) = @string = string
193+
def to_str = @string
194+
end
195+
196+
# Check that separate call sites with fixed input do not warn
197+
-> {
198+
"zzz".gsub(pattern.new("a"), "replacement")
199+
"zzz".gsub(pattern.new("b"), "replacement")
200+
"zzz".gsub(pattern.new("c"), "replacement")
201+
"zzz".gsub(pattern.new("d"), "replacement")
202+
"zzz".gsub(pattern.new("e"), "replacement")
203+
"zzz".gsub(pattern.new("f"), "replacement")
204+
"zzz".gsub(pattern.new("g"), "replacement")
205+
"zzz".gsub(pattern.new("h"), "replacement")
206+
"zzz".gsub(pattern.new("i"), "replacement")
207+
"zzz".gsub(pattern.new("j"), "replacement")
208+
}.should_not complain
209+
210+
# Check that calling it with many different inputs has the warning
211+
-> {
212+
("a".."z").each do |s|
213+
"zzz".gsub(pattern.new(s), "replacement")
214+
end
215+
}.should complain(/unbounded creation of regexps/)
216+
end
217+
218+
it "String#gsub!" do
219+
# Avoid a String here so that Truffle::Type.coerce_to_regexp is triggered. A String argument is handled with
220+
# Primitive.matchdata_create_single_group and isn't converted to Regexp immediately.
221+
pattern = Class.new do
222+
def initialize(string) = @string = string
223+
def to_str = @string
224+
end
225+
226+
# Check that separate call sites with fixed input do not warn
227+
-> {
228+
"zzz".gsub!(pattern.new("a"), "replacement")
229+
"zzz".gsub!(pattern.new("b"), "replacement")
230+
"zzz".gsub!(pattern.new("c"), "replacement")
231+
"zzz".gsub!(pattern.new("d"), "replacement")
232+
"zzz".gsub!(pattern.new("e"), "replacement")
233+
"zzz".gsub!(pattern.new("f"), "replacement")
234+
"zzz".gsub!(pattern.new("g"), "replacement")
235+
"zzz".gsub!(pattern.new("h"), "replacement")
236+
"zzz".gsub!(pattern.new("i"), "replacement")
237+
"zzz".gsub!(pattern.new("j"), "replacement")
238+
}.should_not complain
239+
240+
# Check that calling it with many different inputs has the warning
241+
-> {
242+
("a".."z").each do |s|
243+
"zzz".gsub!(pattern.new(s), "replacement")
244+
end
245+
}.should complain(/unbounded creation of regexps/)
246+
end
247+
248+
it "String#match" do
249+
# Check that separate call sites with fixed input do not warn
250+
-> {
251+
"zzz".match("a")
252+
"zzz".match("b")
253+
"zzz".match("c")
254+
"zzz".match("d")
255+
"zzz".match("e")
256+
"zzz".match("f")
257+
"zzz".match("g")
258+
"zzz".match("h")
259+
"zzz".match("i")
260+
"zzz".match("j")
261+
}.should_not complain
262+
263+
# Check that calling it with many different inputs has the warning
264+
-> {
265+
("a".."z").each do |pattern|
266+
"zzz".match(pattern)
267+
end
268+
}.should complain(/unbounded creation of regexps/)
269+
end
270+
271+
it "String#match?" do
272+
# Check that separate call sites with fixed input do not warn
273+
-> {
274+
"zzz".match?("a")
275+
"zzz".match?("b")
276+
"zzz".match?("c")
277+
"zzz".match?("d")
278+
"zzz".match?("e")
279+
"zzz".match?("f")
280+
"zzz".match?("g")
281+
"zzz".match?("h")
282+
"zzz".match?("i")
283+
"zzz".match?("j")
284+
}.should_not complain
285+
286+
# Check that calling it with many different inputs has the warning
287+
-> {
288+
("a".."z").each do |pattern|
289+
"zzz".match?(pattern)
290+
end
291+
}.should complain(/unbounded creation of regexps/)
292+
end
293+
294+
it "Symbol#match" do
295+
# Check that separate call sites with fixed input do not warn
296+
-> {
297+
:zzz.match("a")
298+
:zzz.match("b")
299+
:zzz.match("c")
300+
:zzz.match("d")
301+
:zzz.match("e")
302+
:zzz.match("f")
303+
:zzz.match("g")
304+
:zzz.match("h")
305+
:zzz.match("i")
306+
:zzz.match("j")
307+
}.should_not complain
308+
309+
# Check that calling it with many different inputs has the warning
310+
-> {
311+
("a".."z").each do |pattern|
312+
:zzz.match(pattern)
313+
end
314+
}.should complain(/unbounded creation of regexps/)
315+
end
316+
317+
it "Symbol#match?" do
318+
# Check that separate call sites with fixed input do not warn
319+
-> {
320+
:zzz.match?("a")
321+
:zzz.match?("b")
322+
:zzz.match?("c")
323+
:zzz.match?("d")
324+
:zzz.match?("e")
325+
:zzz.match?("f")
326+
:zzz.match?("g")
327+
:zzz.match?("h")
328+
:zzz.match?("i")
329+
:zzz.match?("j")
330+
}.should_not complain
331+
332+
# Check that calling it with many different inputs has the warning
333+
-> {
334+
("a".."z").each do |pattern|
335+
:zzz.match?(pattern)
336+
end
337+
}.should complain(/unbounded creation of regexps/)
338+
end
339+
end
340+
end

spec/truffle/splitting_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
require_relative '../ruby/spec_helper'
1010

11-
describe 'Critical methods whic must split' do
11+
describe 'Critical methods which must split' do
1212
it 'are under 100 AST nodes' do
1313
code = <<-'EOF'
1414
require 'strscan'

src/main/java/org/truffleruby/core/CoreLibrary.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ public final class CoreLibrary {
202202
public final RubyModule truffleRegexpOperationsModule;
203203
public final RubyModule truffleRandomOperationsModule;
204204
public final RubyModule truffleThreadOperationsModule;
205+
public final RubyModule truffleWarningOperationsModule;
205206
public final RubyClass encodingCompatibilityErrorClass;
206207
public final RubyClass encodingUndefinedConversionErrorClass;
207208
public final RubyClass methodClass;
@@ -508,6 +509,7 @@ public CoreLibrary(RubyContext context, RubyLanguage language) {
508509
defineModule(truffleModule, "ReadlineHistory");
509510
truffleRandomOperationsModule = defineModule(truffleModule, "RandomOperations");
510511
truffleThreadOperationsModule = defineModule(truffleModule, "ThreadOperations");
512+
truffleWarningOperationsModule = defineModule(truffleModule, "WarningOperations");
511513
defineModule(truffleModule, "WeakRefOperations");
512514
handleClass = defineClass(truffleModule, objectClass, "Handle");
513515
warningModule = defineModule("Warning");

src/main/java/org/truffleruby/core/regexp/InterpolatedRegexpNode.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import org.truffleruby.core.encoding.RubyEncoding;
1616
import org.truffleruby.core.regexp.InterpolatedRegexpNodeFactory.RegexpBuilderNodeGen;
1717
import org.truffleruby.core.string.TStringWithEncoding;
18-
import org.truffleruby.language.NotOptimizedWarningNode;
18+
import org.truffleruby.language.PerformanceWarningNode;
1919
import org.truffleruby.language.RubyBaseNode;
2020
import org.truffleruby.language.RubyContextSourceNode;
2121
import org.truffleruby.language.RubyNode;
@@ -103,8 +103,9 @@ Object fast(TStringWithEncoding[] parts,
103103

104104
@Specialization(replaces = "fast")
105105
Object slow(TStringWithEncoding[] parts,
106-
@Cached NotOptimizedWarningNode notOptimizedWarningNode) {
107-
notOptimizedWarningNode.warn("unstable interpolated regexps are not optimized");
106+
@Cached PerformanceWarningNode performanceWarningNode) {
107+
performanceWarningNode.warn(
108+
"unstable interpolated regexps cause deoptimization loops which hurt performance significantly, avoid creating regexps dynamically where possible or cache them to fix this");
108109
return createRegexp(parts);
109110
}
110111

0 commit comments

Comments
 (0)