Skip to content

Commit c7125aa

Browse files
mhaessigrobcasloz
andcommitted
8020282: Generated code quality: redundant LEAs in the chained dereferences
Co-authored-by: Roberto Castañeda Lozano <rcastanedalo@openjdk.org> Reviewed-by: kvn, rcastanedalo
1 parent 0dce98b commit c7125aa

File tree

6 files changed

+668
-0
lines changed

6 files changed

+668
-0
lines changed

src/hotspot/cpu/x86/peephole_x86_64.cpp

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#ifdef COMPILER2
2626

27+
#include "opto/addnode.hpp"
2728
#include "peephole_x86_64.hpp"
2829
#include "adfiles/ad_x86.hpp"
2930

@@ -235,6 +236,143 @@ bool Peephole::test_may_remove(Block* block, int block_index, PhaseCFG* cfg_, Ph
235236
return false;
236237
}
237238

239+
// This function removes redundant lea instructions that result from chained dereferences that
240+
// match to leaPCompressedOopOffset, leaP8Narrow, or leaP32Narrow. This happens for ideal graphs
241+
// of the form LoadN -> DecodeN -> AddP. Matching with any leaP* rule consumes both the AddP and
242+
// the DecodeN. However, after matching the DecodeN is added back as the base for the leaP*,
243+
// which is necessary if the oop derived by the leaP* gets added to an OopMap, because OopMaps
244+
// cannot contain derived oops with narrow oops as a base.
245+
// This results in the following graph after matching:
246+
// LoadN
247+
// | \
248+
// | decodeHeapOop_not_null
249+
// | / \
250+
// leaP* MachProj (leaf)
251+
// The decode_heap_oop_not_null will emit a lea with an unused result if the derived oop does
252+
// not end up in an OopMap.
253+
// This peephole recognizes graphs of the shape as shown above, ensures that the result of the
254+
// decode is only used by the derived oop and removes that decode if this is the case. Further,
255+
// multiple leaP*s can have the same decode as their base. This peephole will remove the decode
256+
// if all leaP*s and the decode share the same parent.
257+
// Additionally, if the register allocator spills the result of the LoadN we can get such a graph:
258+
// LoadN
259+
// |
260+
// DefinitionSpillCopy
261+
// / \
262+
// MemToRegSpillCopy MemToRegSpillCopy
263+
// | /
264+
// | decodeHeapOop_not_null
265+
// | / \
266+
// leaP* MachProj (leaf)
267+
// In this case where the common parent of the leaP* and the decode is one MemToRegSpillCopy
268+
// away, this peephole can also recognize the decode as redundant and also remove the spill copy
269+
// if that is only used by the decode.
270+
bool Peephole::lea_remove_redundant(Block* block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc* ra_,
271+
MachNode* (*new_root)(), uint inst0_rule) {
272+
MachNode* lea_derived_oop = block->get_node(block_index)->as_Mach();
273+
assert(lea_derived_oop->rule() == inst0_rule, "sanity");
274+
assert(lea_derived_oop->ideal_Opcode() == Op_AddP, "sanity");
275+
276+
MachNode* decode = lea_derived_oop->in(AddPNode::Base)->isa_Mach();
277+
if (decode == nullptr || decode->ideal_Opcode() != Op_DecodeN) {
278+
return false;
279+
}
280+
281+
// Check that the lea and the decode live in the same block.
282+
if (!block->contains(decode)) {
283+
return false;
284+
}
285+
286+
Node* lea_address = lea_derived_oop->in(AddPNode::Address);
287+
Node* decode_address = decode->in(1);
288+
289+
bool is_spill = lea_address != decode_address &&
290+
lea_address->is_SpillCopy() &&
291+
decode_address->is_SpillCopy();
292+
293+
// If this is a spill, move lea_address and decode_address one node further up to the
294+
// grandparents of lea_derived_oop and decode respectively. This lets us look through
295+
// the indirection of the spill.
296+
if (is_spill) {
297+
decode_address = decode_address->in(1);
298+
lea_address = lea_address->in(1);
299+
}
300+
301+
// The leaP* and the decode must have the same parent. If we have a spill, they must have
302+
// the same grandparent.
303+
if (lea_address != decode_address) {
304+
return false;
305+
}
306+
307+
// Ensure the decode only has the leaP*s (with the same (grand)parent) and a MachProj leaf as children.
308+
MachProjNode* proj = nullptr;
309+
for (DUIterator_Fast imax, i = decode->fast_outs(imax); i < imax; i++) {
310+
Node* out = decode->fast_out(i);
311+
if (out == lea_derived_oop) {
312+
continue;
313+
}
314+
if (out->is_MachProj() && out->outcnt() == 0) {
315+
proj = out->as_MachProj();
316+
continue;
317+
}
318+
if (out->is_Mach()) {
319+
MachNode* other_lea = out->as_Mach();
320+
if ((other_lea->rule() == leaP32Narrow_rule ||
321+
other_lea->rule() == leaP8Narrow_rule ||
322+
other_lea->rule() == leaPCompressedOopOffset_rule) &&
323+
other_lea->in(AddPNode::Base) == decode &&
324+
(is_spill ? other_lea->in(AddPNode::Address)->in(1)
325+
: other_lea->in(AddPNode::Address)) == decode_address) {
326+
continue;
327+
}
328+
}
329+
// There is other stuff we do not expect...
330+
return false;
331+
}
332+
333+
// Ensure the MachProj is in the same block as the decode and the lea.
334+
if (proj == nullptr || !block->contains(proj)) {
335+
// This should only fail if we are stressing scheduling.
336+
assert(StressGCM, "should be scheduled contiguously otherwise");
337+
return false;
338+
}
339+
340+
// We now have verified that the decode is redundant and can be removed with a peephole.
341+
// Remove the projection
342+
block->find_remove(proj);
343+
cfg_->map_node_to_block(proj, nullptr);
344+
345+
// Rewire the base of all leas currently depending on the decode we are removing.
346+
for (DUIterator_Fast imax, i = decode->fast_outs(imax); i < imax; i++) {
347+
Node* dependant_lea = decode->fast_out(i);
348+
if (dependant_lea->is_Mach() && dependant_lea->as_Mach()->ideal_Opcode() == Op_AddP) {
349+
dependant_lea->set_req(AddPNode::Base, decode_address);
350+
// This deleted something in the out array, hence adjust i, imax.
351+
--i;
352+
--imax;
353+
}
354+
}
355+
356+
// Remove spill for the decode if the spill node does not have any other uses.
357+
if (is_spill) {
358+
MachNode* decode_spill = decode->in(1)->as_Mach();
359+
if (decode_spill->outcnt() == 1 && block->contains(decode_spill)) {
360+
decode_spill->set_removed();
361+
block->find_remove(decode_spill);
362+
cfg_->map_node_to_block(decode_spill, nullptr);
363+
decode_spill->del_req(1);
364+
}
365+
}
366+
367+
// Remove the decode
368+
decode->set_removed();
369+
block->find_remove(decode);
370+
cfg_->map_node_to_block(decode, nullptr);
371+
decode->del_req(1);
372+
373+
return true;
374+
}
375+
238376
bool Peephole::lea_coalesce_reg(Block* block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc* ra_,
239377
MachNode* (*new_root)(), uint inst0_rule) {
240378
return lea_coalesce_helper(block, block_index, cfg_, ra_, new_root, inst0_rule, false);

src/hotspot/cpu/x86/peephole_x86_64.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class Peephole {
3636
MachNode* (*new_root)(), uint inst0_rule);
3737
static bool test_may_remove(Block* block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc* ra_,
3838
MachNode* (*new_root)(), uint inst0_rule);
39+
static bool lea_remove_redundant(Block* block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc* ra_,
40+
MachNode* (*new_root)(), uint inst0_rule);
3941
};
4042

4143
#endif // CPU_X86_PEEPHOLE_X86_64_HPP

src/hotspot/cpu/x86/x86_64.ad

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14593,6 +14593,24 @@ peephole
1459314593
peepreplace (leaL_rReg_immI2_peep());
1459414594
%}
1459514595

14596+
peephole
14597+
%{
14598+
peepmatch (leaPCompressedOopOffset);
14599+
peepprocedure (lea_remove_redundant);
14600+
%}
14601+
14602+
peephole
14603+
%{
14604+
peepmatch (leaP8Narrow);
14605+
peepprocedure (lea_remove_redundant);
14606+
%}
14607+
14608+
peephole
14609+
%{
14610+
peepmatch (leaP32Narrow);
14611+
peepprocedure (lea_remove_redundant);
14612+
%}
14613+
1459614614
// These peephole rules matches instructions which set flags and are followed by a testI/L_reg
1459714615
// The test instruction is redudanent in case the downstream instuctions (like JCC or CMOV) only use flags that are already set by the previous instruction
1459814616

0 commit comments

Comments
 (0)