|
24 | 24 |
|
25 | 25 | #ifdef COMPILER2
|
26 | 26 |
|
| 27 | +#include "opto/addnode.hpp" |
27 | 28 | #include "peephole_x86_64.hpp"
|
28 | 29 | #include "adfiles/ad_x86.hpp"
|
29 | 30 |
|
@@ -235,6 +236,143 @@ bool Peephole::test_may_remove(Block* block, int block_index, PhaseCFG* cfg_, Ph
|
235 | 236 | return false;
|
236 | 237 | }
|
237 | 238 |
|
// This function removes redundant lea instructions that result from chained dereferences that
// match to leaPCompressedOopOffset, leaP8Narrow, or leaP32Narrow. This happens for ideal graphs
// of the form LoadN -> DecodeN -> AddP. Matching with any leaP* rule consumes both the AddP and
// the DecodeN. However, after matching the DecodeN is added back as the base for the leaP*,
// which is necessary if the oop derived by the leaP* gets added to an OopMap, because OopMaps
// cannot contain derived oops with narrow oops as a base.
// This results in the following graph after matching:
//   LoadN
//   |    \
//   |     decodeHeapOop_not_null
//   |    /                      \
//   leaP*                        MachProj (leaf)
// The decode_heap_oop_not_null will emit a lea with an unused result if the derived oop does
// not end up in an OopMap.
// This peephole recognizes graphs of the shape as shown above, ensures that the result of the
// decode is only used by the derived oop and removes that decode if this is the case. Further,
// multiple leaP*s can have the same decode as their base. This peephole will remove the decode
// if all leaP*s and the decode share the same parent.
// Additionally, if the register allocator spills the result of the LoadN we can get such a graph:
//          LoadN
//            |
//   DefinitionSpillCopy
//    /               \
//   MemToRegSpillCopy MemToRegSpillCopy
//   |                /
//   |     decodeHeapOop_not_null
//   |    /                      \
//   leaP*                        MachProj (leaf)
// In this case where the common parent of the leaP* and the decode is one MemToRegSpillCopy
// away, this peephole can also recognize the decode as redundant and also remove the spill copy
// if that is only used by the decode.
//
// Returns true if a redundant decode (and possibly its spill copy) was removed from the block,
// false if the graph does not have the expected shape. new_root and ra_ are unused here; they
// are part of the common peephole entry-point signature.
bool Peephole::lea_remove_redundant(Block* block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc* ra_,
                                    MachNode* (*new_root)(), uint inst0_rule) {
  // The candidate leaP* node: the peephole dispatch guarantees it matched inst0_rule, and all
  // leaP* rules implement an ideal AddP.
  MachNode* lea_derived_oop = block->get_node(block_index)->as_Mach();
  assert(lea_derived_oop->rule() == inst0_rule, "sanity");
  assert(lea_derived_oop->ideal_Opcode() == Op_AddP, "sanity");

  // The base input of the leaP* must be the DecodeN that was re-attached after matching (see
  // the function comment); otherwise this peephole does not apply.
  MachNode* decode = lea_derived_oop->in(AddPNode::Base)->isa_Mach();
  if (decode == nullptr || decode->ideal_Opcode() != Op_DecodeN) {
    return false;
  }

  // Check that the lea and the decode live in the same block.
  if (!block->contains(decode)) {
    return false;
  }

  Node* lea_address = lea_derived_oop->in(AddPNode::Address);
  Node* decode_address = decode->in(1);

  // Detect the spilled variant shown in the function comment: the lea and the decode each read
  // from their own spill copy rather than directly from a common parent.
  bool is_spill = lea_address != decode_address &&
                  lea_address->is_SpillCopy() &&
                  decode_address->is_SpillCopy();

  // If this is a spill, move lea_address and decode_address one node further up to the
  // grandparents of lea_derived_oop and decode respectively. This lets us look through
  // the indirection of the spill.
  if (is_spill) {
    decode_address = decode_address->in(1);
    lea_address = lea_address->in(1);
  }

  // The leaP* and the decode must have the same parent. If we have a spill, they must have
  // the same grandparent.
  if (lea_address != decode_address) {
    return false;
  }

  // Ensure the decode only has the leaP*s (with the same (grand)parent) and a MachProj leaf as children.
  MachProjNode* proj = nullptr;
  for (DUIterator_Fast imax, i = decode->fast_outs(imax); i < imax; i++) {
    Node* out = decode->fast_out(i);
    if (out == lea_derived_oop) {
      continue;
    }
    // A use-less MachProj leaf is tolerated; remember it so it can be removed together with
    // the decode below.
    if (out->is_MachProj() && out->outcnt() == 0) {
      proj = out->as_MachProj();
      continue;
    }
    // Other leaP*s may share this decode as their base, as long as their address input reaches
    // the same (grand)parent as ours.
    if (out->is_Mach()) {
      MachNode* other_lea = out->as_Mach();
      if ((other_lea->rule() == leaP32Narrow_rule ||
           other_lea->rule() == leaP8Narrow_rule ||
           other_lea->rule() == leaPCompressedOopOffset_rule) &&
          other_lea->in(AddPNode::Base) == decode &&
          (is_spill ? other_lea->in(AddPNode::Address)->in(1)
                    : other_lea->in(AddPNode::Address)) == decode_address) {
        continue;
      }
    }
    // There is other stuff we do not expect...
    return false;
  }

  // Ensure the MachProj is in the same block as the decode and the lea.
  if (proj == nullptr || !block->contains(proj)) {
    // This should only fail if we are stressing scheduling.
    assert(StressGCM, "should be scheduled contiguously otherwise");
    return false;
  }

  // We now have verified that the decode is redundant and can be removed with a peephole.
  // Remove the projection
  block->find_remove(proj);
  cfg_->map_node_to_block(proj, nullptr);

  // Rewire the base of all leas currently depending on the decode we are removing.
  for (DUIterator_Fast imax, i = decode->fast_outs(imax); i < imax; i++) {
    Node* dependant_lea = decode->fast_out(i);
    if (dependant_lea->is_Mach() && dependant_lea->as_Mach()->ideal_Opcode() == Op_AddP) {
      dependant_lea->set_req(AddPNode::Base, decode_address);
      // This deleted something in the out array, hence adjust i, imax.
      --i;
      --imax;
    }
  }

  // Remove spill for the decode if the spill node does not have any other uses.
  if (is_spill) {
    MachNode* decode_spill = decode->in(1)->as_Mach();
    if (decode_spill->outcnt() == 1 && block->contains(decode_spill)) {
      decode_spill->set_removed();
      block->find_remove(decode_spill);
      cfg_->map_node_to_block(decode_spill, nullptr);
      // Drop the input edge so the dead spill copy no longer keeps its input alive.
      decode_spill->del_req(1);
    }
  }

  // Remove the decode
  decode->set_removed();
  block->find_remove(decode);
  cfg_->map_node_to_block(decode, nullptr);
  decode->del_req(1);

  return true;
}
| 375 | + |
238 | 376 | bool Peephole::lea_coalesce_reg(Block* block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc* ra_,
|
239 | 377 | MachNode* (*new_root)(), uint inst0_rule) {
|
240 | 378 | return lea_coalesce_helper(block, block_index, cfg_, ra_, new_root, inst0_rule, false);
|
|
0 commit comments