From 1064e297fbf1ff6142a5c9bcc856474028863b42 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin@nvidia.com>
Date: Wed, 9 Jul 2025 17:21:48 -0700
Subject: [PATCH 1/2] [flang] Optimize redundant array repacking.

This patch allows optimizing redundant array repacking, when
the source array is statically known to be contiguous.
This is part of the implementation plan for the array repacking
feature, though, it does not affect any real life use case
as long as FIR inlining is not a thing. I experimented with
simple cases of FIR inlining using `-inline-all`, and I recorded
these cases in optimize-array-repacking.fir tests.
---
 .../flang/Optimizer/Builder/HLFIRTools.h      |   8 +
 .../flang/Optimizer/Dialect/FIROpsSupport.h   |  14 +-
 .../flang/Optimizer/Transforms/Passes.h       |  34 +-
 .../flang/Optimizer/Transforms/Passes.td      |  10 +
 flang/lib/Optimizer/Builder/HLFIRTools.cpp    |  40 +-
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 157 ++++-
 flang/lib/Optimizer/Passes/Pipelines.cpp      |   4 +
 flang/lib/Optimizer/Transforms/CMakeLists.txt |   1 +
 .../Optimizer/Transforms/LoopVersioning.cpp   |   4 +-
 .../Transforms/OptimizeArrayRepacking.cpp     |  90 +++
 flang/test/Driver/bbc-mlir-pass-pipeline.f90  |   2 +
 flang/test/Driver/mlir-pass-pipeline.f90      |   2 +
 flang/test/Fir/basic-program.fir              |   2 +
 .../simplify-hlfir-intrinsics-cshift.fir      |  36 +-
 .../simplify-hlfir-intrinsics-matmul.fir      | 107 +--
 .../Transforms/optimize-array-repacking.fir   | 660 ++++++++++++++++++
 16 files changed, 982 insertions(+), 189 deletions(-)
 create mode 100644 flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp
 create mode 100644 flang/test/Transforms/optimize-array-repacking.fir

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 2cbad6e268a38..49dfc85dc76e6 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -542,6 +542,14 @@ Entity gen1DSection(mlir::Location loc, fir::FirOpBuilder &builder,
 /// contiguous.
 bool designatePreservesContinuity(hlfir::DesignateOp op);
 
+/// Return true if the given \p base is proven to describe an object
+/// that is contiguous. If \p checkWhole is true, then
+/// the object must be contiguous in all dimensions,
+/// otherwise, it must be contiguous in the innermost dimension.
+/// This function is an extension of hlfir::Entity::isSimplyContiguous(),
+/// and it can be used on pure FIR representation as well as on HLFIR.
+bool isSimplyContiguous(mlir::Value base, bool checkWhole = true);
+
 } // namespace hlfir
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index 0a2337be7455e..ae471eb0d2e04 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -238,7 +238,19 @@ std::optional<int64_t> getAllocaByteSize(fir::AllocaOp alloca,
 /// When \p checkWhole is false, then the checking is only done
 /// for continuity in the innermost dimension, otherwise,
 /// the checking is done for continuity of the whole result of rebox.
-bool reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole = true);
+/// The caller may pass false for \p mayHaveNonDefaultLowerBounds when the
+/// source box is known to have default lower bounds; this allows better
+/// handling of the rebox operations representing full array slices.
+bool reboxPreservesContinuity(fir::ReboxOp rebox,
+                              bool mayHaveNonDefaultLowerBounds = true,
+                              bool checkWhole = true);
+
+/// Return true, if \p embox operation produces a contiguous
+/// entity.
+/// When \p checkWhole is false, then the checking is only done
+/// for continuity in the innermost dimension, otherwise,
+/// the checking is done for continuity of the whole result of embox.
+bool isContiguousEmbox(fir::EmboxOp embox, bool checkWhole = true);
 
 } // namespace fir
 
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index dc8a5b9141ad2..6f5dff4687cbb 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -31,39 +31,7 @@ namespace fir {
 // Passes defined in Passes.td
 //===----------------------------------------------------------------------===//
 
-#define GEN_PASS_DECL_ABSTRACTRESULTOPT
-#define GEN_PASS_DECL_AFFINEDIALECTPROMOTION
-#define GEN_PASS_DECL_AFFINEDIALECTDEMOTION
-#define GEN_PASS_DECL_ANNOTATECONSTANTOPERANDS
-#define GEN_PASS_DECL_ARRAYVALUECOPY
-#define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
-#define GEN_PASS_DECL_CHARACTERCONVERSION
-#define GEN_PASS_DECL_CFGCONVERSION
-#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
-#define GEN_PASS_DECL_CUFDEVICEGLOBAL
-#define GEN_PASS_DECL_CUFGPUTOLLVMCONVERSION
-#define GEN_PASS_DECL_CUFOPCONVERSION
-#define GEN_PASS_DECL_CUFCOMPUTESHAREDMEMORYOFFSETSANDSIZE
-#define GEN_PASS_DECL_EXTERNALNAMECONVERSION
-#define GEN_PASS_DECL_MEMREFDATAFLOWOPT
-#define GEN_PASS_DECL_SIMPLIFYINTRINSICS
-#define GEN_PASS_DECL_MEMORYALLOCATIONOPT
-#define GEN_PASS_DECL_SIMPLIFYREGIONLITE
-#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
-#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
-#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
-#define GEN_PASS_DECL_ADDDEBUGINFO
-#define GEN_PASS_DECL_STACKARRAYS
-#define GEN_PASS_DECL_STACKRECLAIM
-#define GEN_PASS_DECL_LOOPVERSIONING
-#define GEN_PASS_DECL_ADDALIASTAGS
-#define GEN_PASS_DECL_VSCALEATTR
-#define GEN_PASS_DECL_FUNCTIONATTR
-#define GEN_PASS_DECL_CONSTANTARGUMENTGLOBALISATIONOPT
-#define GEN_PASS_DECL_COMPILERGENERATEDNAMESCONVERSION
-#define GEN_PASS_DECL_SETRUNTIMECALLATTRIBUTES
-#define GEN_PASS_DECL_GENRUNTIMECALLSFORTEST
-#define GEN_PASS_DECL_SIMPLIFYFIROPERATIONS
+#define GEN_PASS_DECL
 
 #include "flang/Optimizer/Transforms/Passes.h.inc"
 
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 34842f9785942..b230f60b4c59e 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -551,4 +551,14 @@ def SimplifyFIROperations : Pass<"simplify-fir-operations", "mlir::ModuleOp"> {
       "Prefer expanding without using Fortran runtime calls.">];
 }
 
+def OptimizeArrayRepacking
+    : Pass<"optimize-array-repacking", "mlir::func::FuncOp"> {
+  let summary = "Optimizes redundant array repacking operations";
+  let description = [{
+    If the source of fir.pack_array is known to be contiguous,
+    then this pass erases such operations. The corresponding
+    fir.unpack_array operations are also removed.
+  }];
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index b54cccbef14fa..e59a6bf2bf224 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -212,10 +212,17 @@ bool hlfir::Entity::mayHaveNonDefaultLowerBounds() const {
   if (auto varIface = getIfVariableInterface())
     return isShapeWithLowerBounds(varIface.getShape());
   // Go through chain of fir.box converts.
-  if (auto convert = getDefiningOp<fir::ConvertOp>())
+  if (auto convert = getDefiningOp<fir::ConvertOp>()) {
     return hlfir::Entity{convert.getValue()}.mayHaveNonDefaultLowerBounds();
-  // TODO: Embox and Rebox do not have hlfir variable interface, but are
-  // easy to reason about.
+  } else if (auto rebox = getDefiningOp<fir::ReboxOp>()) {
+    // If slicing is involved, then the resulting box has
+    // default lower bounds. If there is no slicing,
+    // then the result depends on the shape operand
+    // (whether it has non default lower bounds or not).
+    return !rebox.getSlice() && isShapeWithLowerBounds(rebox.getShape());
+  } else if (auto embox = getDefiningOp<fir::EmboxOp>()) {
+    return !embox.getSlice() && isShapeWithLowerBounds(embox.getShape());
+  }
   return true;
 }
 
@@ -1646,3 +1653,30 @@ bool hlfir::designatePreservesContinuity(hlfir::DesignateOp op) {
   }
   return true;
 }
+
+bool hlfir::isSimplyContiguous(mlir::Value base, bool checkWhole) {
+  if (hlfir::Entity{base}.isSimplyContiguous())
+    return true;
+
+  // Look at the definition, forwarding \p checkWhole to every source.
+  mlir::Operation *def = base.getDefiningOp();
+  if (!def)
+    return false;
+
+  return mlir::TypeSwitch<mlir::Operation *, bool>(def)
+      .Case<fir::EmboxOp>(
+          [&](auto op) { return fir::isContiguousEmbox(op, checkWhole); })
+      .Case<fir::ReboxOp>([&](auto op) {
+        hlfir::Entity box{op.getBox()};
+        return fir::reboxPreservesContinuity(
+                   op, box.mayHaveNonDefaultLowerBounds(), checkWhole) &&
+               isSimplyContiguous(box, checkWhole);
+      })
+      .Case<fir::DeclareOp, hlfir::DeclareOp>([&](auto op) {
+        return isSimplyContiguous(op.getMemref(), checkWhole);
+      })
+      .Case<fir::ConvertOp>([&](auto op) {
+        return isSimplyContiguous(op.getValue(), checkWhole);
+      })
+      .Default([](auto &&) { return false; });
+}
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index ecfa2939e96a6..d04306a48e922 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -1944,6 +1944,128 @@ llvm::LogicalResult fir::EmboxOp::verify() {
   return mlir::success();
 }
 
+/// Returns true if \p extent is the extent result of a fir.box_dims
+/// that reads the constant dimension \p dim of \p box.
+static bool isBoxExtent(mlir::Value box, std::int64_t dim, mlir::Value extent) {
+  if (auto op = extent.getDefiningOp<fir::BoxDimsOp>())
+    if (op.getVal() == box && op.getExtent() == extent)
+      if (auto dimOperand = fir::getIntIfConstant(op.getDim()))
+        return *dimOperand == dim;
+  return false;
+}
+
+/// Returns true if \p lb matches the lower bound of the \p box's
+/// dimension \p dim. If \p mayHaveNonDefaultLowerBounds is false,
+/// then \p lb may be an integer constant 1.
+static bool isBoxLb(mlir::Value box, std::int64_t dim, mlir::Value lb,
+                    bool mayHaveNonDefaultLowerBounds = true) {
+  if (auto op = lb.getDefiningOp<fir::BoxDimsOp>()) {
+    if (op.getVal() == box && op.getLowerBound() == lb)
+      if (auto dimOperand = fir::getIntIfConstant(op.getDim()))
+        return *dimOperand == dim;
+  } else if (!mayHaveNonDefaultLowerBounds) {
+    if (auto constantLb = fir::getIntIfConstant(lb))
+      return *constantLb == 1;
+  }
+  return false;
+}
+
+/// Returns true if \p ub matches the upper bound of the \p box's
+/// dimension \p dim, i.e. it is computed as (lb + extent) - 1 from
+/// the fir.box_dims results of that dimension. If
+/// \p mayHaveNonDefaultLowerBounds is false, then \p ub may also be
+/// the dimension's extent itself (the lower bound being 1).
+/// This is a best-effort recognition of the computation pattern:
+/// the conservative result 'false' does not necessarily mean
+/// that \p ub is not an actual upper bound value.
+static bool isBoxUb(mlir::Value box, std::int64_t dim, mlir::Value ub,
+                    bool mayHaveNonDefaultLowerBounds = true) {
+  if (auto sub1 = ub.getDefiningOp<mlir::arith::SubIOp>()) {
+    auto one = fir::getIntIfConstant(sub1.getOperand(1));
+    if (!one || *one != 1)
+      return false;
+    if (auto add = sub1.getOperand(0).getDefiningOp<mlir::arith::AddIOp>())
+      if ((isBoxLb(box, dim, add.getOperand(0)) &&
+           isBoxExtent(box, dim, add.getOperand(1))) ||
+          (isBoxLb(box, dim, add.getOperand(1)) &&
+           isBoxExtent(box, dim, add.getOperand(0))))
+        return true;
+  } else if (!mayHaveNonDefaultLowerBounds) {
+    return isBoxExtent(box, dim, ub);
+  }
+  return false;
+}
+
+/// Checks if the given \p sliceOp specifies a contiguous
+/// array slice. If \p checkWhole is true, then the check
+/// is done for all dimensions, otherwise, only for the innermost
+/// dimension.
+/// The simplest way to prove that this is a contiguous slice
+/// is to check whether the slice stride(s) is 1.
+/// For more complex cases, extra information must be provided
+/// by the caller:
+///   * \p origBox - if not null, then the source array is represented
+///     with this !fir.box value. The box is used to recognize
+///     the full dimension slices, which are specified by the triplets
+///     computed from the dimensions' lower bounds and extents.
+///   * \p mayHaveNonDefaultLowerBounds may be set to false to indicate
+///     that the source entity has default lower bounds, so the full
+///     dimension slices computations may use 1 for the lower bound.
+static bool isContiguousArraySlice(fir::SliceOp sliceOp, bool checkWhole = true,
+                                   mlir::Value origBox = nullptr,
+                                   bool mayHaveNonDefaultLowerBounds = true) {
+  if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
+    // TODO: generalize code for the triples analysis with
+    // hlfir::designatePreservesContinuity, especially when
+    // recognition of the whole dimension slices is added.
+    auto triples = sliceOp.getTriples();
+    assert((triples.size() % 3) == 0 && "invalid triples size");
+
+    // A slice with step=1 in the innermost dimension preserves
+    // the continuity of the array in the innermost dimension.
+    // If checkWhole is false, then check only the innermost slice triples.
+    std::size_t checkUpTo = checkWhole ? triples.size() : 3;
+    checkUpTo = std::min(checkUpTo, triples.size());
+    for (std::size_t i = 0; i < checkUpTo; i += 3) {
+      if (triples[i] != triples[i + 1]) {
+        // This triple is a section of the dimension: it must have
+        // a constant unit stride. With a boxed source (origBox),
+        // any triple but the first one must also be proven to cover
+        // the whole dimension; with a raw pointer source this is
+        // not checked.
+        // NOTE(review): the first triple's range is never compared to
+        // the full dimension, so a(1:5,:) looks to be accepted even
+        // though it is not contiguous as a whole - TODO confirm.
+        if (i != 0 && origBox) {
+          std::int64_t dim = i / 3;
+          if (!isBoxLb(origBox, dim, triples[i],
+                       mayHaveNonDefaultLowerBounds) ||
+              !isBoxUb(origBox, dim, triples[i + 1],
+                       mayHaveNonDefaultLowerBounds))
+            return false;
+        }
+        auto constantStep = fir::getIntIfConstant(triples[i + 2]);
+        if (!constantStep || *constantStep != 1)
+          return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
+bool fir::isContiguousEmbox(fir::EmboxOp embox, bool checkWhole) {
+  mlir::Value slice = embox.getSlice();
+  // An embox without a slice operand is reported as contiguous.
+  if (!slice)
+    return true;
+  // Only a slice defined by fir.slice can be analyzed further.
+  if (auto sliceOp = slice.getDefiningOp<fir::SliceOp>())
+    return isContiguousArraySlice(sliceOp, checkWhole);
+
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // EmboxCharOp
 //===----------------------------------------------------------------------===//
@@ -4794,7 +4916,9 @@ mlir::Type fir::applyPathToType(mlir::Type eleTy, mlir::ValueRange path) {
   return eleTy;
 }
 
-bool fir::reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole) {
+bool fir::reboxPreservesContinuity(fir::ReboxOp rebox,
+                                   bool mayHaveNonDefaultLowerBounds,
+                                   bool checkWhole) {
   // If slicing is not involved, then the rebox does not affect
   // the continuity of the array.
   auto sliceArg = rebox.getSlice();
@@ -4802,33 +4926,10 @@ bool fir::reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole) {
     return true;
 
   if (auto sliceOp =
-          mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp())) {
-    if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
-      // TODO: generalize code for the triples analysis with
-      // hlfir::designatePreservesContinuity, especially when
-      // recognition of the whole dimension slices is added.
-      auto triples = sliceOp.getTriples();
-      assert((triples.size() % 3) == 0 && "invalid triples size");
-
-      // A slice with step=1 in the innermost dimension preserves
-      // the continuity of the array in the innermost dimension.
-      // If checkWhole is false, then check only the innermost slice triples.
-      std::size_t checkUpTo = checkWhole ? triples.size() : 3;
-      checkUpTo = std::min(checkUpTo, triples.size());
-      for (std::size_t i = 0; i < checkUpTo; i += 3) {
-        if (triples[i] != triples[i + 1]) {
-          // This is a section of the dimension. Only allow it
-          // to be the first triple.
-          if (i != 0)
-            return false;
-          auto constantStep = fir::getIntIfConstant(triples[i + 2]);
-          if (!constantStep || *constantStep != 1)
-            return false;
-        }
-      }
-      return true;
-    }
-  }
+          mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp()))
+    return isContiguousArraySlice(sliceOp, checkWhole, rebox.getBox(),
+                                  mayHaveNonDefaultLowerBounds);
+
   return false;
 }
 
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 333bfc8b7aed8..ca8e820608688 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -207,6 +207,10 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
   pm.addPass(fir::createPolymorphicOpConversion());
   pm.addPass(fir::createAssumedRankOpConversion());
 
+  // Optimize redundant array repacking operations,
+  // if the source is known to be contiguous.
+  if (pc.OptLevel.isOptimizingForSpeed())
+    pm.addPass(fir::createOptimizeArrayRepacking());
   pm.addPass(fir::createLowerRepackArraysPass());
   // Expand FIR operations that may use SCF dialect for their
   // implementation. This is a mandatory pass.
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 846d6c64dbd04..a8812e08c1ccd 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -34,6 +34,7 @@ add_flang_library(FIRTransforms
   SetRuntimeCallAttributes.cpp
   GenRuntimeCallsForTest.cpp
   SimplifyFIROperations.cpp
+  OptimizeArrayRepacking.cpp
 
   DEPENDS
   CUFAttrs
diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
index 50e7ee5599ab1..056bdf63d914f 100644
--- a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
+++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
@@ -214,7 +214,9 @@ static mlir::Value unwrapPassThroughOps(mlir::Value val) {
 /// of the value, otherwise return the value
 static mlir::Value unwrapReboxOp(mlir::Value val) {
   while (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) {
-    if (!fir::reboxPreservesContinuity(rebox, /*checkWhole=*/false)) {
+    if (!fir::reboxPreservesContinuity(rebox,
+                                       /*mayHaveNonDefaultLowerBounds=*/true,
+                                       /*checkWhole=*/false)) {
       LLVM_DEBUG(llvm::dbgs() << "REBOX may produce non-contiguous array: "
                               << rebox << '\n');
       break;
diff --git a/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp b/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp
new file mode 100644
index 0000000000000..1688f2887a57a
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp
@@ -0,0 +1,90 @@
+//===- OptimizeArrayRepacking.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass removes redundant fir.pack_array operations, if it can prove
+/// that the source array is contiguous. In this case, it relinks all uses
+/// of fir.pack_array result to the source. If such a rewrite happens,
+/// it may turn the using fir.unpack_array operation into one with the same
+/// temp and original operands - these are also removed as redundant.
+//===----------------------------------------------------------------------===//
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Support/Utils.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace fir {
+#define GEN_PASS_DEF_OPTIMIZEARRAYREPACKING
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+class OptimizeArrayRepackingPass
+    : public fir::impl::OptimizeArrayRepackingBase<OptimizeArrayRepackingPass> {
+public:
+  void runOnOperation() override;
+};
+
+/// Relinks all uses of a redundant fir.pack_array to its source array.
+class PackingOfContiguous : public mlir::OpRewritePattern<fir::PackArrayOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  mlir::LogicalResult matchAndRewrite(fir::PackArrayOp,
+                                      mlir::PatternRewriter &) const override;
+};
+
+/// Erases fir.unpack_array operations whose temp and original
+/// operands are the same value.
+class NoopUnpacking : public mlir::OpRewritePattern<fir::UnpackArrayOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  mlir::LogicalResult matchAndRewrite(fir::UnpackArrayOp,
+                                      mlir::PatternRewriter &) const override;
+};
+} // namespace
+
+mlir::LogicalResult
+PackingOfContiguous::matchAndRewrite(fir::PackArrayOp op,
+                                     mlir::PatternRewriter &rewriter) const {
+  mlir::Value source = op.getArray();
+  // Keep the packing unless the source is proven to be contiguous.
+  if (!hlfir::isSimplyContiguous(source, /*checkWhole=*/!op.getInnermost()))
+    return mlir::failure();
+  rewriter.replaceAllUsesWith(op, source);
+  rewriter.eraseOp(op);
+  return mlir::success();
+}
+
+mlir::LogicalResult
+NoopUnpacking::matchAndRewrite(fir::UnpackArrayOp op,
+                               mlir::PatternRewriter &rewriter) const {
+  // Only an unpack whose temp and original are the same value is erased.
+  if (op.getTemp() != op.getOriginal())
+    return mlir::failure();
+  rewriter.eraseOp(op);
+  return mlir::success();
+}
+
+void OptimizeArrayRepackingPass::runOnOperation() {
+  mlir::func::FuncOp func = getOperation();
+  mlir::MLIRContext *ctx = &getContext();
+  mlir::RewritePatternSet patterns(ctx);
+  patterns.insert<PackingOfContiguous, NoopUnpacking>(ctx);
+  // Apply the patterns greedily with region simplification disabled.
+  mlir::GreedyRewriteConfig config;
+  config.setRegionSimplificationLevel(
+      mlir::GreedySimplifyRegionLevel::Disabled);
+  if (mlir::succeeded(
+          mlir::applyPatternsGreedily(func, std::move(patterns), config)))
+    return;
+  mlir::emitError(func.getLoc(), "failure in array repacking optimization");
+  signalPassFailure();
+}
diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
index 137c19608c38f..f3791fe9f8dc3 100644
--- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90
+++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
@@ -47,6 +47,8 @@
 
 ! CHECK-NEXT: PolymorphicOpConversion
 ! CHECK-NEXT: AssumedRankOpConversion
+! CHECK-NEXT: 'func.func' Pipeline
+! CHECK-NEXT:   OptimizeArrayRepacking
 ! CHECK-NEXT: LowerRepackArraysPass
 ! CHECK-NEXT: SimplifyFIROperations
 
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 99f192ce7aec2..0bcd055a84b87 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -105,6 +105,8 @@
 
 ! ALL-NEXT: PolymorphicOpConversion
 ! ALL-NEXT: AssumedRankOpConversion
+! O2-NEXT:  'func.func' Pipeline
+! O2-NEXT:    OptimizeArrayRepacking
 ! ALL-NEXT: LowerRepackArraysPass
 ! ALL-NEXT: SimplifyFIROperations
 ! O2-NEXT:  AddAliasTags
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index 7ac8b92f48953..c9fe53bf093a1 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -103,6 +103,8 @@ func.func @_QQmain() {
 
 // PASSES-NEXT: PolymorphicOpConversion
 // PASSES-NEXT: AssumedRankOpConversion
+// PASSES-NEXT: 'func.func' Pipeline
+// PASSES-NEXT:   OptimizeArrayRepacking
 // PASSES-NEXT: LowerRepackArraysPass
 // PASSES-NEXT: SimplifyFIROperations
 // PASSES-NEXT: AddAliasTags
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir
index 35530c66f4038..8684a429ea5b4 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir
@@ -41,19 +41,11 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:               %[[VAL_27:.*]] = fir.convert %[[VAL_17]] : (i64) -> index
 // CHECK:               fir.do_loop %[[VAL_28:.*]] = %[[VAL_2]] to %[[VAL_27]] step %[[VAL_2]] unordered {
 // CHECK:                 %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (index) -> i64
-// CHECK:                 %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_26]], %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:                 %[[VAL_31:.*]] = fir.convert %[[VAL_29]] : (i64) -> index
-// CHECK:                 %[[VAL_32:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_2]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_33:.*]] = arith.addi %[[VAL_31]], %[[VAL_32]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_34:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_33]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[VAL_34:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_29]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
 // CHECK:                 %[[VAL_36:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
 // CHECK:                 %[[VAL_37:.*]] = arith.addi %[[VAL_29]], %[[VAL_36]] overflow<nsw, nuw> : i64
-// CHECK:                 %[[VAL_38:.*]]:3 = fir.box_dims %[[VAL_20]], %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:                 %[[VAL_39:.*]] = fir.convert %[[VAL_37]] : (i64) -> index
-// CHECK:                 %[[VAL_40:.*]] = arith.subi %[[VAL_38]]#0, %[[VAL_2]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_41:.*]] = arith.addi %[[VAL_39]], %[[VAL_40]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_41]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[VAL_42:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_37]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 hlfir.assign %[[VAL_35]] to %[[VAL_42]] : i32, !fir.ref<i32>
 // CHECK:               }
 // CHECK:               %[[VAL_43:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
@@ -61,17 +53,9 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:               fir.do_loop %[[VAL_45:.*]] = %[[VAL_2]] to %[[VAL_44]] step %[[VAL_2]] unordered {
 // CHECK:                 %[[VAL_46:.*]] = fir.convert %[[VAL_45]] : (index) -> i64
 // CHECK:                 %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_17]] overflow<nsw, nuw> : i64
-// CHECK:                 %[[VAL_48:.*]]:3 = fir.box_dims %[[VAL_26]], %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:                 %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (i64) -> index
-// CHECK:                 %[[VAL_50:.*]] = arith.subi %[[VAL_48]]#0, %[[VAL_2]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_51:.*]] = arith.addi %[[VAL_49]], %[[VAL_50]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_52:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_51]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[VAL_52:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_47]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref<i32>
-// CHECK:                 %[[VAL_54:.*]]:3 = fir.box_dims %[[VAL_20]], %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:                 %[[VAL_55:.*]] = fir.convert %[[VAL_46]] : (i64) -> index
-// CHECK:                 %[[VAL_56:.*]] = arith.subi %[[VAL_54]]#0, %[[VAL_2]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_57:.*]] = arith.addi %[[VAL_55]], %[[VAL_56]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_58:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_57]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[VAL_58:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_46]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 hlfir.assign %[[VAL_53]] to %[[VAL_58]] : i32, !fir.ref<i32>
 // CHECK:               }
 // CHECK:             } else {
@@ -86,11 +70,7 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:                 %[[VAL_67:.*]] = fir.load %[[VAL_66]] : !fir.ref<i32>
 // CHECK:                 %[[VAL_68:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
 // CHECK:                 %[[VAL_69:.*]] = arith.addi %[[VAL_61]], %[[VAL_68]] overflow<nsw, nuw> : i64
-// CHECK:                 %[[VAL_70:.*]]:3 = fir.box_dims %[[VAL_20]], %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:                 %[[VAL_71:.*]] = fir.convert %[[VAL_69]] : (i64) -> index
-// CHECK:                 %[[VAL_72:.*]] = arith.subi %[[VAL_70]]#0, %[[VAL_2]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_73:.*]] = arith.addi %[[VAL_71]], %[[VAL_72]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_74:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_73]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[VAL_74:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_69]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 hlfir.assign %[[VAL_67]] to %[[VAL_74]] : i32, !fir.ref<i32>
 // CHECK:               }
 // CHECK:               %[[VAL_75:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow<nsw, nuw> : i64
@@ -104,11 +84,7 @@ func.func @cshift_vector(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<i32
 // CHECK:                 %[[VAL_83:.*]] = arith.addi %[[VAL_81]], %[[VAL_82]] overflow<nsw, nuw> : index
 // CHECK:                 %[[VAL_84:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_83]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:                 %[[VAL_85:.*]] = fir.load %[[VAL_84]] : !fir.ref<i32>
-// CHECK:                 %[[VAL_86:.*]]:3 = fir.box_dims %[[VAL_20]], %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:                 %[[VAL_87:.*]] = fir.convert %[[VAL_78]] : (i64) -> index
-// CHECK:                 %[[VAL_88:.*]] = arith.subi %[[VAL_86]]#0, %[[VAL_2]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_89:.*]] = arith.addi %[[VAL_87]], %[[VAL_88]] overflow<nsw, nuw> : index
-// CHECK:                 %[[VAL_90:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_89]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[VAL_90:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_78]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
 // CHECK:                 hlfir.assign %[[VAL_85]] to %[[VAL_90]] : i32, !fir.ref<i32>
 // CHECK:               }
 // CHECK:             }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-matmul.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-matmul.fir
index d29e9a26c20ba..73e6e5c2421ff 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-matmul.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-matmul.fir
@@ -13,7 +13,6 @@ func.func @matmul_matrix_matrix_integer(%arg0: !hlfir.expr<?x?xi16>, %arg1: !hlf
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant 0 : i32
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xi16>) -> !fir.shape<2>
@@ -27,26 +26,14 @@ func.func @matmul_matrix_matrix_integer(%arg0: !hlfir.expr<?x?xi16>, %arg1: !hlf
 // ANSE:             %[[VAL_13:.*]] = fir.embox %[[VAL_12]](%[[VAL_10]]) : (!fir.ref<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xi32>>
 // ANSE:             fir.do_loop %[[VAL_14:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_15:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                 %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xi32>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xi32>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_18:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_19:.*]] = arith.addi %[[VAL_15]], %[[VAL_18]] : index
-// ANSE:                 %[[VAL_20:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_21:.*]] = arith.addi %[[VAL_14]], %[[VAL_20]] : index
-// ANSE:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_19]], %[[VAL_21]])  : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// ANSE:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_15]], %[[VAL_14]])  : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
 // ANSE:                 hlfir.assign %[[VAL_4]] to %[[VAL_22]] : i32, !fir.ref<i32>
 // ANSE:               }
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_23:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_24:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:                 fir.do_loop %[[VAL_25:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                   %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xi32>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xi32>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_28:.*]] = arith.subi %[[VAL_26]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_29:.*]] = arith.addi %[[VAL_25]], %[[VAL_28]] : index
-// ANSE:                   %[[VAL_30:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_31:.*]] = arith.addi %[[VAL_24]], %[[VAL_30]] : index
-// ANSE:                   %[[VAL_32:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_29]], %[[VAL_31]])  : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// ANSE:                   %[[VAL_32:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_25]], %[[VAL_24]])  : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
 // ANSE:                   %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
 // ANSE:                   %[[VAL_34:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_25]], %[[VAL_23]] : (!hlfir.expr<?x?xi16>, index, index) -> i16
 // ANSE:                   %[[VAL_35:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_23]], %[[VAL_24]] : (!hlfir.expr<?x?xi32>, index, index) -> i32
@@ -94,7 +81,6 @@ func.func @matmul_matrix_matrix_real(%arg0: !hlfir.expr<?x?xf32>, %arg1: !hlfir.
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf32>) -> !fir.shape<2>
@@ -108,26 +94,14 @@ func.func @matmul_matrix_matrix_real(%arg0: !hlfir.expr<?x?xf32>, %arg1: !hlfir.
 // ANSE:             %[[VAL_13:.*]] = fir.embox %[[VAL_12]](%[[VAL_10]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.array<?x?xf32>>
 // ANSE:             fir.do_loop %[[VAL_14:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_15:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                 %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_18:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_19:.*]] = arith.addi %[[VAL_15]], %[[VAL_18]] : index
-// ANSE:                 %[[VAL_20:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_21:.*]] = arith.addi %[[VAL_14]], %[[VAL_20]] : index
-// ANSE:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_19]], %[[VAL_21]])  : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+// ANSE:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_15]], %[[VAL_14]])  : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
 // ANSE:                 hlfir.assign %[[VAL_4]] to %[[VAL_22]] : f32, !fir.ref<f32>
 // ANSE:               }
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_23:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_3]] {
 // ANSE:               fir.do_loop %[[VAL_24:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] {
 // ANSE:                 fir.do_loop %[[VAL_25:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] {
-// ANSE:                   %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_28:.*]] = arith.subi %[[VAL_26]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_29:.*]] = arith.addi %[[VAL_25]], %[[VAL_28]] : index
-// ANSE:                   %[[VAL_30:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_31:.*]] = arith.addi %[[VAL_24]], %[[VAL_30]] : index
-// ANSE:                   %[[VAL_32:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_29]], %[[VAL_31]])  : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+// ANSE:                   %[[VAL_32:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_25]], %[[VAL_24]])  : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
 // ANSE:                   %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<f32>
 // ANSE:                   %[[VAL_34:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_25]], %[[VAL_23]] : (!hlfir.expr<?x?xf32>, index, index) -> f32
 // ANSE:                   %[[VAL_35:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_23]], %[[VAL_24]] : (!hlfir.expr<?x?xf16>, index, index) -> f16
@@ -175,7 +149,6 @@ func.func @matmul_matrix_matrix_complex(%arg0: !hlfir.expr<?x?xcomplex<f32>>, %a
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xcomplex<f32>>) -> !fir.shape<2>
@@ -192,26 +165,14 @@ func.func @matmul_matrix_matrix_complex(%arg0: !hlfir.expr<?x?xcomplex<f32>>, %a
 // ANSE:             %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_4]], [1 : index] : (complex<f32>, f32) -> complex<f32>
 // ANSE:             fir.do_loop %[[VAL_17:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_18:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                 %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_21:.*]] = arith.subi %[[VAL_19]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_22:.*]] = arith.addi %[[VAL_18]], %[[VAL_21]] : index
-// ANSE:                 %[[VAL_23:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_24:.*]] = arith.addi %[[VAL_17]], %[[VAL_23]] : index
-// ANSE:                 %[[VAL_25:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_22]], %[[VAL_24]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
+// ANSE:                 %[[VAL_25:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_18]], %[[VAL_17]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
 // ANSE:                 hlfir.assign %[[VAL_16]] to %[[VAL_25]] : complex<f32>, !fir.ref<complex<f32>>
 // ANSE:               }
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_26:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_3]] {
 // ANSE:               fir.do_loop %[[VAL_27:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] {
 // ANSE:                 fir.do_loop %[[VAL_28:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] {
-// ANSE:                   %[[VAL_29:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_31:.*]] = arith.subi %[[VAL_29]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_32:.*]] = arith.addi %[[VAL_28]], %[[VAL_31]] : index
-// ANSE:                   %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_34:.*]] = arith.addi %[[VAL_27]], %[[VAL_33]] : index
-// ANSE:                   %[[VAL_35:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_32]], %[[VAL_34]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
+// ANSE:                   %[[VAL_35:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_28]], %[[VAL_27]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
 // ANSE:                   %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<complex<f32>>
 // ANSE:                   %[[VAL_37:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_28]], %[[VAL_26]] : (!hlfir.expr<?x?xcomplex<f32>>, index, index) -> complex<f32>
 // ANSE:                   %[[VAL_38:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_26]], %[[VAL_27]] : (!hlfir.expr<?x?xcomplex<f16>>, index, index) -> complex<f16>
@@ -262,7 +223,6 @@ func.func @matmul_matrix_matrix_complex_real(%arg0: !hlfir.expr<?x?xcomplex<f32>
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xcomplex<f32>>) -> !fir.shape<2>
@@ -279,26 +239,14 @@ func.func @matmul_matrix_matrix_complex_real(%arg0: !hlfir.expr<?x?xcomplex<f32>
 // ANSE:             %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_4]], [1 : index] : (complex<f32>, f32) -> complex<f32>
 // ANSE:             fir.do_loop %[[VAL_17:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_18:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                 %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_21:.*]] = arith.subi %[[VAL_19]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_22:.*]] = arith.addi %[[VAL_18]], %[[VAL_21]] : index
-// ANSE:                 %[[VAL_23:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_24:.*]] = arith.addi %[[VAL_17]], %[[VAL_23]] : index
-// ANSE:                 %[[VAL_25:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_22]], %[[VAL_24]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
+// ANSE:                 %[[VAL_25:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_18]], %[[VAL_17]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
 // ANSE:                 hlfir.assign %[[VAL_16]] to %[[VAL_25]] : complex<f32>, !fir.ref<complex<f32>>
 // ANSE:               }
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_26:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_3]] {
 // ANSE:               fir.do_loop %[[VAL_27:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] {
 // ANSE:                 fir.do_loop %[[VAL_28:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] {
-// ANSE:                   %[[VAL_29:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_31:.*]] = arith.subi %[[VAL_29]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_32:.*]] = arith.addi %[[VAL_28]], %[[VAL_31]] : index
-// ANSE:                   %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_34:.*]] = arith.addi %[[VAL_27]], %[[VAL_33]] : index
-// ANSE:                   %[[VAL_35:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_32]], %[[VAL_34]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
+// ANSE:                   %[[VAL_35:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_28]], %[[VAL_27]])  : (!fir.box<!fir.array<?x?xcomplex<f32>>>, index, index) -> !fir.ref<complex<f32>>
 // ANSE:                   %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<complex<f32>>
 // ANSE:                   %[[VAL_37:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_28]], %[[VAL_26]] : (!hlfir.expr<?x?xcomplex<f32>>, index, index) -> complex<f32>
 // ANSE:                   %[[VAL_38:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_26]], %[[VAL_27]] : (!hlfir.expr<?x?xf16>, index, index) -> f16
@@ -357,7 +305,6 @@ func.func @matmul_matrix_matrix_logical(%arg0: !hlfir.expr<?x?x!fir.logical<1>>,
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant false
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x!fir.logical<1>>) -> !fir.shape<2>
@@ -372,26 +319,14 @@ func.func @matmul_matrix_matrix_logical(%arg0: !hlfir.expr<?x?x!fir.logical<1>>,
 // ANSE:             %[[VAL_14:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4>
 // ANSE:             fir.do_loop %[[VAL_15:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_16:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_19:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_20:.*]] = arith.addi %[[VAL_16]], %[[VAL_19]] : index
-// ANSE:                 %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_22:.*]] = arith.addi %[[VAL_15]], %[[VAL_21]] : index
-// ANSE:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_20]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// ANSE:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_16]], %[[VAL_15]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
 // ANSE:                 hlfir.assign %[[VAL_14]] to %[[VAL_23]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 // ANSE:               }
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_24:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_3]] unordered {
 // ANSE:               fir.do_loop %[[VAL_25:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered {
 // ANSE:                 fir.do_loop %[[VAL_26:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:                   %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_28:.*]]:3 = fir.box_dims %[[VAL_13]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
-// ANSE:                   %[[VAL_29:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_30:.*]] = arith.addi %[[VAL_26]], %[[VAL_29]] : index
-// ANSE:                   %[[VAL_31:.*]] = arith.subi %[[VAL_28]]#0, %[[VAL_3]] : index
-// ANSE:                   %[[VAL_32:.*]] = arith.addi %[[VAL_25]], %[[VAL_31]] : index
-// ANSE:                   %[[VAL_33:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_30]], %[[VAL_32]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// ANSE:                   %[[VAL_33:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_26]], %[[VAL_25]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
 // ANSE:                   %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<!fir.logical<4>>
 // ANSE:                   %[[VAL_35:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_26]], %[[VAL_24]] : (!hlfir.expr<?x?x!fir.logical<1>>, index, index) -> !fir.logical<1>
 // ANSE:                   %[[VAL_36:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_24]], %[[VAL_25]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
@@ -446,7 +381,6 @@ func.func @matmul_matrix_vector_real(%arg0: !hlfir.expr<?x?xf32>, %arg1: !hlfir.
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf32>) -> !fir.shape<2>
@@ -457,18 +391,12 @@ func.func @matmul_matrix_vector_real(%arg0: !hlfir.expr<?x?xf32>, %arg1: !hlfir.
 // ANSE:           ^bb0(%[[VAL_10:.*]]: !fir.ref<!fir.array<?xf32>>):
 // ANSE:             %[[VAL_11:.*]] = fir.embox %[[VAL_10]](%[[VAL_8]]) : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
 // ANSE:             fir.do_loop %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] unordered {
-// ANSE:               %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_11]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-// ANSE:               %[[VAL_14:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_3]] : index
-// ANSE:               %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_14]] : index
-// ANSE:               %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_15]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// ANSE:               %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_12]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // ANSE:               hlfir.assign %[[VAL_4]] to %[[VAL_16]] : f32, !fir.ref<f32>
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_17:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_3]] {
 // ANSE:               fir.do_loop %[[VAL_18:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] {
-// ANSE:                 %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_11]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_20:.*]] = arith.subi %[[VAL_19]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_21:.*]] = arith.addi %[[VAL_18]], %[[VAL_20]] : index
-// ANSE:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_21]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// ANSE:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_18]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // ANSE:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<f32>
 // ANSE:                 %[[VAL_24:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_18]], %[[VAL_17]] : (!hlfir.expr<?x?xf32>, index, index) -> f32
 // ANSE:                 %[[VAL_25:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_17]] : (!hlfir.expr<?xf16>, index) -> f16
@@ -513,7 +441,6 @@ func.func @matmul_vector_matrix_real(%arg0: !hlfir.expr<?xf32>, %arg1: !hlfir.ex
 
 // NOANSE: hlfir.matmul
 
-// ANSE:           %[[VAL_2:.*]] = arith.constant 0 : index
 // ANSE:           %[[VAL_3:.*]] = arith.constant 1 : index
 // ANSE:           %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
 // ANSE:           %[[VAL_5:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xf32>) -> !fir.shape<1>
@@ -525,18 +452,12 @@ func.func @matmul_vector_matrix_real(%arg0: !hlfir.expr<?xf32>, %arg1: !hlfir.ex
 // ANSE:           ^bb0(%[[VAL_11:.*]]: !fir.ref<!fir.array<?xf32>>):
 // ANSE:             %[[VAL_12:.*]] = fir.embox %[[VAL_11]](%[[VAL_9]]) : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
 // ANSE:             fir.do_loop %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_3]] unordered {
-// ANSE:               %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_12]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-// ANSE:               %[[VAL_15:.*]] = arith.subi %[[VAL_14]]#0, %[[VAL_3]] : index
-// ANSE:               %[[VAL_16:.*]] = arith.addi %[[VAL_13]], %[[VAL_15]] : index
-// ANSE:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_12]] (%[[VAL_16]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// ANSE:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_12]] (%[[VAL_13]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // ANSE:               hlfir.assign %[[VAL_4]] to %[[VAL_17]] : f32, !fir.ref<f32>
 // ANSE:             }
 // ANSE:             fir.do_loop %[[VAL_18:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_3]] {
 // ANSE:               fir.do_loop %[[VAL_19:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_3]] {
-// ANSE:                 %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_12]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-// ANSE:                 %[[VAL_21:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_3]] : index
-// ANSE:                 %[[VAL_22:.*]] = arith.addi %[[VAL_19]], %[[VAL_21]] : index
-// ANSE:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]] (%[[VAL_22]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// ANSE:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]] (%[[VAL_19]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // ANSE:                 %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
 // ANSE:                 %[[VAL_25:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_18]] : (!hlfir.expr<?xf32>, index) -> f32
 // ANSE:                 %[[VAL_26:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_18]], %[[VAL_19]] : (!hlfir.expr<?x?xf16>, index, index) -> f16
diff --git a/flang/test/Transforms/optimize-array-repacking.fir b/flang/test/Transforms/optimize-array-repacking.fir
new file mode 100644
index 0000000000000..6269fa441fe44
--- /dev/null
+++ b/flang/test/Transforms/optimize-array-repacking.fir
@@ -0,0 +1,660 @@
+// Test that the redundant fir.[un]pack_array operations
+// are optimized away, when the source is statically known
+// to be contiguous.
+// RUN: fir-opt --optimize-array-repacking %s | FileCheck %s
+
+// The FIR below was produced by compiling the following Fortran sources with -mllvm -inline-all.
+// module inner
+// contains
+//   subroutine inner_repack1(x)
+//     real :: x(:)
+//   end subroutine inner_repack1
+//   subroutine inner_repack2(x)
+//     real :: x(:,:)
+//   end subroutine inner_repack2
+// end module inner
+
+// subroutine test_explicit_shape_cst(x)
+//   real :: x(100)
+//   call repack(x(1:50))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine test_explicit_shape_cst
+//
+// CHECK-LABEL:   func.func @_QPtest_explicit_shape_cst(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_explicit_shape_cst(%arg0: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x"}) { // x(1:50) of a constant-shape x(100) is passed to the inlined repack(x)
+  %c50 = arith.constant 50 : index // extent of the section x(1:50)
+  %c1 = arith.constant 1 : index // index of the first element of the section
+  %c100 = arith.constant 100 : index // declared extent of x
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_cstEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<100xf32>>
+  %3 = fir.shape %c50 : (index) -> !fir.shape<1>
+  %4 = fir.array_coor %2(%1) %c1 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32> // address of x(1)
+  %5 = fir.convert %4 : (!fir.ref<f32>) -> !fir.ref<!fir.array<50xf32>> // view that address as a 50-element array
+  %6 = fir.embox %5(%3) : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<50xf32>> // box built from a raw reference and a shape only (no slice operand)
+  %7 = fir.convert %6 : (!fir.box<!fir.array<50xf32>>) -> !fir.box<!fir.array<?xf32>> // drop the static extent from the box type
+  %8 = fir.dummy_scope : !fir.dscope
+  %9 = fir.pack_array %7 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking for the inlined repack's dummy x; the test expects the pass to remove it
+  %10 = fir.declare %9 dummy_scope %8 {uniq_name = "_QFtest_explicit_shape_cstFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>> // counterpart of the pack above; also expected to be removed
+  return
+}
+
+// subroutine test_explicit_shape_var(x, n, l, u)
+//   integer :: n, l, u
+//   real :: x(n)
+//   call repack(x(l:u))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine test_explicit_shape_var
+//
+// CHECK-LABEL:   func.func @_QPtest_explicit_shape_var(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_explicit_shape_var(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}, %arg2: !fir.ref<i32> {fir.bindc_name = "l"}, %arg3: !fir.ref<i32> {fir.bindc_name = "u"}) { // x(l:u) of an explicit-shape x(n) is passed to the inlined repack(x)
+  %c1 = arith.constant 1 : index // third operand of the fir.slice below
+  %c0 = arith.constant 0 : index // lower clamp for the extent of x
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_varEl"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_varEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %3 = fir.declare %arg3 dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_varEu"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %4 = fir.load %2 : !fir.ref<i32> // n
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index // extent of x: n when n > 0, otherwise 0
+  %8 = fir.shape %7 : (index) -> !fir.shape<1>
+  %9 = fir.declare %arg0(%8) dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_varEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  %10 = fir.load %1 : !fir.ref<i32> // l
+  %11 = fir.load %3 : !fir.ref<i32> // u
+  %12 = fir.convert %10 : (i32) -> index
+  %13 = fir.convert %11 : (i32) -> index
+  %14 = fir.slice %12, %13, %c1 : (index, index, index) -> !fir.slice<1> // slice for x(l:u); last operand is the constant 1
+  %15 = fir.embox %9(%8) [%14] : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>> // box of the sliced raw reference
+  %16 = fir.dummy_scope : !fir.dscope
+  %17 = fir.pack_array %15 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking for the inlined repack's dummy x; the test expects the pass to remove it
+  %18 = fir.declare %17 dummy_scope %16 {uniq_name = "_QFtest_explicit_shape_varFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %17 to %15 heap : !fir.box<!fir.array<?xf32>> // counterpart of the pack above; also expected to be removed
+  return
+}
+
+// subroutine test_assumed_size_cst(x)
+//   real :: x(*)
+//   call repack(x(1:50))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine test_assumed_size_cst
+//
+// CHECK-LABEL:   func.func @_QPtest_assumed_size_cst(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_assumed_size_cst(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}) { // x(1:50) of an assumed-size x(*) is passed to the inlined repack(x)
+  %c50 = arith.constant 50 : index // extent of the section x(1:50)
+  %c1 = arith.constant 1 : index // index of the first element of the section
+  %c-1 = arith.constant -1 : index // extent operand used in the shape of the assumed-size x
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.shape %c-1 : (index) -> !fir.shape<1>
+  %2 = fir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtest_assumed_size_cstEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  %3 = fir.shape %c50 : (index) -> !fir.shape<1>
+  %4 = fir.array_coor %2(%1) %c1 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32> // address of x(1)
+  %5 = fir.convert %4 : (!fir.ref<f32>) -> !fir.ref<!fir.array<50xf32>> // view that address as a 50-element array
+  %6 = fir.embox %5(%3) : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<50xf32>> // box built from a raw reference and a shape only (no slice operand)
+  %7 = fir.convert %6 : (!fir.box<!fir.array<50xf32>>) -> !fir.box<!fir.array<?xf32>> // drop the static extent from the box type
+  %8 = fir.dummy_scope : !fir.dscope
+  %9 = fir.pack_array %7 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking for the inlined repack's dummy x; the test expects the pass to remove it
+  %10 = fir.declare %9 dummy_scope %8 {uniq_name = "_QFtest_assumed_size_cstFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>> // counterpart of the pack above; also expected to be removed
+  return
+}
+
+// subroutine test_assumed_size_var(x, l, u)
+//   integer :: l, u
+//   real :: x(*)
+//   call repack(x(l:u))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine test_assumed_size_var
+//
+// CHECK-LABEL:   func.func @_QPtest_assumed_size_var(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_assumed_size_var(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "l"}, %arg2: !fir.ref<i32> {fir.bindc_name = "u"}) { // x(l:u) of an assumed-size x(*) is passed to the inlined repack(x)
+  %c1 = arith.constant 1 : index // third operand of the fir.slice below
+  %c-1 = arith.constant -1 : index // extent operand used in the shape of the assumed-size x
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_assumed_size_varEl"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtest_assumed_size_varEu"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %3 = fir.shape %c-1 : (index) -> !fir.shape<1>
+  %4 = fir.declare %arg0(%3) dummy_scope %0 {uniq_name = "_QFtest_assumed_size_varEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  %5 = fir.load %1 : !fir.ref<i32> // l
+  %6 = fir.load %2 : !fir.ref<i32> // u
+  %7 = fir.convert %5 : (i32) -> index
+  %8 = fir.convert %6 : (i32) -> index
+  %9 = fir.slice %7, %8, %c1 : (index, index, index) -> !fir.slice<1> // slice for x(l:u); last operand is the constant 1
+  %10 = fir.embox %4(%3) [%9] : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>> // box of the sliced raw reference
+  %11 = fir.dummy_scope : !fir.dscope
+  %12 = fir.pack_array %10 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking for the inlined repack's dummy x; the test expects the pass to remove it
+  %13 = fir.declare %12 dummy_scope %11 {uniq_name = "_QFtest_assumed_size_varFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %12 to %10 heap : !fir.box<!fir.array<?xf32>> // counterpart of the pack above; also expected to be removed
+  return
+}
+
+// subroutine test_allocatable_cst(x)
+//   real, allocatable :: x(:)
+//   call repack(x(10:50))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine test_allocatable_cst
+// Expectation: x(10:50) is emboxed as 41 elements starting at x(10) without a slice, so the pack/unpack pair is removed.
+// CHECK-LABEL:   func.func @_QPtest_allocatable_cst(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_allocatable_cst(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {fir.bindc_name = "x"}) {
+  %c0 = arith.constant 0 : index
+  %c41 = arith.constant 41 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatable_cstEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %2 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %3 = fir.shape %c41 : (index) -> !fir.shape<1>
+  %4 = fir.box_addr %2 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+  %5:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+  %6 = fir.shape_shift %5#0, %5#1 : (index, index) -> !fir.shapeshift<1>
+  %7 = fir.array_coor %4(%6) %c10 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, index) -> !fir.ref<f32> // address of element x(10)
+  %8 = fir.convert %7 : (!fir.ref<f32>) -> !fir.ref<!fir.array<41xf32>>
+  %9 = fir.embox %8(%3) : (!fir.ref<!fir.array<41xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<41xf32>>
+  %10 = fir.convert %9 : (!fir.box<!fir.array<41xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %11 = fir.dummy_scope : !fir.dscope
+  %12 = fir.pack_array %10 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %13 = fir.declare %12 dummy_scope %11 {uniq_name = "_QFtest_allocatable_cstFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %12 to %10 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_allocatable_var(x, l, u)
+//   integer :: l, u
+//   real, allocatable :: x(:)
+//   call repack(x(l:u))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine test_allocatable_var
+// Expectation: x(l:u) is a unit-stride section of an allocatable array, so the pack/unpack pair is removed.
+// CHECK-LABEL:   func.func @_QPtest_allocatable_var(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_allocatable_var(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "l"}, %arg2: !fir.ref<i32> {fir.bindc_name = "u"}) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_allocatable_varEl"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtest_allocatable_varEu"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %3 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatable_varEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %4 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %5 = fir.load %1 : !fir.ref<i32>
+  %6 = fir.load %2 : !fir.ref<i32>
+  %7 = fir.convert %5 : (i32) -> index
+  %8 = fir.convert %6 : (i32) -> index
+  %9 = fir.box_addr %4 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+  %10:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+  %11 = fir.shape_shift %10#0, %10#1 : (index, index) -> !fir.shapeshift<1>
+  %12 = fir.slice %7, %8, %c1 : (index, index, index) -> !fir.slice<1> // section l:u with constant stride 1
+  %13 = fir.embox %9(%11) [%12] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>>
+  %14 = fir.dummy_scope : !fir.dscope
+  %15 = fir.pack_array %13 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %16 = fir.declare %15 dummy_scope %14 {uniq_name = "_QFtest_allocatable_varFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %15 to %13 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_allocatable_full(x)
+//   real, allocatable :: x(:,:)
+//   call repack(x(:,:))
+// contains
+//   subroutine repack(x)
+//     real :: x(:,:)
+//   end subroutine repack
+// end subroutine test_allocatable_full
+// Expectation: x(:,:) selects the whole allocatable array with unit strides, so the pack/unpack pair is removed.
+// CHECK-LABEL:   func.func @_QPtest_allocatable_full(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_allocatable_full(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> {fir.bindc_name = "x"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatable_fullEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %2 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+  %4:3 = fir.box_dims %2, %c1 : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+  %5 = arith.addi %3#0, %3#1 : index
+  %6 = arith.subi %5, %c1 : index
+  %7 = arith.addi %4#0, %4#1 : index
+  %8 = arith.subi %7, %c1 : index
+  %9 = fir.box_addr %2 : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
+  %10 = fir.shape_shift %3#0, %3#1, %4#0, %4#1 : (index, index, index, index) -> !fir.shapeshift<2>
+  %11 = fir.slice %3#0, %6, %c1, %4#0, %8, %c1 : (index, index, index, index, index, index) -> !fir.slice<2> // per dimension: from #0 to (#0 + #1 - 1) of the box_dims results, stride 1
+  %12 = fir.embox %9(%10) [%11] : (!fir.heap<!fir.array<?x?xf32>>, !fir.shapeshift<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x?xf32>>
+  %13 = fir.dummy_scope : !fir.dscope
+  %14 = fir.pack_array %12 heap innermost : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+  %15 = fir.declare %14 dummy_scope %13 {uniq_name = "_QFtest_allocatable_fullFrepackEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+  fir.unpack_array %14 to %12 heap : !fir.box<!fir.array<?x?xf32>>
+  return
+}
+
+// subroutine test_explicit_shape_cst_chain(x)
+//   real :: x(100)
+//   call repack(x(1:50))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:)
+//     call inner_repack1(x)
+//   end subroutine repack
+// end subroutine test_explicit_shape_cst_chain
+// Expectation: x(1:50) is contiguous, so the pack/unpack pairs of both repack and inner_repack1 are removed.
+// CHECK-LABEL:   func.func @_QPtest_explicit_shape_cst_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_explicit_shape_cst_chain(%arg0: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x"}) {
+  %c50 = arith.constant 50 : index
+  %c1 = arith.constant 1 : index
+  %c100 = arith.constant 100 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_cst_chainEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<100xf32>>
+  %3 = fir.shape %c50 : (index) -> !fir.shape<1>
+  %4 = fir.array_coor %2(%1) %c1 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+  %5 = fir.convert %4 : (!fir.ref<f32>) -> !fir.ref<!fir.array<50xf32>>
+  %6 = fir.embox %5(%3) : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<50xf32>>
+  %7 = fir.convert %6 : (!fir.box<!fir.array<50xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %8 = fir.dummy_scope : !fir.dscope
+  %9 = fir.pack_array %7 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %10 = fir.declare %9 dummy_scope %8 {uniq_name = "_QFtest_explicit_shape_cst_chainFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %11 = fir.rebox %10 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %12 = fir.dummy_scope : !fir.dscope
+  %13 = fir.pack_array %11 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of inner_repack1's x; its input derives from the outer repacking result %9
+  %14 = fir.declare %13 dummy_scope %12 {uniq_name = "_QMinnerFinner_repack1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %13 to %11 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_explicit_shape_var_chain(x, n, l, u)
+//   integer :: n, l, u
+//   real :: x(n)
+//   call repack(x(l:u))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:)
+//     call inner_repack1(x)
+//   end subroutine repack
+// end subroutine test_explicit_shape_var_chain
+// Expectation: x(l:u) is a unit-stride section of x(n), so the pack/unpack pairs of both repack and inner_repack1 are removed.
+// CHECK-LABEL:   func.func @_QPtest_explicit_shape_var_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_explicit_shape_var_chain(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}, %arg2: !fir.ref<i32> {fir.bindc_name = "l"}, %arg3: !fir.ref<i32> {fir.bindc_name = "u"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_var_chainEl"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_var_chainEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %3 = fir.declare %arg3 dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_var_chainEu"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %4 = fir.load %2 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = arith.cmpi sgt, %5, %c0 : index
+  %7 = arith.select %6, %5, %c0 : index
+  %8 = fir.shape %7 : (index) -> !fir.shape<1> // extent of x(n): n if n > 0, otherwise 0
+  %9 = fir.declare %arg0(%8) dummy_scope %0 {uniq_name = "_QFtest_explicit_shape_var_chainEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  %10 = fir.load %1 : !fir.ref<i32>
+  %11 = fir.load %3 : !fir.ref<i32>
+  %12 = fir.convert %10 : (i32) -> index
+  %13 = fir.convert %11 : (i32) -> index
+  %14 = fir.slice %12, %13, %c1 : (index, index, index) -> !fir.slice<1> // section l:u with constant stride 1
+  %15 = fir.embox %9(%8) [%14] : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>>
+  %16 = fir.dummy_scope : !fir.dscope
+  %17 = fir.pack_array %15 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %18 = fir.declare %17 dummy_scope %16 {uniq_name = "_QFtest_explicit_shape_var_chainFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %19 = fir.rebox %18 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %20 = fir.dummy_scope : !fir.dscope
+  %21 = fir.pack_array %19 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of inner_repack1's x; its input derives from the outer repacking result %17
+  %22 = fir.declare %21 dummy_scope %20 {uniq_name = "_QMinnerFinner_repack1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %21 to %19 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %17 to %15 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_assumed_size_cst_chain(x)
+//   real :: x(*)
+//   call repack(x(1:50))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:)
+//     call inner_repack1(x)
+//   end subroutine repack
+// end subroutine test_assumed_size_cst_chain
+// Expectation: x(1:50) of an assumed-size array is contiguous, so the pack/unpack pairs of both repack and inner_repack1 are removed.
+// CHECK-LABEL:   func.func @_QPtest_assumed_size_cst_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_assumed_size_cst_chain(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
+  %c50 = arith.constant 50 : index
+  %c1 = arith.constant 1 : index
+  %c-1 = arith.constant -1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.shape %c-1 : (index) -> !fir.shape<1> // shape used for x(*): the extent is the constant -1
+  %2 = fir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFtest_assumed_size_cst_chainEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  %3 = fir.shape %c50 : (index) -> !fir.shape<1>
+  %4 = fir.array_coor %2(%1) %c1 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+  %5 = fir.convert %4 : (!fir.ref<f32>) -> !fir.ref<!fir.array<50xf32>>
+  %6 = fir.embox %5(%3) : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<50xf32>>
+  %7 = fir.convert %6 : (!fir.box<!fir.array<50xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %8 = fir.dummy_scope : !fir.dscope
+  %9 = fir.pack_array %7 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %10 = fir.declare %9 dummy_scope %8 {uniq_name = "_QFtest_assumed_size_cst_chainFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %11 = fir.rebox %10 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %12 = fir.dummy_scope : !fir.dscope
+  %13 = fir.pack_array %11 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of inner_repack1's x; its input derives from the outer repacking result %9
+  %14 = fir.declare %13 dummy_scope %12 {uniq_name = "_QMinnerFinner_repack1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %13 to %11 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_assumed_size_var_chain(x, l, u)
+//   integer :: l, u
+//   real :: x(*)
+//   call repack(x(l:u))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:)
+//     call inner_repack1(x)
+//   end subroutine repack
+// end subroutine test_assumed_size_var_chain
+// Expectation: x(l:u) is a unit-stride section of an assumed-size array, so the pack/unpack pairs of both repack and inner_repack1 are removed.
+// CHECK-LABEL:   func.func @_QPtest_assumed_size_var_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_assumed_size_var_chain(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "l"}, %arg2: !fir.ref<i32> {fir.bindc_name = "u"}) {
+  %c1 = arith.constant 1 : index
+  %c-1 = arith.constant -1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_assumed_size_var_chainEl"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtest_assumed_size_var_chainEu"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %3 = fir.shape %c-1 : (index) -> !fir.shape<1> // shape used for x(*): the extent is the constant -1
+  %4 = fir.declare %arg0(%3) dummy_scope %0 {uniq_name = "_QFtest_assumed_size_var_chainEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  %5 = fir.load %1 : !fir.ref<i32>
+  %6 = fir.load %2 : !fir.ref<i32>
+  %7 = fir.convert %5 : (i32) -> index
+  %8 = fir.convert %6 : (i32) -> index
+  %9 = fir.slice %7, %8, %c1 : (index, index, index) -> !fir.slice<1> // section l:u with constant stride 1
+  %10 = fir.embox %4(%3) [%9] : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>>
+  %11 = fir.dummy_scope : !fir.dscope
+  %12 = fir.pack_array %10 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %13 = fir.declare %12 dummy_scope %11 {uniq_name = "_QFtest_assumed_size_var_chainFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %14 = fir.rebox %13 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %15 = fir.dummy_scope : !fir.dscope
+  %16 = fir.pack_array %14 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of inner_repack1's x; its input derives from the outer repacking result %12
+  %17 = fir.declare %16 dummy_scope %15 {uniq_name = "_QMinnerFinner_repack1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %16 to %14 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %12 to %10 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_allocatable_cst_chain(x)
+//   real, allocatable :: x(:)
+//   call repack(x(10:50))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:)
+//     call inner_repack1(x)
+//   end subroutine repack
+// end subroutine test_allocatable_cst_chain
+// Expectation: x(10:50) is emboxed as 41 elements starting at x(10), so the pack/unpack pairs of both repack and inner_repack1 are removed.
+// CHECK-LABEL:   func.func @_QPtest_allocatable_cst_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_allocatable_cst_chain(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {fir.bindc_name = "x"}) {
+  %c0 = arith.constant 0 : index
+  %c41 = arith.constant 41 : index
+  %c10 = arith.constant 10 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatable_cst_chainEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %2 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %3 = fir.shape %c41 : (index) -> !fir.shape<1>
+  %4 = fir.box_addr %2 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+  %5:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+  %6 = fir.shape_shift %5#0, %5#1 : (index, index) -> !fir.shapeshift<1>
+  %7 = fir.array_coor %4(%6) %c10 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, index) -> !fir.ref<f32> // address of element x(10)
+  %8 = fir.convert %7 : (!fir.ref<f32>) -> !fir.ref<!fir.array<41xf32>>
+  %9 = fir.embox %8(%3) : (!fir.ref<!fir.array<41xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<41xf32>>
+  %10 = fir.convert %9 : (!fir.box<!fir.array<41xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %11 = fir.dummy_scope : !fir.dscope
+  %12 = fir.pack_array %10 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %13 = fir.declare %12 dummy_scope %11 {uniq_name = "_QFtest_allocatable_cst_chainFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %14 = fir.rebox %13 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %15 = fir.dummy_scope : !fir.dscope
+  %16 = fir.pack_array %14 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of inner_repack1's x; its input derives from the outer repacking result %12
+  %17 = fir.declare %16 dummy_scope %15 {uniq_name = "_QMinnerFinner_repack1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %16 to %14 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %12 to %10 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_allocatable_var_chain(x, l, u)
+//   integer :: l, u
+//   real, allocatable :: x(:)
+//   call repack(x(l:u))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:)
+//     call inner_repack1(x)
+//   end subroutine repack
+// end subroutine test_allocatable_var_chain
+// Expectation: x(l:u) is a unit-stride section of an allocatable array, so the pack/unpack pairs of both repack and inner_repack1 are removed.
+// CHECK-LABEL:   func.func @_QPtest_allocatable_var_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_allocatable_var_chain(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "l"}, %arg2: !fir.ref<i32> {fir.bindc_name = "u"}) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_allocatable_var_chainEl"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtest_allocatable_var_chainEu"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %3 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatable_var_chainEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %4 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+  %5 = fir.load %1 : !fir.ref<i32>
+  %6 = fir.load %2 : !fir.ref<i32>
+  %7 = fir.convert %5 : (i32) -> index
+  %8 = fir.convert %6 : (i32) -> index
+  %9 = fir.box_addr %4 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+  %10:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+  %11 = fir.shape_shift %10#0, %10#1 : (index, index) -> !fir.shapeshift<1>
+  %12 = fir.slice %7, %8, %c1 : (index, index, index) -> !fir.slice<1> // section l:u with constant stride 1
+  %13 = fir.embox %9(%11) [%12] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>>
+  %14 = fir.dummy_scope : !fir.dscope
+  %15 = fir.pack_array %13 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %16 = fir.declare %15 dummy_scope %14 {uniq_name = "_QFtest_allocatable_var_chainFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %17 = fir.rebox %16 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %18 = fir.dummy_scope : !fir.dscope
+  %19 = fir.pack_array %17 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of inner_repack1's x; its input derives from the outer repacking result %15
+  %20 = fir.declare %19 dummy_scope %18 {uniq_name = "_QMinnerFinner_repack1Ex"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %19 to %17 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %15 to %13 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine test_allocatable_full_chain(x)
+//   real, allocatable :: x(:,:)
+//   call repack(x(:,:))
+// contains
+//   subroutine repack(x)
+//     use inner
+//     real :: x(:,:)
+//     call inner_repack2(x)
+//   end subroutine repack
+// end subroutine test_allocatable_full_chain
+// Expectation: x(:,:) selects the whole allocatable array with unit strides, so the pack/unpack pairs of both repack and inner_repack2 are removed.
+// CHECK-LABEL:   func.func @_QPtest_allocatable_full_chain(
+// CHECK-NOT: fir.{{.*}}pack_array
+func.func @_QPtest_allocatable_full_chain(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> {fir.bindc_name = "x"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_allocatable_full_chainEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %2 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+  %3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+  %4:3 = fir.box_dims %2, %c1 : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+  %5 = arith.addi %3#0, %3#1 : index
+  %6 = arith.subi %5, %c1 : index
+  %7 = arith.addi %4#0, %4#1 : index
+  %8 = arith.subi %7, %c1 : index
+  %9 = fir.box_addr %2 : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
+  %10 = fir.shape_shift %3#0, %3#1, %4#0, %4#1 : (index, index, index, index) -> !fir.shapeshift<2>
+  %11 = fir.slice %3#0, %6, %c1, %4#0, %8, %c1 : (index, index, index, index, index, index) -> !fir.slice<2> // per dimension: from #0 to (#0 + #1 - 1) of the box_dims results, stride 1
+  %12 = fir.embox %9(%10) [%11] : (!fir.heap<!fir.array<?x?xf32>>, !fir.shapeshift<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x?xf32>>
+  %13 = fir.dummy_scope : !fir.dscope
+  %14 = fir.pack_array %12 heap innermost : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+  %15 = fir.declare %14 dummy_scope %13 {uniq_name = "_QFtest_allocatable_full_chainFrepackEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+  %16 = fir.rebox %15 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+  %17 = fir.dummy_scope : !fir.dscope
+  %18 = fir.pack_array %16 heap innermost : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> // repacking of inner_repack2's x; its input derives from the outer repacking result %14
+  %19 = fir.declare %18 dummy_scope %17 {uniq_name = "_QMinnerFinner_repack2Ex"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+  fir.unpack_array %18 to %16 heap : !fir.box<!fir.array<?x?xf32>>
+  fir.unpack_array %14 to %12 heap : !fir.box<!fir.array<?x?xf32>>
+  return
+}
+
+// TODO: if both fir.pack_array operations have the same properties,
+// then the second one is redundant, because the first
+// repack already makes 'x' contiguous.
+// subroutine neg_test_assumed_shape(x)
+//   real :: x(:)
+//   call repack(x(1:50))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine neg_test_assumed_shape
+// Expectation: x is an assumed-shape dummy whose contiguity is not statically known, so both pack/unpack pairs are currently kept.
+// CHECK-LABEL:   func.func @_QPneg_test_assumed_shape(
+// CHECK: fir.pack_array
+// CHECK: fir.pack_array
+// CHECK: fir.unpack_array
+// CHECK: fir.unpack_array
+func.func @_QPneg_test_assumed_shape(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
+  %c50 = arith.constant 50 : index
+  %c1 = arith.constant 1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.pack_array %arg0 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of the assumed-shape dummy x itself
+  %2 = fir.declare %1 dummy_scope %0 {uniq_name = "_QFneg_test_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  %3 = fir.rebox %2 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %4 = fir.slice %c1, %c50, %c1 : (index, index, index) -> !fir.slice<1>
+  %5 = fir.rebox %3 [%4] : (!fir.box<!fir.array<?xf32>>, !fir.slice<1>) -> !fir.box<!fir.array<50xf32>>
+  %6 = fir.convert %5 : (!fir.box<!fir.array<50xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %7 = fir.dummy_scope : !fir.dscope
+  %8 = fir.pack_array %6 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> // repacking of the section x(1:50) taken from the first repacking result
+  %9 = fir.declare %8 dummy_scope %7 {uniq_name = "_QFneg_test_assumed_shapeFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %8 to %6 heap : !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %1 to %arg0 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine neg_test_non_contig_slice_cst(x)
+//   real :: x(100)
+//   call repack(x(1:50:2))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine neg_test_non_contig_slice_cst
+// Expectation: x(1:50:2) has a constant stride of 2 and is not contiguous, so the pack/unpack pair is kept.
+// CHECK-LABEL:   func.func @_QPneg_test_non_contig_slice_cst(
+// CHECK: fir.pack_array
+// CHECK: fir.unpack_array
+func.func @_QPneg_test_non_contig_slice_cst(%arg0: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x"}) {
+  %c2 = arith.constant 2 : index
+  %c50 = arith.constant 50 : index
+  %c1 = arith.constant 1 : index
+  %c100 = arith.constant 100 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.declare %arg0(%1) dummy_scope %0 {uniq_name = "_QFneg_test_non_contig_slice_cstEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<100xf32>>
+  %3 = fir.slice %c1, %c50, %c2 : (index, index, index) -> !fir.slice<1> // section 1:50:2
+  %4 = fir.embox %2(%1) [%3] : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<25xf32>>
+  %5 = fir.convert %4 : (!fir.box<!fir.array<25xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %6 = fir.dummy_scope : !fir.dscope
+  %7 = fir.pack_array %5 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %8 = fir.declare %7 dummy_scope %6 {uniq_name = "_QFneg_test_non_contig_slice_cstFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %7 to %5 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine neg_test_non_contig_slice_var(x, s)
+//   integer :: s
+//   real :: x(100)
+//   call repack(x(1:50:s))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine neg_test_non_contig_slice_var
+// Expectation: the stride s of x(1:50:s) is only known at runtime, so the pack/unpack pair is kept.
+// CHECK-LABEL:   func.func @_QPneg_test_non_contig_slice_var(
+// CHECK: fir.pack_array
+// CHECK: fir.unpack_array
+func.func @_QPneg_test_non_contig_slice_var(%arg0: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "s"}) {
+  %c50 = arith.constant 50 : index
+  %c1 = arith.constant 1 : index
+  %c100 = arith.constant 100 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFneg_test_non_contig_slice_varEs"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %2 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %3 = fir.declare %arg0(%2) dummy_scope %0 {uniq_name = "_QFneg_test_non_contig_slice_varEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<100xf32>>
+  %4 = fir.load %1 : !fir.ref<i32>
+  %5 = fir.convert %4 : (i32) -> index
+  %6 = fir.slice %c1, %c50, %5 : (index, index, index) -> !fir.slice<1> // section 1:50:s; the stride %5 is the loaded value of s
+  %7 = fir.embox %3(%2) [%6] : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xf32>>
+  %8 = fir.dummy_scope : !fir.dscope
+  %9 = fir.pack_array %7 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %10 = fir.declare %9 dummy_scope %8 {uniq_name = "_QFneg_test_non_contig_slice_varFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>>
+  return
+}
+
+// subroutine neg_test_pointer(x)
+//   real, pointer :: x(:)
+//   call repack(x(1:50))
+// contains
+//   subroutine repack(x)
+//     real :: x(:)
+//   end subroutine repack
+// end subroutine neg_test_pointer
+// Expectation: the pack/unpack pair is kept for a section of a POINTER array (presumably because its target may be non-contiguous).
+// CHECK-LABEL:   func.func @_QPneg_test_pointer(
+// CHECK: fir.pack_array
+// CHECK: fir.unpack_array
+func.func @_QPneg_test_pointer(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {fir.bindc_name = "x"}) {
+  %c0 = arith.constant 0 : index
+  %c50 = arith.constant 50 : index
+  %c1 = arith.constant 1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFneg_test_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+  %2 = fir.load %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+  %3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> (index, index, index)
+  %4 = fir.shift %3#0 : (index) -> !fir.shift<1>
+  %5 = fir.slice %c1, %c50, %c1 : (index, index, index) -> !fir.slice<1>
+  %6 = fir.rebox %2(%4) [%5] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, !fir.shift<1>, !fir.slice<1>) -> !fir.box<!fir.array<50xf32>> // section 1:50 taken from the pointer's own box rather than from a base address
+  %7 = fir.convert %6 : (!fir.box<!fir.array<50xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %8 = fir.dummy_scope : !fir.dscope
+  %9 = fir.pack_array %7 heap whole : (!fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>>
+  %10 = fir.declare %9 dummy_scope %8 {uniq_name = "_QFneg_test_pointerFrepackEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+  fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>>
+  return
+}

From be0a1596ee2380e699d697df5e1bbd079c7e7199 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin@nvidia.com>
Date: Thu, 10 Jul 2025 09:29:08 -0700
Subject: [PATCH 2/2] Made utils static.

---
 flang/lib/Optimizer/Dialect/FIROps.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index d04306a48e922..372b7b4610e05 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -1946,7 +1946,7 @@ llvm::LogicalResult fir::EmboxOp::verify() {
 
 /// Returns true if \p extent matches the extent of the \p box's
 /// dimension \p dim.
-bool isBoxExtent(mlir::Value box, std::int64_t dim, mlir::Value extent) {
+static bool isBoxExtent(mlir::Value box, std::int64_t dim, mlir::Value extent) {
   if (auto op = extent.getDefiningOp<fir::BoxDimsOp>())
     if (op.getVal() == box && op.getExtent() == extent)
       if (auto dimOperand = fir::getIntIfConstant(op.getDim()))
@@ -1957,8 +1957,8 @@ bool isBoxExtent(mlir::Value box, std::int64_t dim, mlir::Value extent) {
 /// Returns true if \p lb matches the lower bound of the \p box's
 /// dimension \p dim. If \p mayHaveNonDefaultLowerBounds is false,
 /// then \p lb may be an integer constant 1.
-bool isBoxLb(mlir::Value box, std::int64_t dim, mlir::Value lb,
-             bool mayHaveNonDefaultLowerBounds = true) {
+static bool isBoxLb(mlir::Value box, std::int64_t dim, mlir::Value lb,
+                    bool mayHaveNonDefaultLowerBounds = true) {
   if (auto op = lb.getDefiningOp<fir::BoxDimsOp>()) {
     if (op.getVal() == box && op.getLowerBound() == lb)
       if (auto dimOperand = fir::getIntIfConstant(op.getDim()))
@@ -1978,8 +1978,8 @@ bool isBoxLb(mlir::Value box, std::int64_t dim, mlir::Value lb,
 /// tries its best to recognize the computation pattern.
 /// The conservative result 'false' does not necessarily mean
 /// that \p ub is not an actual upper bound value.
-bool isBoxUb(mlir::Value box, std::int64_t dim, mlir::Value ub,
-             bool mayHaveNonDefaultLowerBounds = true) {
+static bool isBoxUb(mlir::Value box, std::int64_t dim, mlir::Value ub,
+                    bool mayHaveNonDefaultLowerBounds = true) {
   if (auto sub1 = ub.getDefiningOp<mlir::arith::SubIOp>()) {
     auto one = fir::getIntIfConstant(sub1.getOperand(1));
     if (!one || *one != 1)