ROCm
diff --git a/src/include/migraphx/byte.hpp b/src/include/migraphx/byte.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/src/include/migraphx/op/pack_fp4.hpp b/src/include/migraphx/op/pack_fp4.hpp
Lines changed: 21 additions & 21 deletions
diff --git a/src/include/migraphx/op/unpack_fp4.hpp b/src/include/migraphx/op/unpack_fp4.hpp
Lines changed: 11 additions & 14 deletions
diff --git a/src/include/migraphx/reduce_dims.hpp b/src/include/migraphx/reduce_dims.hpp
Lines changed: 2 additions & 1 deletion
diff --git a/src/permutation.cpp b/src/permutation.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/src/shape.cpp b/src/shape.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/src/targets/gpu/jit/pack_fp4.cpp b/src/targets/gpu/jit/pack_fp4.cpp
Lines changed: 88 additions & 0 deletions
diff --git a/src/targets/gpu/jit/unpack_fp4.cpp b/src/targets/gpu/jit/unpack_fp4.cpp
Lines changed: 88 additions & 0 deletions
diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp
Lines changed: 1 addition & 0 deletions
@@ -46,7 +46,7 @@ template <class IntType,
           MIGRAPHX_REQUIRES(std::is_integral<IntType>{} and std::is_unsigned<IntType>{})>
 constexpr byte operator<<(byte b, IntType shift) noexcept
 {
-    return static_cast<byte>(static_cast<unsigned char>(b) << shift);
+    return static_cast<byte>(static_cast<uint8_t>(b) << shift);
 };
 
 template <class IntType,
 
@@ -57,6 +57,10 @@ struct pack_fp4
     {
         check_shapes{inputs, *this}.same_dims().has(1);
         const auto& in_shape = inputs.front();
+        if(in_shape.type() != migraphx::shape::float_type)
+        {
+            MIGRAPHX_THROW("PACK_FP4: Only float32 type input is supported");
+        }
         auto new_lens        = in_shape.lens();
         if(new_lens[axis] % 2 != 0)
         {
@@ -68,31 +72,27 @@ struct pack_fp4
 
     argument compute(const shape& output_shape, const std::vector<argument>& args) const
     {
-        auto input    = args.front();
+        const auto& input = args.front();
         auto in_shape = input.get_shape();
 
-        migraphx::shape uint8_shape = shape{migraphx::shape::uint8_type, output_shape.lens()};
-        argument uint8_arg{uint8_shape};
-        uint8_arg.visit([&](auto out) {
-            input.visit([&](auto inp) {
-                par_for(output_shape.elements(), [&](auto i) {
-                    using inp_type         = typename decltype(inp)::value_type;
-                    auto data_idx          = output_shape.multi(i);
-                    auto in_data_multi_idx = data_idx;
-                    in_data_multi_idx[axis] *= 2;
-                    inp_type inp_val0 = inp[in_data_multi_idx];
-                    in_data_multi_idx[axis] += 1;
-                    inp_type inp_val1 = inp[in_data_multi_idx];
-                    uint8_t out_val0  = float_to_fp4(inp_val0);
-                    uint8_t out_val1  = float_to_fp4(inp_val1);
-                    // NOTE: integral promotion occurs when bitshifting for uint8_t
-                    out[i] = static_cast<uint8_t>(out_val1 << 4u) |
-                             static_cast<uint8_t>(out_val0 & 0xFu);
-                });
+        argument result{output_shape};
+        auto out = result.get<uint8_t>();
+        input.visit([&](auto inp) {
+            par_for(output_shape.elements(), [&](auto i) {
+                using inp_type         = typename decltype(inp)::value_type;
+                auto data_idx          = output_shape.multi(i);
+                auto in_data_multi_idx = data_idx;
+                in_data_multi_idx[axis] *= 2;
+                inp_type inp_val0 = inp[in_data_multi_idx];
+                in_data_multi_idx[axis] += 1;
+                inp_type inp_val1 = inp[in_data_multi_idx];
+                uint8_t out_val0  = float_to_fp4(inp_val0);
+                uint8_t out_val1  = float_to_fp4(inp_val1);
+                // NOTE: integral promotion occurs when bitshifting for uint8_t
+                out[i] =
+                    static_cast<uint8_t>(out_val1 << 4u) | static_cast<uint8_t>(out_val0 & 0xFu);
             });
         });
-        migraphx::argument result =
-            uint8_arg.reshape({migraphx::shape::fp4x2_type, output_shape.lens()});
         return result;
     }
 };
 
@@ -73,24 +73,21 @@ struct unpack_fp4
         const auto& input = args.front();
         auto in_shape     = input.get_shape();
 
-        argument uint8_input        = input.reshape({migraphx::shape::uint8_type, in_shape.lens()});
         migraphx::shape float_shape = shape{migraphx::shape::float_type, output_shape.lens()};
         argument float_arg{float_shape};
-
+        auto inp = input.get<uint8_t>();
         float_arg.visit([&](auto out) {
-            uint8_input.visit([&](auto inp) {
-                par_for(in_shape.elements(), [&](auto i) {
-                    auto data_idx = in_shape.multi(i);
-                    data_idx[axis] *= 2;
-                    // unpacking 2 unsigned parts
-                    // unpacking 4 least significant bits first
-                    uint8_t fp4_val = inp[i];
-                    out[data_idx]   = fp4_to_float(fp4_val);
+            par_for(in_shape.elements(), [&](auto i) {
+                auto data_idx = in_shape.multi(i);
+                data_idx[axis] *= 2;
+                // unpacking 2 unsigned parts
+                // unpacking 4 least significant bits first
+                uint8_t fp4_val = inp[i];
+                out[data_idx]   = fp4_to_float(fp4_val);
 
-                    data_idx[axis] += 1;
-                    fp4_val       = fp4_val >> 4u;
-                    out[data_idx] = fp4_to_float(fp4_val);
-                });
+                data_idx[axis] += 1;
+                fp4_val       = fp4_val >> 4u;
+                out[data_idx] = fp4_to_float(fp4_val);
             });
         });
         return float_arg;
 
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -31,6 +31,7 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
+/// Collapse adjacent shape dimensions that are the same between shapes.
 MIGRAPHX_EXPORT std::vector<shape> reduce_dims(const std::vector<shape>& shapes);
 
 } // namespace MIGRAPHX_INLINE_NS
 
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -74,6 +74,7 @@ std::vector<int64_t> find_permutation(const std::vector<shape>& shapes)
     return it->first;
 }
 
+/// Normalize shapes by reordering them by their permutation
 std::vector<shape> normalize_permutation(const std::vector<shape>& shapes)
 {
     auto result = shapes;
 
@@ -270,7 +270,7 @@ std::string shape::cpp_type(shape::type_t t)
     switch(t)
     {
     case tuple_type: MIGRAPHX_THROW("No C++ type for tuple");
-    case fp4x2_type: MIGRAPHX_THROW("No C++ type for fp4x2_type");
+    case fp4x2_type: return "uint8_t";
 #define MIGRAPHX_SHAPE_GENERATE_CPP_TYPE_CASE(x, t) \
     case x: return #t;
         MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_CPP_TYPE_CASE)
 
@@ -0,0 +1,88 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "migraphx/instruction.hpp"
+#include "migraphx/instruction_ref.hpp"
+#include <migraphx/reduce_dims.hpp>
+#include <migraphx/gpu/compiler.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/compile_hip_code_object.hpp>
+#include <migraphx/gpu/compile_hip.hpp>
+#include <migraphx/gpu/compile_gen.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+static const char* const pack_fp4_kernel = R"__migraphx__(
+#include <migraphx/kernels/pack_fp4.hpp>
+#include <args.hpp>
+
+namespace migraphx {
+
+extern "C" {
+
+MIGRAPHX_GLOBAL void ${kernel}(${params}) 
+{
+    transform_args(make_tensors())(${args})([](auto... xs) {
+        pack_fp4<${axis}>(xs...);
+    });
+}
+    
+}
+
+} // namespace migraphx
+
+)__migraphx__";
+
+struct pack_fp4_compiler : compiler<pack_fp4_compiler> // builds the GPU kernel for the "pack_fp4" op
+{
+    std::vector<std::string> names() const { return {"pack_fp4"}; } // presumably the op name this compiler is looked up by
+
+    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    { // fills the compile options, substitutes the pack_fp4_kernel placeholders, then compiles the source
+        hip_compile_options options;
+        options.inputs         = inputs;
+        options.output         = inputs.back(); // the last shape in `inputs` is used as the output shape
+        options.virtual_inputs = reduce_dims(normalize_permutation(options.inputs)); // NOTE(review): confirm `axis` still names the same dim in these shapes
+        options.kernel_name    = "pack_fp4_kernel";
+        options.set_launch_params(v, compute_global_for(ctx, inputs.back().elements())); // launch size from the output element count
+
+        auto src =
+            interpolate_string(pack_fp4_kernel,
+                               {{"kernel", options.kernel_name},
+                                {"params", enum_params(options.inputs.size(), "void * private_p")},
+                                {"args", enum_params(options.inputs.size(), "private_p")},
+                                {"axis", std::to_string(v.at("axis").to<int>())}}); // axis is read from the op's value `v`
+        return compile_hip_code_object(ctx, src, options);
+    }
+
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    { // forwards the instruction's input shapes and the op's value to compile_op
+        return compile_op(ctx, to_shapes(ins->inputs()), op.to_value());
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
@@ -0,0 +1,88 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "migraphx/instruction.hpp"
+#include "migraphx/instruction_ref.hpp"
+#include <migraphx/reduce_dims.hpp>
+#include <migraphx/gpu/compiler.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/compile_hip_code_object.hpp>
+#include <migraphx/gpu/compile_hip.hpp>
+#include <migraphx/gpu/compile_gen.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+static const char* const unpack_fp4_kernel = R"__migraphx__(
+#include <migraphx/kernels/unpack_fp4.hpp>
+#include <args.hpp>
+
+namespace migraphx {
+
+extern "C" {
+
+MIGRAPHX_GLOBAL void ${kernel}(${params}) 
+{
+    transform_args(make_tensors())(${args})([](auto... xs) {
+        unpack_fp4<${axis}>(xs...);
+    });
+}
+    
+}
+
+} // namespace migraphx
+
+)__migraphx__";
+
+struct unpack_fp4_compiler : compiler<unpack_fp4_compiler> // builds the GPU kernel for the "unpack_fp4" op
+{
+    std::vector<std::string> names() const { return {"unpack_fp4"}; } // presumably the op name this compiler is looked up by
+
+    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    { // fills the compile options, substitutes the unpack_fp4_kernel placeholders, then compiles the source
+        hip_compile_options options;
+        options.inputs         = inputs;
+        options.output         = inputs.back(); // the last shape in `inputs` is used as the output shape
+        options.virtual_inputs = reduce_dims(normalize_permutation(options.inputs)); // NOTE(review): confirm `axis` still names the same dim in these shapes
+        options.kernel_name    = "unpack_fp4_kernel";
+        options.set_launch_params(v, compute_global_for(ctx, inputs.front().elements())); // launch size from the first input's element count, not the output's
+
+        auto src =
+            interpolate_string(unpack_fp4_kernel,
+                               {{"kernel", options.kernel_name},
+                                {"params", enum_params(options.inputs.size(), "void * private_p")},
+                                {"args", enum_params(options.inputs.size(), "private_p")},
+                                {"axis", std::to_string(v.at("axis").to<int>())}}); // axis is read from the op's value `v`
+        return compile_hip_code_object(ctx, src, options);
+    }
+
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    { // forwards the instruction's input shapes and the op's value to compile_op
+        return compile_op(ctx, to_shapes(ins->inputs()), op.to_value());
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
@@ -281,6 +281,7 @@ constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value, Co
 
     while(count > 0)
     {
+        // NOLINTNEXTLINE(readability-qualified-auto)
         auto it   = first;
         auto step = count / 2;
         it += step;
Original file line number	Diff line number	Diff line change
`@@ -46,7 +46,7 @@ template <class IntType,`
`46`	`46`	`MIGRAPHX_REQUIRES(std::is_integral<IntType>{} and std::is_unsigned<IntType>{})>`
`47`	`47`	`constexpr byte operator<<(byte b, IntType shift) noexcept`
`48`	`48`	`{`
`49`		`- return static_cast<byte>(static_cast<unsigned char>(b) << shift);`
	`49`	`+ return static_cast<byte>(static_cast<uint8_t>(b) << shift);`
`50`	`50`	`};`
`51`	`51`
`52`	`52`	`template <class IntType,`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`/*`
`2`	`2`	`* The MIT License (MIT)`
`3`	`3`	`*`
`4`		`- * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.`
	`4`	`+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.`
`5`	`5`	`*`
`6`	`6`	`* Permission is hereby granted, free of charge, to any person obtaining a copy`
`7`	`7`	`* of this software and associated documentation files (the "Software"), to deal`
`@@ -31,6 +31,7 @@`
`31`	`31`	`namespace migraphx {`
`32`	`32`	`inline namespace MIGRAPHX_INLINE_NS {`
`33`	`33`
	`34`	`+/// Collapse adjacent shape dimensions that are the same between shapes.`
`34`	`35`	`MIGRAPHX_EXPORT std::vector<shape> reduce_dims(const std::vector<shape>& shapes);`
`35`	`36`
`36`	`37`	`} // namespace MIGRAPHX_INLINE_NS`
Original file line number	Diff line number	Diff line change
`@@ -270,7 +270,7 @@ std::string shape::cpp_type(shape::type_t t)`
`270`	`270`	`switch(t)`
`271`	`271`	`{`
`272`	`272`	`case tuple_type: MIGRAPHX_THROW("No C++ type for tuple");`
`273`		`- case fp4x2_type: MIGRAPHX_THROW("No C++ type for fp4x2_type");`
	`273`	`+ case fp4x2_type: return "uint8_t";`
`274`	`274`	`#define MIGRAPHX_SHAPE_GENERATE_CPP_TYPE_CASE(x, t) \`
`275`	`275`	`case x: return #t;`
`276`	`276`	`MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_CPP_TYPE_CASE)`
Original file line number	Diff line number	Diff line change
`@@ -281,6 +281,7 @@ constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value, Co`
`281`	`281`
`282`	`282`	`while(count > 0)`
`283`	`283`	`{`
	`284`	`+ // NOLINTNEXTLINE(readability-qualified-auto)`
`284`	`285`	`auto it = first;`
`285`	`286`	`auto step = count / 2;`
`286`	`287`	`it += step;`