ROCm
diff --git a/‎Jenkinsfile‎
Lines changed: 1 addition & 1 deletion b/‎Jenkinsfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions b/‎src/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/api/api.cpp‎
Lines changed: 2 additions & 0 deletions b/‎src/api/api.cpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/api/include/migraphx/migraphx.h‎
Lines changed: 1 addition & 0 deletions b/‎src/api/include/migraphx/migraphx.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/include/migraphx/bit_cast.hpp‎
Lines changed: 18 additions & 15 deletions b/‎src/include/migraphx/bit_cast.hpp‎
Lines changed: 18 additions & 15 deletions
diff --git a/‎src/include/migraphx/fp4_casts.hpp‎
Lines changed: 80 additions & 0 deletions b/‎src/include/migraphx/fp4_casts.hpp‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎src/include/migraphx/iota_iterator.hpp‎
Lines changed: 36 additions & 97 deletions b/‎src/include/migraphx/iota_iterator.hpp‎
Lines changed: 36 additions & 97 deletions
@@ -138,7 +138,7 @@ node("(rocmtest || migraphx)") {
             checkout scm
             def calculateImageTagScript = """
                 shopt -s globstar
-                sha256sum **/Dockerfile **/*requirements.txt **/install_prereqs.sh **/rbuild.ini | sha256sum | cut -d " " -f 1
+                sha256sum **/Dockerfile **/*requirements.txt **/install_prereqs.sh **/rbuild.ini **/test/onnx/.onnxrt-commit | sha256sum | cut -d " " -f 1
             """
             env.IMAGE_TAG = sh(script: "bash -c '${calculateImageTagScript}'", returnStdout: true).trim()
             imageExists = sh(script: "docker manifest inspect ${DOCKER_IMAGE}:${IMAGE_TAG}", returnStatus: true) == 0
 
@@ -28,4 +28,4 @@ pybind/pybind11@3e9dfa2866941655c56877882565e7577de6fc7b --build
 msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off -DCMAKE_POLICY_VERSION_MINIMUM=3.5
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 ROCm/composable_kernel@b7775add2d28251674d81e220cd4a857b90b997a -DCK_BUILD_JIT_LIB=On -DCMAKE_POSITION_INDEPENDENT_CODE=On
-ROCm/rocMLIR@59c57483d3bf9b738c586580dbb793b05aa41c3e -DBUILD_FAT_LIBROCKCOMPILER=On -DLLVM_INCLUDE_TESTS=Off
+ROCm/rocMLIR@ff95d9286f1c273c1adc7a04db248a018be15550 -DBUILD_FAT_LIBROCKCOMPILER=On -DLLVM_INCLUDE_TESTS=Off
@@ -230,6 +230,7 @@ register_migraphx_ops(
     nonzero
     onehot
     outline
+    pack_fp4
     pack_int4
     pad
     pointwise
@@ -294,6 +295,7 @@ register_migraphx_ops(
     undefined
     unique
     unknown
+    unpack_fp4
     unpack_int4
     unsqueeze
     where
 
@@ -98,6 +98,7 @@ static shape::type_t to_shape_type(migraphx_shape_datatype_t t)
     switch(t)
     {
     case migraphx_shape_tuple_type: return shape::tuple_type;
+    case migraphx_shape_fp4x2_type: return shape::fp4x2_type;
 #define MIGRAPHX_DETAIL_SHAPE_CASE_CONVERT(x, y) \
     case migraphx_shape_##x: return shape::x;
         MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_DETAIL_SHAPE_CASE_CONVERT)
@@ -111,6 +112,7 @@ static migraphx_shape_datatype_t to_shape_type(shape::type_t t)
     switch(t)
     {
     case shape::tuple_type: return migraphx_shape_tuple_type;
+    case shape::fp4x2_type: return migraphx_shape_fp4x2_type;
 #define MIGRAPHX_DETAIL_SHAPE_CASE_CONVERT(x, y) \
     case shape::x: return migraphx_shape_##x;
         MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_DETAIL_SHAPE_CASE_CONVERT)
 
@@ -71,6 +71,7 @@ typedef enum
 typedef enum
 {
     migraphx_shape_tuple_type,
     MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES)
+    /* Appended after the generated enumerators so that their numeric values do not shift. */
+    migraphx_shape_fp4x2_type
 } migraphx_shape_datatype_t;
 #undef MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES
 
@@ -1,24 +1,26 @@
-/* ************************************************************************
- * Copyright (C) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
- * ies of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
- * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
- * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
- * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
  *
- * ************************************************************************ */
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
 #ifndef MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP
 #define MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP
 #include <type_traits>
@@ -42,6 +44,7 @@ template <typename To,
                             std::is_trivially_copyable<From>{})>
 constexpr To bit_cast(From fr) noexcept
 {
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
     static_assert(sizeof(To) == sizeof(From));
 #if defined(__GNUC__) and !defined(__clang__)
     return MIGRAPHX_CONST_FOLD(*reinterpret_cast<To*>(&fr));
 
@@ -0,0 +1,80 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_FLOAT4_CASTS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_FLOAT4_CASTS_HPP
+
+#include <cstdint>
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <iterator>
+#include <migraphx/errors.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+namespace fp4_detail {
+// FP4 decode table indexed by the 4-bit code: entries 0-7 are the non-negative values,
+// entries 8-15 the same magnitudes negated.
+// `inline` (not `static`) so the inline/constexpr functions in this header refer to one
+// shared object in every translation unit instead of a per-TU internal-linkage copy.
+inline constexpr std::array<float, 16> fp4_lut = {
+    0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0, -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0};
+
+// Midpoints between adjacent non-negative fp4_lut values, stored in ascending order.
+// pair is {fp4_tie_value, round_to_zero}
+// if round_to_zero round tie towards zero, else round tie away from zero
+inline constexpr std::array<std::pair<float, uint8_t>, 7> fp4_even_round = {
+    {{0.25, 1}, {0.75, 0}, {1.25, 1}, {1.75, 0}, {2.5, 1}, {3.5, 0}, {5, 1}}};
+} // namespace fp4_detail
+
+// Decodes an FP4 code to float by table lookup.
+// The index is reduced modulo the table size, so only the 4 LSB of x select the entry.
+constexpr float fp4_to_float(uint8_t x)
+{
+    using fp4_detail::fp4_lut;
+    return fp4_lut[x % fp4_lut.size()];
+}
+
+// Encodes a float as an FP4 code: magnitude index in the low 3 bits, sign in bit 3.
+// NaN encodes as 0; magnitudes above the last tie point map to the largest table entry.
+// rounding mode = roundToNearestRoundTiesToEven
+// Reference quantization code from Microsoft:
+// https://github.com/microsoft/microxcaling/blob/main/mx/elemwise_ops.py#L82
+// Not constexpr because std::signbit is not constexpr until C++23
+inline uint8_t float_to_fp4(float f_x)
+{
+    if(std::isnan(f_x))
+    {
+        return 0;
+    }
+    const auto& ties = fp4_detail::fp4_even_round;
+    // The key carries flag 0, so on an exact tie an entry flagged 1 compares greater and the
+    // search stops before it (round towards zero), while an entry flagged 0 compares equal
+    // and is stepped over (round away from zero).
+    const std::pair<float, uint8_t> key{std::abs(f_x), uint8_t{0}};
+    const auto first_above = std::upper_bound(ties.begin(), ties.end(), key);
+    // entries stepped over == index of the positive fp4 value
+    const uint8_t magnitude = first_above - ties.begin();
+    // negative values occupy the upper half of fp4_lut
+    const uint8_t sign_offset = std::signbit(f_x) ? fp4_detail::fp4_lut.size() / 2 : 0u;
+
+    return magnitude + sign_offset;
+}
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -26,134 +26,73 @@
 
 #include <migraphx/config.hpp>
 #include <migraphx/functional.hpp>
+#include <migraphx/iterator.hpp>
 #include <iterator>
-#include <type_traits>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
 template <class F, class Iterator = std::ptrdiff_t>
-struct basic_iota_iterator
+struct basic_iota_iterator : iterator_operators<basic_iota_iterator<F, Iterator>>,
+                             iterator_types<decltype(std::declval<F>()(std::declval<Iterator>())),
+                                            std::random_access_iterator_tag>
 {
     Iterator index;
     F f;
 
-    using difference_type   = std::ptrdiff_t;
-    using reference         = decltype(f(std::declval<Iterator>()));
-    using value_type        = typename std::remove_reference<reference>::type;
-    using pointer           = typename std::add_pointer<value_type>::type;
-    using iterator_category = std::random_access_iterator_tag;
+    using reference = decltype(std::declval<F>()(std::declval<Iterator>()));
 
-    basic_iota_iterator& operator+=(int n)
+    // cppcheck-suppress uninitMemberVar
+    constexpr basic_iota_iterator() = default;
+
+    template <class... Ts>
+    constexpr basic_iota_iterator(Iterator i, Ts&&... xs) : index(i), f{std::forward<Ts>(xs)...}
+    {
+    }
+
+    constexpr basic_iota_iterator::reference operator*() const { return f(index); }
+
+    template <class U>
+    static constexpr auto increment(U& x) -> decltype(++x.index)
     {
-        index += n;
-        return *this;
+        return ++x.index;
     }
 
-    basic_iota_iterator& operator-=(int n)
+    template <class U>
+    static constexpr auto decrement(U& x) -> decltype(--x.index)
     {
-        index -= n;
-        return *this;
+        return --x.index;
     }
 
-    basic_iota_iterator& operator++()
+    template <class U, class I>
+    static constexpr auto advance(U& x, I n) -> decltype(x.index += n)
     {
-        index++;
-        return *this;
+        return x.index += n;
     }
 
-    basic_iota_iterator& operator--()
+    template <class U, class V>
+    static constexpr auto distance(const U& x, const V& y) -> decltype(y.index - x.index)
     {
-        index--;
-        return *this;
+        return y.index - x.index;
     }
 
-    basic_iota_iterator operator++(int) // NOLINT
+    template <class U, class V>
+    static constexpr auto equal(const U& x, const V& y) -> decltype(x.index == y.index)
     {
-        basic_iota_iterator it = *this;
-        index++;
-        return it;
+        return x.index == y.index;
     }
 
-    basic_iota_iterator operator--(int) // NOLINT
+    template <class Stream>
+    friend Stream& operator<<(Stream& s, const basic_iota_iterator& x)
     {
-        basic_iota_iterator it = *this;
-        index--;
-        return it;
+        return s << x.index;
     }
-    reference operator*() const { return f(index); }
-    pointer operator->() const { return &f(index); }
-    reference operator[](int n) const { return f(index + n); }
 };
 
 template <class T, class F>
-inline basic_iota_iterator<F, T> make_basic_iota_iterator(T x, F f)
-{
-    return basic_iota_iterator<F, T>{x, f};
-}
-
-template <class F, class Iterator>
-inline basic_iota_iterator<F, Iterator> operator+(basic_iota_iterator<F, Iterator> x,
-                                                  std::ptrdiff_t y)
-{
-    return x += y;
-}
-
-template <class F, class Iterator>
-inline basic_iota_iterator<F, Iterator> operator+(std::ptrdiff_t x,
-                                                  basic_iota_iterator<F, Iterator> y)
-{
-    return y + x;
-}
-
-template <class F, class Iterator>
-inline std::ptrdiff_t operator-(basic_iota_iterator<F, Iterator> x,
-                                basic_iota_iterator<F, Iterator> y)
-{
-    return x.index - y.index;
-}
-
-template <class F, class Iterator>
-inline basic_iota_iterator<F, Iterator> operator-(basic_iota_iterator<F, Iterator> x,
-                                                  std::ptrdiff_t y)
-{
-    return x -= y;
-}
-
-template <class F, class Iterator>
-inline bool operator==(basic_iota_iterator<F, Iterator> x, basic_iota_iterator<F, Iterator> y)
-{
-    return x.index == y.index;
-}
-
-template <class F, class Iterator>
-inline bool operator!=(basic_iota_iterator<F, Iterator> x, basic_iota_iterator<F, Iterator> y)
-{
-    return x.index != y.index;
-}
-
-template <class F, class Iterator>
-inline bool operator<(basic_iota_iterator<F, Iterator> x, basic_iota_iterator<F, Iterator> y)
-{
-    return x.index < y.index;
-}
-
-template <class F, class Iterator>
-inline bool operator>(basic_iota_iterator<F, Iterator> x, basic_iota_iterator<F, Iterator> y)
-{
-    return x.index > y.index;
-}
-
-template <class F, class Iterator>
-inline bool operator>=(basic_iota_iterator<F, Iterator> x, basic_iota_iterator<F, Iterator> y)
-{
-    return x.index >= y.index;
-}
-
-template <class F, class Iterator>
-inline bool operator<=(basic_iota_iterator<F, Iterator> x, basic_iota_iterator<F, Iterator> y)
+basic_iota_iterator<F, T> make_basic_iota_iterator(T x, F f)
 {
-    return x.index <= y.index;
+    return {x, f};
 }
 
 using iota_iterator = basic_iota_iterator<id>;
Original file line number	Diff line number	Diff line change
`@@ -71,6 +71,7 @@ typedef enum`
`71`	`71`	`typedef enum`
`72`	`72`	`{`
`73`	`73`	`migraphx_shape_tuple_type,`
	`74`	`+ migraphx_shape_fp4x2_type,`
`74`	`75`	`MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES)`
`75`	`76`	`} migraphx_shape_datatype_t;`
`76`	`77`	`#undef MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES`