
Commit 1b7af54

Fix conversion issue in layernorm fusion (#1483) (#1493)
1 parent fe19455 commit 1b7af54

5 files changed: +41 -37 lines

src/targets/gpu/include/migraphx/gpu/hip.hpp

Lines changed: 3 additions & 3 deletions
@@ -105,7 +105,7 @@ struct hip_copy_to_gpu
     std::string name() const { return "hip::copy_to_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2);
+        check_shapes{inputs, *this}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
@@ -131,7 +131,7 @@ struct hip_copy_from_gpu
     std::string name() const { return "hip::copy_from_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2);
+        check_shapes{inputs, *this}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument
@@ -159,7 +159,7 @@ struct hip_copy
     std::string name() const { return "hip::copy"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2);
+        check_shapes{inputs, *this}.has(2).same_type();
         return inputs.at(1);
     }
     argument compute(context& ctx, const shape&, std::vector<argument> args) const
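
Not part of the diff, but for context: `.same_type()` tightens the shape check so that a copy whose inputs disagree on element type is rejected when shapes are computed, instead of the mismatch surfacing later as a silent conversion problem once the fused layernorm is allowed to change the output type (see prefuse_ops.cpp below). A minimal standalone sketch of that rule, using placeholder names (`dtype`, `require_same_type`) rather than the real `check_shapes` machinery:

// Standalone sketch only; dtype and require_same_type are illustrative names,
// not MIGraphX API -- the real check lives in migraphx::check_shapes.
#include <stdexcept>
#include <vector>

enum class dtype { float32, half };

// Mirrors the intent of check_shapes{...}.same_type(): every input to the copy
// must carry the same element type, otherwise compute_shape throws up front
// instead of deferring the mismatch to kernel launch time.
inline void require_same_type(const std::vector<dtype>& input_types)
{
    for(dtype t : input_types)
        if(t != input_types.front())
            throw std::runtime_error("hip::copy: inputs must have the same type");
}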

src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@
 #define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
 #include <migraphx/kernels/reduce.hpp>
 #include <migraphx/kernels/ops.hpp>
+#include <migraphx/kernels/vec.hpp>
 #include <migraphx/kernels/print.hpp>

 namespace migraphx {
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp

Lines changed: 0 additions & 32 deletions
@@ -33,38 +33,6 @@

 namespace migraphx {

-template <class T>
-struct implicit_conversion_op
-{
-    T x;
-
-    template <index_int N, class U>
-    constexpr operator vec<U, N>() const
-    {
-        if constexpr(vec_size<T>() == 0)
-        {
-            return x;
-        }
-        else
-        {
-            static_assert(vec_size<T>() == N, "Vector mismatch size");
-            return __builtin_convertvector(x, vec<U, N>);
-        }
-    }
-
-    template <class U>
-    constexpr operator U() const
-    {
-        return x;
-    }
-};
-
-template <class T>
-constexpr implicit_conversion_op<T> implicit_conversion(T x)
-{
-    return {x};
-}
-
 template <class F, class T, class... Ts>
 __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 {

src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp

Lines changed: 32 additions & 0 deletions
@@ -185,5 +185,37 @@ constexpr auto vec_reduce(T x, Op op)
 }
 }

+template <class T>
+struct implicit_conversion_op
+{
+    T x;
+
+    template <index_int N, class U>
+    constexpr operator vec<U, N>() const
+    {
+        if constexpr(vec_size<T>() == 0)
+        {
+            return x;
+        }
+        else
+        {
+            static_assert(vec_size<T>() == N, "Vector mismatch size");
+            return __builtin_convertvector(x, vec<U, N>);
+        }
+    }
+
+    template <class U>
+    constexpr operator U() const
+    {
+        return x;
+    }
+};
+
+template <class T>
+constexpr implicit_conversion_op<T> implicit_conversion(T x)
+{
+    return {x};
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
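
The block above is the helper moved out of pointwise.hpp (see the matching deletion earlier in this commit) so that layernorm.hpp can reach it through the new vec.hpp include. It wraps a value so that assigning it to either a scalar or a vector target converts element-wise; for vectors it relies on Clang's `__builtin_convertvector` and static_asserts that the lane counts agree. A standalone sketch of that builtin on Clang extended-vector types, independent of MIGraphX's `vec`/`vec_size` helpers (the `float4_t`/`half4_t` typedefs are illustrative and require a target with `_Float16` support):

// Standalone Clang sketch; float4_t and half4_t are illustrative typedefs,
// not the MIGraphX vec<T, N> alias.
typedef float    float4_t __attribute__((ext_vector_type(4)));
typedef _Float16 half4_t  __attribute__((ext_vector_type(4)));

half4_t narrow(float4_t x)
{
    // Element-wise float -> half conversion; both sides have 4 lanes, which is
    // the same-width requirement implicit_conversion_op enforces via static_assert.
    return __builtin_convertvector(x, half4_t);
}

In the kernels this would presumably be used as something like `vec<half, 4> y = implicit_conversion(x);`, letting the destination type drive the conversion.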

src/targets/gpu/prefuse_ops.cpp

Lines changed: 5 additions & 2 deletions
@@ -51,17 +51,20 @@ struct layernorm_base
         }
         check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
         auto s = inputs.at(0);
+        auto t = s.type();
+        if(not mods.empty())
+            t = mods.front()->get_output_shapes().front().type();
         if(s.scalar())
         {
             return s;
         }
         else if(s.broadcasted())
         {
-            return {s.type(), s.lens()};
+            return {t, s.lens()};
         }
         else
         {
-            return s.with_lens(s.lens());
+            return s.with_lens(t, s.lens());
         }
     }
 };
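
This is the piece that resolves the conversion issue itself: when the fused layernorm carries a sub-module (for example a trailing pointwise convert to half), the output shape now takes its type from that module's result instead of from the first input, so the fused op advertises the same element type the generated kernel actually produces. A rough standalone sketch of the rule, with placeholder types standing in for `migraphx::shape` and the module interface:

// Illustrative only; simple_shape and layernorm_output_shape are placeholders,
// not MIGraphX API.
#include <cstddef>
#include <optional>
#include <vector>

enum class elem_type { float32, half };

struct simple_shape
{
    elem_type type;
    std::vector<std::size_t> lens;
};

// The output keeps the input's dimensions but takes its element type from the
// fused sub-module's result when one exists (mods.front() in the real code),
// falling back to the input type when there is no sub-module.
simple_shape layernorm_output_shape(const simple_shape& input,
                                    const std::optional<elem_type>& fused_module_type)
{
    return {fused_module_type.value_or(input.type), input.lens};
}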
