ROCm · ethansaurusrex · Jul 29, 2025 · Jul 30, 2025 · Jul 31, 2025 · Jul 31, 2025
@@ -58,6 +58,11 @@
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/register_target.hpp>
 
+#include <migraphx/gpu/code_object_op.hpp>
+#include <migraphx/gpu/compile_bytecode.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/gpu/context.hpp>
+
 #include <migraphx/netron_output.hpp>
 
 #include <fstream>
@@ -581,6 +586,10 @@
            {"--exhaustive-tune"},
            ap.help("Exhastively search for best tuning parameters for kernels"),
            ap.set_value(true));
+        ap(co.portable,
+           {"--portable"},
+           ap.help("PROTOTYPE: Create portable kernels that get finalized in ./driver run"),
+           ap.set_value(true));
         ap(to_fp16, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(true));
         ap(to_bf16, {"--bf16"}, ap.help("Quantize for bf16"), ap.set_value(true));
         ap(to_int8, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(true));
@@ -598,13 +607,46 @@
         return parameters.generate(p, ct.get_target(), true, l.batch);
     }
 
+    // Reports whether any module of `p` has a gpu::code_object whose is_mlir() is true.
+    bool has_portable_ops(const program& p)
+    {
+        for(const auto* mod : p.get_modules())
+        {
+            for(const auto& ins : *mod)
+            {
+                if(ins.name() != "gpu::code_object")
+                    continue;
+                // Bind by reference: copying the op would duplicate its code object.
+                const auto& co_op =
+                    migraphx::any_cast<migraphx::gpu::code_object_op>(ins.get_operator());
+                if(co_op.is_mlir())
+                    return true;
+            }
+        }
+        return false;
+    }
+
     program compile()
     {
+        std::cout << "Loading file\n";
         auto p = l.load();
         // Dont compile if its already been compiled
 
         if(p.is_compiled())
         {
+            std::cout << "Already compiled\n";
+
+            bool has_port_ops = has_portable_ops(p);
+            if(has_port_ops) // means we must finalize it
+            {
+                auto ctx      = ct.get_target().get_context();
+                auto& gpu_ctx = any_cast<migraphx::gpu::context>(ctx);
+                migraphx::run_passes(*p.get_main_module(),
+                                     {migraphx::gpu::compile_bytecode{&gpu_ctx}});
+                p.finalize();
+                l.save(p);
+            }
+
             if(ct.target_name == "gpu")
             {
                 if(is_offload_copy_set(p) and not co.offload_copy)
@@ -625,7 +667,11 @@
                 }
             }
 
-            std::cout << "The program is already compiled, skipping compilation ..." << std::endl;
+            if(!has_port_ops)
+            {
+                std::cout << "The program is already compiled, skipping compilation ..."
+                          << std::endl;
+            }
             if(to_fp16 or to_bf16 or to_int8 or to_fp8 or to_int4)
             {
                 std::cerr
@@ -830,9 +876,7 @@
     void run()
     {
         auto p = c.compile();
-        std::cout << "Allocating params ... " << std::endl;
         auto m = c.params(p);
-        std::cout << "Running performance report ... " << std::endl;
         p.perf_report(std::cout, n, m, c.l.batch, detailed);
     }
 };
@@ -844,10 +888,8 @@
 
     void run()
     {
-        auto p = c.compile();
-        std::cout << "Allocating params ... " << std::endl;
-        auto m = c.params(p);
-        std::cout << "rocTX:\tLoading rocTX library..." << std::endl;
+        auto p   = c.compile(); // presumably the program produced by the compiler helper
+        auto m   = c.params(p); // parameters for `p`, handed to p.mark() below
         auto rtx = create_marker_roctx();
         p.mark(m, std::move(rtx));
     }

@@ -41,6 +41,8 @@ struct compile_options
     bool fast_math       = true;
     bool exhaustive_tune = false;
 
+    bool portable = false;
+
     tracer trace{};
 };
 

@@ -144,6 +144,7 @@ add_library(migraphx_gpu
     argmax.cpp
     argmin.cpp
     code_object_op.cpp
+    compile_bytecode.cpp
     compile_ops.cpp
     compile_gen.cpp
     compile_hip.cpp

@@ -48,18 +48,24 @@ shape code_object_op::compute_shape(std::vector<shape> inputs) const
 argument
 code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    auto fargs = flatten(args);
-    std::vector<void*> kargs(fargs.size());
-    std::transform(
-        fargs.begin(), fargs.end(), kargs.begin(), [](const argument& a) { return a.data(); });
-    auto [start, stop] = ctx.get_perf_events();
-    k.launch(ctx.get_stream().get(), global, local, std::move(kargs), start, stop);
+    // Only binary code objects are launched; any other format returns the output untouched.
+    if(this->format != code_object_format::binary)
+        return args[get_output_arg(args.size())];
+    auto flat = flatten(args);
+    std::vector<void*> ptrs(flat.size());
+    std::transform(
+        flat.begin(), flat.end(), ptrs.begin(), [](const argument& a) { return a.data(); });
+    auto [start, stop] = ctx.get_perf_events();
+    k.launch(ctx.get_stream().get(), global, local, std::move(ptrs), start, stop);
     return args[get_output_arg(args.size())];
 }
 void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
 {
     assert(not code_object.empty());
-    k = kernel(code_object, symbol_name);
+    // A kernel is only built for binary code objects; other formats leave `k` as it was.
+    if(this->format != code_object_format::binary)
+        return;
+    k = kernel(code_object, symbol_name);
 }
 
 } // namespace gpu