Formatting

ethansaurusrex · ethansaurusrex · commit 881774f8d67e · 2025-08-06T12:53:49.000Z
diff --git a/src/driver/main.cpp b/src/driver/main.cpp
@@ -586,10 +586,10 @@ struct compiler
            {"--exhaustive-tune"},
            ap.help("Exhastively search for best tuning parameters for kernels"),
            ap.set_value(true));
-	ap(co.portable,
-	   {"--portable"},
-	   ap.help("PROTOTYPE: Create portable kernels that get finalized in ./driver run"),
-	   ap.set_value(true));
+        ap(co.portable,
+           {"--portable"},
+           ap.help("PROTOTYPE: Create portable kernels that get finalized in ./driver run"),
+           ap.set_value(true));
         ap(to_fp16, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(true));
         ap(to_bf16, {"--bf16"}, ap.help("Quantize for bf16"), ap.set_value(true));
         ap(to_int8, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(true));
@@ -607,21 +607,21 @@ struct compiler
         return parameters.generate(p, ct.get_target(), true, l.batch);
     }
 
-    bool has_portable_ops(program& p) 
+    bool has_portable_ops(program& p)
     {
         auto mods = p.get_modules();
-        for(const auto* mod: mods)
+        for(const auto* mod : mods)
         {
-	        for(const auto& ins : *mod)
-	        {                
-	            if(ins.name() == "gpu::code_object") 
+            for(const auto& ins : *mod)
+            {
+                if(ins.name() == "gpu::code_object")
                 {
-                    migraphx::gpu::code_object_op migx_co = migraphx::any_cast<migraphx::gpu::code_object_op>(ins.get_operator());
+                    migraphx::gpu::code_object_op migx_co =
+                        migraphx::any_cast<migraphx::gpu::code_object_op>(ins.get_operator());
                     if(migx_co.is_mlir())
                         return true;
                 }
-            
-	        }
+            }
         }
         return false;
     }
@@ -637,15 +637,16 @@ struct compiler
             std::cout << "Already compiled\n";
 
             bool has_port_ops = has_portable_ops(p);
-	        if(has_port_ops) // means we must finalize it
-	        {
-                auto ctx = ct.get_target().get_context();
+            if(has_port_ops) // means we must finalize it
+            {
+                auto ctx      = ct.get_target().get_context();
                 auto& gpu_ctx = any_cast<migraphx::gpu::context>(ctx);
-		        migraphx::run_passes(*p.get_main_module(), {migraphx::gpu::compile_bytecode{&gpu_ctx}});
+                migraphx::run_passes(*p.get_main_module(),
+                                     {migraphx::gpu::compile_bytecode{&gpu_ctx}});
                 p.finalize();
                 l.save(p);
-	        }  
-            
+            }
+
             if(ct.target_name == "gpu")
             {
                 if(is_offload_copy_set(p) and not co.offload_copy)
@@ -666,9 +667,10 @@ struct compiler
                 }
             }
 
-            if(!has_port_ops) 
+            if(!has_port_ops)
             {
-                std::cout << "The program is already compiled, skipping compilation ..." << std::endl;
+                std::cout << "The program is already compiled, skipping compilation ..."
+                          << std::endl;
             }
             if(to_fp16 or to_bf16 or to_int8 or to_fp8 or to_int4)
             {
@@ -886,8 +888,8 @@ struct roctx : command<roctx>
 
     void run()
     {
-        auto p = c.compile();
-        auto m = c.params(p);
+        auto p   = c.compile();
+        auto m   = c.params(p);
         auto rtx = create_marker_roctx();
         p.mark(m, std::move(rtx));
     }
diff --git a/src/targets/gpu/code_object_op.cpp b/src/targets/gpu/code_object_op.cpp
@@ -60,7 +60,7 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
     return args[get_output_arg(args.size())];
 }
 void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
-{    
+{
     assert(not code_object.empty());
     if(this->format == code_object_format::binary)
     {
diff --git a/src/targets/gpu/compile_bytecode.cpp b/src/targets/gpu/compile_bytecode.cpp
@@ -44,15 +44,15 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_BENCHMARKING);
 
-
 // For the most part this is just a modified compile_ops file, changed to deal with MLIR bytecode
-// important thing here to note is that we do not save the mlir module anywhere, causing us to have to reread
-// each bytecode sequence over and over again, this is extremely inefficient. We would need to export mlir.cpp 
-// structs/APIs to expose the mlir objects, then we would need to read each bytecode sequence once and when we
-// need to compile, affix tuning params, then run, we can just clone the module via: 
+// important thing here to note is that we do not save the mlir module anywhere, causing us to have
+// to reread each bytecode sequence over and over again, this is extremely inefficient. We would
+// need to export mlir.cpp structs/APIs to expose the mlir objects, then we would need to read each
+// bytecode sequence once and when we need to compile, affix tuning params, then run, we can just
+// clone the module via:
 //      mlir_module new_module = original_module.clone()
-// this might be useful for the general pipeline as well since we won't have to rerun the pipeline from
-// start to finish, instead we can start from right before affixing parameters and arch info.
+// this might be useful for the general pipeline as well since we won't have to rerun the pipeline
+// from start to finish, instead we can start from right before affixing parameters and arch info.
 
 struct bc_compiled_result
 {
@@ -72,20 +72,18 @@ struct bc_compile_plan
     operation preop;
     instruction_ref ins;
     module_ref mod;
-    optional<tuning_config> config                 = nullopt;
+    optional<tuning_config> config                    = nullopt;
     std::vector<optional<bc_compiled_result>> results = {};
-    void update_config(bool exhaustive)
-    {
-        config = get_tuning_config_mlir(*ctx, ins, exhaustive);
-    }
+    void update_config(bool exhaustive) { config = get_tuning_config_mlir(*ctx, ins, exhaustive); }
     template <class Vector>
     void insert_compiles(Vector& compiles, const value& solution, std::size_t i)
     {
         compiles.emplace_back([=] {
             try
             {
                 /* maybe change what compiled_result is, we dont want to substitute */
-                results[i] = bc_compiled_result{compile_mlir(*ctx, ins, any_cast<code_object_op>(preop), solution), ins};
+                results[i] = bc_compiled_result{
+                    compile_mlir(*ctx, ins, any_cast<code_object_op>(preop), solution), ins};
             }
             catch(const std::exception& e)
             {
@@ -191,7 +189,7 @@ struct bc_compile_plan
             return *results.front();
         }
         if(not config)
-            MIGRAPHX_THROW("Multiple kernels without config for " + preop.name());        
+            MIGRAPHX_THROW("Multiple kernels without config for " + preop.name());
         if(trace_level > 1)
             std::cout << "Problem: " << config->problem << std::endl;
 
@@ -212,7 +210,7 @@ struct bc_compile_plan
                            }
                            if(trace_level > 2)
                                std::cout << *cr << std::endl;
-                           
+
                            /*
                            create a small program with insturction being compiled and call "replace"
                            on that which would insert all the compiled code objects, prefills etc.
@@ -235,7 +233,8 @@ struct bc_compile_plan
                            run_passes(*bench_mm, {dead_code_elimination{}});
                            // by default, measure runtime with bundle of 1 benchmark config,
                            // repeat 20 times
-                           auto t = time_program(*ctx, bench_prog, std::unordered_map<std::string, double>{}, 1, 20);
+                           auto t = time_program(
+                               *ctx, bench_prog, std::unordered_map<std::string, double>{}, 1, 20);
                            if(trace_level > 1)
                                std::cout << t << "ms" << std::endl;
                            return t;
@@ -325,7 +324,7 @@ void compile_bytecode::apply(module& m) const
     {
         if(ins->name() != "gpu::code_object")
             continue;
-        
+
         operation preop = any_cast<code_object_op>(ins->get_operator());
 
         if(any_cast<code_object_op>(preop).format == code_object_format::binary)
@@ -341,5 +340,5 @@ void compile_bytecode::apply(module& m) const
 }
 
 } // namespace gpu
-} // namespace migraphx
 } // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
diff --git a/src/targets/gpu/compile_hip_code_object.cpp b/src/targets/gpu/compile_hip_code_object.cpp
@@ -203,9 +203,8 @@ compile_hip_code_object(context& ctx, const std::string& content, hip_compile_op
     options.params.insert(options.params.end(), warnings.begin(), warnings.end());
     options.emplace_param("-ftemplate-backtrace-limit=0");
     options.emplace_param("-Werror");
-    auto cos = ctx.get_portable_flag()
-                   ? compile_hip_src(srcs, options.params, "amdgcnspirv")
-                   : compile_hip_src(srcs, options.params, get_device_name());
+    auto cos = ctx.get_portable_flag() ? compile_hip_src(srcs, options.params, "amdgcnspirv")
+                                       : compile_hip_src(srcs, options.params, get_device_name());
     if(cos.size() != 1)
         MIGRAPHX_THROW("No code object");
     return code_object_op{value::binary{cos.front()},
diff --git a/src/targets/gpu/include/migraphx/gpu/code_object_op.hpp b/src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
@@ -34,7 +34,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
-enum struct code_object_format 
+enum struct code_object_format
 {
     binary,
     mlir_bytecode
@@ -54,14 +54,16 @@ struct code_object_op
     kernel k{};
     code_object_format format = code_object_format::binary;
     // maybe add instruction_ref ins;
-    /* this allows us to simply convert the mlirbc back to mlir, then run the remaining passes 
+    /* this allows us to simply convert the mlirbc back to mlir, then run the remaining passes
        after we can just compile it using compile_ops, might need some small adjustments
        mainly: how are we to start at the "end" of the pipeline at the kernel_pass?
 
        try:
-       MIGRAPHX_TRACE_COMPILE=1 MIGRAPHX_TRACE_MLIR=1 MIGRAPHX_MLIR_TUNE_LIMIT=1 ./bin/driver compile ../spirv/MXRs/gemm.mxr
+       MIGRAPHX_TRACE_COMPILE=1 MIGRAPHX_TRACE_MLIR=1 MIGRAPHX_MLIR_TUNE_LIMIT=1 ./bin/driver
+       compile ../spirv/MXRs/gemm.mxr
 
-       MIGRAPHX_TRACE_CMD_EXECUTE=1 MIGRAPHX_TRACE_COMPILE=1 MIGRAPHX_TRACE_MLIR=1 MIGRAPHX_MLIR_TUNE_LIMIT=1 ./bin/driver compile --portable ../spirv/MXRs/pointwise.mxr
+       MIGRAPHX_TRACE_CMD_EXECUTE=1 MIGRAPHX_TRACE_COMPILE=1 MIGRAPHX_TRACE_MLIR=1
+       MIGRAPHX_MLIR_TUNE_LIMIT=1 ./bin/driver compile --portable ../spirv/MXRs/pointwise.mxr
      */
 
     template <class Self, class F>
@@ -94,7 +96,7 @@ struct code_object_op
     {
         return get_output_arg(shapes.size());
     }
-    bool is_mlir() const {return format == code_object_format::mlir_bytecode; }
+    bool is_mlir() const { return format == code_object_format::mlir_bytecode; }
 
     friend std::ostream& operator<<(std::ostream& os, const code_object_op& op)
     {
@@ -105,7 +107,7 @@ struct code_object_op
         os << "local=" << op.local << ",";
         if(op.output_arg != -1)
             os << "output_arg=" << op.output_arg << ",";
-        os << "format=" << (op.format == code_object_format::binary ? "binary" : "mlir_bytecode");          
+        os << "format=" << (op.format == code_object_format::binary ? "binary" : "mlir_bytecode");
         os << "]";
         return os;
     }
diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -270,7 +270,7 @@ struct context
     void set_exhaustive_tune_flag(bool t) { exhaustive_tune = t; }
 
     bool get_portable_flag() const { return portable; }
-    
+
     void set_portable_flag(bool p) { portable = p; }
 
     hip_device::stream& get_stream() { return get_current_device().get_stream(); }
@@ -386,7 +386,7 @@ struct context
     std::shared_ptr<hip_device> current_device;
     std::vector<shared<hip_event_ptr>> events;
     bool exhaustive_tune = false;
-    bool portable = false;
+    bool portable        = false;
     bool measure_perf    = false;
     // for event perf timing
     shared<hip_event_ptr> start_event = nullptr;
diff --git a/src/targets/gpu/include/migraphx/gpu/mlir.hpp b/src/targets/gpu/include/migraphx/gpu/mlir.hpp
@@ -64,7 +64,7 @@ MIGRAPHX_GPU_EXPORT mlir_code_object compile_mlir(const context& migraphx_ctx,
 MIGRAPHX_GPU_EXPORT mlir_code_object compile_mlir(const context& migraphx_ctx,
                                                   instruction_ref ins,
                                                   code_object_op co,
-                                                  const value& solution);                                                 
+                                                  const value& solution);
 
 MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
                                                 instruction_ref ins,
@@ -74,11 +74,11 @@ MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
 MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
                                                          module m,
                                                          const std::vector<shape>& inputs,
-                                                         bool exhaustive);    
-                                                         
+                                                         bool exhaustive);
+
 MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
-                                                        instruction_ref ins,
-                                                        bool exhaustive);                                                     
+                                                         instruction_ref ins,
+                                                         bool exhaustive);
 
 MIGRAPHX_GPU_EXPORT void
 dump_mlir_to_mxr(module m, const std::vector<instruction_ref>& inputs, const fs::path& location);
diff --git a/src/targets/gpu/jit/mlir.cpp b/src/targets/gpu/jit/mlir.cpp
@@ -146,7 +146,7 @@ struct mlir_compiler : compiler<mlir_compiler>
         if(gemm_like_ins != smod->end() and pointwise_ins != smod->end() and
            not is_module_fusible(*smod, ctx, solution))
         {
-	    std::cout << "Compiling fused gemm w/perfConf\n";
+            std::cout << "Compiling fused gemm w/perfConf\n";
             auto input_args = ins->inputs();
             // remove alloc buffer
             input_args.pop_back();
diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp
diff --git a/tools/format.py b/tools/format.py

Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,7 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&`
`60`	`60`	`return args[get_output_arg(args.size())];`
`61`	`61`	`}`
`62`	`62`	`void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)`
`63`		`-{`
	`63`	`+{`
`64`	`64`	`assert(not code_object.empty());`
`65`	`65`	`if(this->format == code_object_format::binary)`
`66`	`66`	`{`
Original file line number	Diff line number	Diff line change
`@@ -146,7 +146,7 @@ struct mlir_compiler : compiler<mlir_compiler>`
`146`	`146`	`if(gemm_like_ins != smod->end() and pointwise_ins != smod->end() and`
`147`	`147`	`not is_module_fusible(*smod, ctx, solution))`
`148`	`148`	`{`
`149`		`- std::cout << "Compiling fused gemm w/perfConf\n";`
	`149`	`+ std::cout << "Compiling fused gemm w/perfConf\n";`
`150`	`150`	`auto input_args = ins->inputs();`
`151`	`151`	`// remove alloc buffer`
`152`	`152`	`input_args.pop_back();`