@@ -44,15 +44,15 @@ namespace gpu {
4444MIGRAPHX_DECLARE_ENV_VAR (MIGRAPHX_GPU_COMPILE_PARALLEL);
4545MIGRAPHX_DECLARE_ENV_VAR (MIGRAPHX_TRACE_BENCHMARKING);
4646
47- 
4847//  For the most part this is just a modified compile_ops file, changed to deal with MLIR bytecode
49- //  important thing here to note is that we do not save the mlir module anywhere, causing us to have to reread
50- //  each bytecode sequence over and over again, this is extremely inefficient. We would need to export mlir.cpp 
51- //  structs/APIs to expose the mlir objects, then we would need to read each bytecode sequence once and when we
52- //  need to compile, affix tuning params, then run, we can just clone the module via: 
48+ //  important thing here to note is that we do not save the mlir module anywhere, causing us to have
49+ //  to reread each bytecode sequence over and over again, this is extremely inefficient. We would
50+ //  need to export mlir.cpp structs/APIs to expose the mlir objects, then we would need to read each
51+ //  bytecode sequence once and when we need to compile, affix tuning params, then run, we can just
52+ //  clone the module via:
5353//       mlir_module new_module = original_module.clone()
54- //  this might be useful for the general pipeline as well since we won't have to rerun the pipeline from 
55- //  start to finish, instead we can start from right before affixing parameters and arch info.
54+ //  this might be useful for the general pipeline as well since we won't have to rerun the pipeline
55+ //  from start to finish, instead we can start from right before affixing parameters and arch info.
5656
5757struct  bc_compiled_result 
5858{
@@ -72,20 +72,18 @@ struct bc_compile_plan
7272    operation preop;
7373    instruction_ref ins;
7474    module_ref mod;
75-     optional<tuning_config> config                 = nullopt ;
75+     optional<tuning_config> config                     = nullopt ;
7676    std::vector<optional<bc_compiled_result>> results = {};
77-     void  update_config (bool  exhaustive)
78-     {
79-         config = get_tuning_config_mlir (*ctx, ins, exhaustive);
80-     }
77+     void  update_config (bool  exhaustive) { config = get_tuning_config_mlir (*ctx, ins, exhaustive); }
8178    template  <class  Vector >
8279    void  insert_compiles (Vector& compiles, const  value& solution, std::size_t  i)
8380    {
8481        compiles.emplace_back ([=] {
8582            try 
8683            {
8784                /*  maybe change what compiled_result is, we don't want to substitute */ 
88-                 results[i] = bc_compiled_result{compile_mlir (*ctx, ins, any_cast<code_object_op>(preop), solution), ins};
85+                 results[i] = bc_compiled_result{
86+                     compile_mlir (*ctx, ins, any_cast<code_object_op>(preop), solution), ins};
8987            }
9088            catch (const  std::exception& e)
9189            {
@@ -191,7 +189,7 @@ struct bc_compile_plan
191189            return  *results.front ();
192190        }
193191        if (not  config)
194-             MIGRAPHX_THROW (" Multiple kernels without config for " name ());         
192+             MIGRAPHX_THROW (" Multiple kernels without config for " name ());
195193        if (trace_level > 1 )
196194            std::cout << " Problem: " problem  << std::endl;
197195
@@ -212,7 +210,7 @@ struct bc_compile_plan
212210                           }
213211                           if (trace_level > 2 )
214212                               std::cout << *cr << std::endl;
215-                             
213+ 
216214                           /* 
217215                           create a small program with instruction being compiled and call "replace" 
218216                           on that which would insert all the compiled code objects, prefills etc. 
@@ -235,7 +233,8 @@ struct bc_compile_plan
235233                           run_passes (*bench_mm, {dead_code_elimination{}});
236234                           //  by default, measure runtime with bundle of 1 benchmark config,
237235                           //  repeat 20 times
238-                            auto  t = time_program (*ctx, bench_prog, std::unordered_map<std::string, double >{}, 1 , 20 );
236+                            auto  t = time_program (
237+                                *ctx, bench_prog, std::unordered_map<std::string, double >{}, 1 , 20 );
239238                           if (trace_level > 1 )
240239                               std::cout << t << " ms" 
241240                           return  t;
@@ -325,7 +324,7 @@ void compile_bytecode::apply(module& m) const
325324    {
326325        if (ins->name () != " gpu::code_object" 
327326            continue ;
328-          
327+ 
329328        operation preop = any_cast<code_object_op>(ins->get_operator ());
330329
331330        if (any_cast<code_object_op>(preop).format  == code_object_format::binary)
@@ -341,5 +340,5 @@ void compile_bytecode::apply(module& m) const
341340}
342341
343342} //  namespace gpu
344- } //  namespace migraphx
345343} //  namespace MIGRAPHX_INLINE_NS
344+ } //  namespace migraphx
0 commit comments