@@ -261,3 +261,121 @@ function run_and_collect(cmd)
     return proc, log
 end
+
+
+
+## opaque closures
+
+# TODO: once stabilised, move bits of this into GPUCompiler.jl
+
+using Core.Compiler: IRCode
+using Core: CodeInfo, MethodInstance, CodeInstance, LineNumberNode
+
+struct OpaqueClosure{F, E, A, R}    # func, env, args, ret
+    env::E
+end
+
+# XXX: because we can't call functions from other CUDA modules, we effectively need to
+#      recompile when the target function changes. this, and because of how GPUCompiler's
+#      deferred compilation mechanism currently works, is why we have `F` as a type param.
+
+# XXX: because of GPU code requiring specialized signatures, we also need to recompile
+#      when the environment or argument types change. together with the above, this
+#      negates much of the benefit of opaque closures.
+
+# TODO: support for constructing an opaque closure from source code
+
+# TODO: complete support for passing an environment. this probably requires a split into
+#       host and device structures to, e.g., root a CuArray and pass a CuDeviceArray.
+
+function compute_ir_rettype(ir::IRCode)
+    rt = Union{}
+    for i = 1:length(ir.stmts)
+        stmt = ir.stmts[i][:inst]
+        if isa(stmt, Core.Compiler.ReturnNode) && isdefined(stmt, :val)
+            rt = Core.Compiler.tmerge(Core.Compiler.argextype(stmt.val, ir), rt)
+        end
+    end
+    return Core.Compiler.widenconst(rt)
+end
+
+function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
+    argtypes = Vector{Any}(undef, nargs)
+    for i = 1:nargs
+        argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1])
+    end
+    if isva
+        lastarg = pop!(argtypes)
+        if lastarg <: Tuple
+            append!(argtypes, lastarg.parameters)
+        else
+            push!(argtypes, Vararg{Any})
+        end
+    end
+    return Tuple{argtypes...}
+end
+
+function OpaqueClosure(ir::IRCode, @nospecialize env...; isva::Bool=false)
+    # NOTE: we need ir.argtypes[1] == typeof(env)
+    ir = Core.Compiler.copy(ir)
+    nargs = length(ir.argtypes)-1
+    sig = compute_oc_signature(ir, nargs, isva)
+    rt = compute_ir_rettype(ir)
+    src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+    src.slotnames = Base.fill(:none, nargs+1)
+    src.slotflags = Base.fill(zero(UInt8), length(ir.argtypes))
+    src.slottypes = copy(ir.argtypes)
+    src.rettype = rt
+    src = Core.Compiler.ir_to_codeinf!(src, ir)
+    config = compiler_config(device(); kernel=false)
+    return generate_opaque_closure(config, src, sig, rt, nargs, isva, env...)
+end
+
+function OpaqueGPUClosure(src::CodeInfo, @nospecialize env...)
+    src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo"))
+    mi = src.parent::Core.MethodInstance
+    sig = Base.tuple_type_tail(mi.specTypes)
+    method = mi.def::Method
+    nargs = Int(method.nargs)-1
+    isva = method.isva
+    config = compiler_config(device(); kernel=false)
+    return generate_opaque_closure(config, src, sig, src.rettype, nargs, isva, env...)
+end
+
+function generate_opaque_closure(config::CompilerConfig, src::CodeInfo,
+                                 @nospecialize(sig), @nospecialize(rt),
+                                 nargs::Int, isva::Bool, @nospecialize env...;
+                                 mod::Module=@__MODULE__,
+                                 file::Union{Nothing,Symbol}=nothing, line::Int=0)
+    # create a method (like `jl_make_opaque_closure_method`)
+    meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), Main)
+    meth.sig = Tuple
+    meth.isva = isva                            # XXX: probably not supported?
+    meth.is_for_opaque_closure = 0              # XXX: do we want this?
+    meth.name = Symbol("opaque gpu closure")
+    meth.nargs = nargs+1
+    meth.file = something(file, Symbol())
+    meth.line = line
+    ccall(:jl_method_set_source, Nothing, (Any, Any), meth, src)
+
+    # look up a method instance and create a compiler job
+    full_sig = Tuple{typeof(env), sig.parameters...}
+    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
+               (Any, Any, Any), meth, full_sig, Core.svec())
+    job = CompilerJob(mi, config)   # this captures the current world age
+
+    # create a code instance and store it in the cache
+    ci = CodeInstance(mi, rt, C_NULL, src, Int32(0), meth.primary_world, typemax(UInt),
+                      UInt32(0), UInt32(0), nothing, UInt8(0))
+    Core.Compiler.setindex!(GPUCompiler.ci_cache(job), ci, mi)
+
+    id = length(GPUCompiler.deferred_codegen_jobs) + 1
+    GPUCompiler.deferred_codegen_jobs[id] = job
+    return OpaqueClosure{id, typeof(env), sig, rt}(env)
+end
+
+# device-side call to an opaque closure
+function (oc::OpaqueClosure{F})(a, b) where F
+    ptr = ccall("extern deferred_codegen", llvmcall, Ptr{Cvoid}, (Int,), F)
+    assume(ptr != C_NULL)
+    return ccall(ptr, Int, (Int, Int), a, b)
+end
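
For context, a minimal host-side sketch of how the pieces above fit together. It assumes a CUDA.jl session on a Julia version whose Core.Compiler internals match the code in this diff, with a functional device; the function `add` is purely illustrative and not part of the changeset:

add(a, b) = a + b

# Base.code_ircode returns `IRCode => return type` pairs for the given signature
ir, _ = only(Base.code_ircode(add, Tuple{Int,Int}))

# the constructor's NOTE requires ir.argtypes[1] == typeof(env);
# with no captured environment that is Tuple{}
ir.argtypes[1] = Tuple{}

# the helpers recover the call signature and return type from the IR
compute_oc_signature(ir, 2, false)   # -> Tuple{Int, Int}
compute_ir_rettype(ir)               # -> Int

# registers a deferred-codegen job for the current device; actual compilation
# happens when a kernel that calls the closure is built
oc = OpaqueClosure(ir)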
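And a sketch of invoking that closure from device code, relying on GPUCompiler resolving the registered deferred-codegen job while compiling the kernel; `checker` and `result` are likewise illustrative names:

# the closure is an isbits struct (empty environment), so it can be passed to a kernel;
# the device-side call above then looks up the compiled target via `deferred_codegen`
function checker(oc, result)
    result[1] = oc(40, 2)
    return
end

result = CuArray{Int}(undef, 1)
@cuda checker(oc, result)
@assert Array(result)[1] == 42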