@@ -257,3 +257,121 @@ function run_and_collect(cmd)
    return proc, log
end
+
+
+
+ ## opaque closures
+
+ # TODO: once stabilised, move bits of this into GPUCompiler.jl
+
+ using Core.Compiler: IRCode
+ using Core: CodeInfo, MethodInstance, CodeInstance, LineNumberNode
+
+ struct OpaqueClosure{F, E, A, R}    # func, env, args, ret
+     env::E
+ end
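+ # (for illustration: `generate_opaque_closure` below returns instances such as
+ #  `OpaqueClosure{42, Tuple{}, Tuple{Int,Int}, Int}(())`, where 42 is an example
+ #  deferred-codegen job id and the environment is empty)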
273
+
274
+ # XXX : because we can't call functions from other CUDA modules, we effectively need to
275
+ # recompile when the target function changes. this, and because of how GPUCompiler's
276
+ # deferred compilation mechanism currently works, is why we have `F` as a type param.
277
+
278
+ # XXX : because of GPU code requiring specialized signatures, we also need to recompile
279
+ # when the environment or argument types change. together with the above, this
280
+ # negates much of the benefit of opaque closures.
281
+
282
+ # TODO : support for constructing an opaque closure from source code
283
+
284
+ # TODO : complete support for passing an environment. this probably requires a split into
285
+ # host and device structures to, e.g., root a CuArray and pass a CuDeviceArray.
286
+
287
+ function compute_ir_rettype (ir:: IRCode )
288
+ rt = Union{}
289
+ for i = 1 : length (ir. stmts)
290
+ stmt = ir. stmts[i][:inst ]
291
+ if isa (stmt, Core. Compiler. ReturnNode) && isdefined (stmt, :val )
292
+ rt = Core. Compiler. tmerge (Core. Compiler. argextype (stmt. val, ir), rt)
293
+ end
294
+ end
295
+ return Core. Compiler. widenconst (rt)
296
+ end
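+ # (for example: an IRCode whose ReturnNodes return an Int and a Float64 yields
+ #  `widenconst(tmerge(Int, Float64)) == Union{Float64, Int64}` here)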
+
+ function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
+     argtypes = Vector{Any}(undef, nargs)
+     for i = 1:nargs
+         argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1])
+     end
+     if isva
+         lastarg = pop!(argtypes)
+         if lastarg <: Tuple
+             append!(argtypes, lastarg.parameters)
+         else
+             push!(argtypes, Vararg{Any})
+         end
+     end
+     return Tuple{argtypes...}
+ end
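+ # (for example: with `nargs = 2`, `isva = true`, and argument types `Int` and
+ #  `Tuple{Float64, String}`, this returns `Tuple{Int, Float64, String}`)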
+
+ function OpaqueClosure(ir::IRCode, @nospecialize env...; isva::Bool=false)
+     # NOTE: we need ir.argtypes[1] == typeof(env)
+     ir = Core.Compiler.copy(ir)
+     nargs = length(ir.argtypes)-1
+     sig = compute_oc_signature(ir, nargs, isva)
+     rt = compute_ir_rettype(ir)
+     src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+     src.slotnames = Base.fill(:none, nargs+1)
+     src.slotflags = Base.fill(zero(UInt8), length(ir.argtypes))
+     src.slottypes = copy(ir.argtypes)
+     src.rettype = rt
+     src = Core.Compiler.ir_to_codeinf!(src, ir)
+     config = compiler_config(device(); kernel=false)
+     return generate_opaque_closure(config, src, sig, rt, nargs, isva, env...)
+ end
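+ # (for illustration, assuming `Base.code_ircode` from recent Julia versions:
+ #  `ir, _ = only(Base.code_ircode(+, (Int, Int)))`, then `oc = OpaqueClosure(ir)`;
+ #  mind the NOTE above about `ir.argtypes[1]`)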
+
+ function OpaqueGPUClosure(src::CodeInfo, @nospecialize env...)
+     src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo"))
+     mi = src.parent::Core.MethodInstance
+     sig = Base.tuple_type_tail(mi.specTypes)
+     method = mi.def::Method
+     nargs = method.nargs-1
+     isva = method.isva
+     # XXX: assuming the same compiler configuration as in the IRCode-based constructor
+     config = compiler_config(device(); kernel=false)
+     return generate_opaque_closure(config, src, sig, src.rettype, nargs, isva, env...)
+ end
+
+ function generate_opaque_closure(config::CompilerConfig, src::CodeInfo,
+                                  @nospecialize(sig), @nospecialize(rt),
+                                  nargs::Int, isva::Bool, @nospecialize env...;
+                                  mod::Module=@__MODULE__,
+                                  file::Union{Nothing,Symbol}=nothing, line::Int=0)
+     # create a method (like `jl_make_opaque_closure_method`)
+     meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), Main)
+     meth.sig = Tuple
+     meth.isva = isva                        # XXX: probably not supported?
+     meth.is_for_opaque_closure = 0          # XXX: do we want this?
+     meth.name = Symbol("opaque gpu closure")
+     meth.nargs = nargs + 1
+     meth.file = something(file, Symbol())
+     meth.line = line
+     ccall(:jl_method_set_source, Nothing, (Any, Any), meth, src)
+
+     # look up a method instance and create a compiler job
+     full_sig = Tuple{typeof(env), sig.parameters...}
+     mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
+                (Any, Any, Any), meth, full_sig, Core.svec())
+     job = CompilerJob(mi, config)   # this captures the current world age
+
+     # create a code instance and store it in the cache
+     ci = CodeInstance(mi, rt, C_NULL, src, Int32(0), meth.primary_world, typemax(UInt),
+                       UInt32(0), UInt32(0), nothing, UInt8(0))
+     Core.Compiler.setindex!(GPUCompiler.ci_cache(job), ci, mi)
+
+     id = length(GPUCompiler.deferred_codegen_jobs) + 1
+     GPUCompiler.deferred_codegen_jobs[id] = job
+     return OpaqueClosure{id, typeof(env), sig, rt}(env)
+ end
+
+ # device-side call to an opaque closure
+ function (oc::OpaqueClosure{F})(a, b) where F
+     ptr = ccall("extern deferred_codegen", llvmcall, Ptr{Cvoid}, (Int,), F)
+     assume(ptr != C_NULL)
+     return ccall(ptr, Int, (Int, Int), a, b)
+ end
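+ # (for illustration, a possible end-to-end usage sketch matching the Int -> Int
+ #  signature hard-coded above; `gpu_oc_kernel`, `a`, `b` and `c` are made-up names:
+ #      ir, _ = only(Base.code_ircode(+, (Int, Int)))
+ #      oc = OpaqueClosure(ir)
+ #      function gpu_oc_kernel(c, a, b, oc)
+ #          i = threadIdx().x
+ #          @inbounds c[i] = oc(a[i], b[i])
+ #          return
+ #      end
+ #      a = CuArray(1:32); b = CuArray(1:32); c = similar(a)
+ #      @cuda threads=32 gpu_oc_kernel(c, a, b, oc)
+ #  )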